//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//
12
13#include "SPUISelLowering.h"
14#include "SPUTargetMachine.h"
15#include "SPUFrameLowering.h"
16#include "SPUMachineFunction.h"
17#include "llvm/Constants.h"
18#include "llvm/Function.h"
19#include "llvm/Intrinsics.h"
20#include "llvm/CallingConv.h"
21#include "llvm/Type.h"
22#include "llvm/CodeGen/CallingConvLower.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/MachineFunction.h"
25#include "llvm/CodeGen/MachineInstrBuilder.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SelectionDAG.h"
28#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29#include "llvm/Target/TargetOptions.h"
30#include "llvm/ADT/VectorExtras.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/ErrorHandling.h"
33#include "llvm/Support/MathExtras.h"
34#include "llvm/Support/raw_ostream.h"
35#include <map>
36
37using namespace llvm;
38
39// Used in getTargetNodeName() below
40namespace {
41  std::map<unsigned, const char *> node_names;
42
43  // Byte offset of the preferred slot (counted from the MSB)
44  int prefslotOffset(EVT VT) {
45    int retval=0;
46    if (VT==MVT::i1) retval=3;
47    if (VT==MVT::i8) retval=3;
48    if (VT==MVT::i16) retval=2;
49
50    return retval;
51  }
52
53  //! Expand a library call into an actual call DAG node
54  /*!
55   \note
56   This code is taken from SelectionDAGLegalize, since it is not exposed as
57   part of the LLVM SelectionDAG API.
58   */
59
60  SDValue
61  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
62                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
63    // The input chain to this libcall is the entry node of the function.
64    // Legalizing the call will automatically add the previous call to the
65    // dependence.
66    SDValue InChain = DAG.getEntryNode();
67
68    TargetLowering::ArgListTy Args;
69    TargetLowering::ArgListEntry Entry;
70    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
71      EVT ArgVT = Op.getOperand(i).getValueType();
72      Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
73      Entry.Node = Op.getOperand(i);
74      Entry.Ty = ArgTy;
75      Entry.isSExt = isSigned;
76      Entry.isZExt = !isSigned;
77      Args.push_back(Entry);
78    }
79    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
80                                           TLI.getPointerTy());
81
82    // Splice the libcall in wherever FindInputOutputChains tells us to.
83    Type *RetTy =
84                Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
85    std::pair<SDValue, SDValue> CallInfo =
86            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
87                            0, TLI.getLibcallCallingConv(LC), false,
88                            /*isReturnValueUsed=*/true,
89                            Callee, Args, DAG, Op.getDebugLoc());
90
91    return CallInfo.first;
92  }
93}
94
/// SPUTargetLowering constructor: configures how every IR-level operation and
/// value type is legalized for the Cell SPU target — register classes,
/// Legal/Custom/Expand/Promote actions, libcall names, DAG-combine hooks and
/// scheduling preference. Note that setOperationAction may be called more
/// than once for the same (opcode, type) pair below; the last call takes
/// effect.
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  // No truncating stores from i128 or f64; expand them to explicit code.
  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  // (exclusive upper bound: this walks i8..i64; MVT::i128 itself is not
  // visited by this loop)
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // Expand truncating stores to every narrower integer type.
    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // (exclusive upper bound: only MVT::f32 is visited here; f64 keeps the
  // default load/store actions)
  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    // With sctype == f32 this inner loop body never executes.
    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FMA, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  // NOTE(review): FP_TO_SINT/FP_TO_UINT for i64 (and FP_TO_UINT for i32) are
  // set again further down in this constructor; the later calls take effect.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BITCAST, MVT::i32, Legal);
  setOperationAction(ISD::BITCAST, MVT::f32, Legal);
  setOperationAction(ISD::BITCAST, MVT::i64, Legal);
  setOperationAction(ISD::BITCAST, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  // (walks every simple type from i8 up to, but not including, f128)
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // (these override the earlier Expand settings for the same pairs)
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // (overrides the earlier Custom setting for i32 FP_TO_UINT)
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // Set operation actions to legal types only.
    if (!isTypeLegal(VT)) continue;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // NOTE(review): the action below is Legal, despite the historical note
    // that mul "has to be custom lowered" (only scalar i8 MUL is Custom).
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Custom);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Custom);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  // v16i8 logical ops and v4f32 scalar_to_vector get special handling
  // (overrides the Legal/Custom defaults set in the loop above).
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // FIXME: Is this correct?

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  // Align functions to 2^3 = 8 bytes.
  setMinFunctionAlignment(3);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}
464
465const char *
466SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
467{
468  if (node_names.empty()) {
469    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
470    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
471    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
472    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
473    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
474    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
475    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
476    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
477    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
478    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
479    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
480    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
481    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
482    node_names[(unsigned) SPUISD::SHL_BITS] = "SPUISD::SHL_BITS";
483    node_names[(unsigned) SPUISD::SHL_BYTES] = "SPUISD::SHL_BYTES";
484    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
485    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
486    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
487    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
488            "SPUISD::ROTBYTES_LEFT_BITS";
489    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
490    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
491    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
492    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
493    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
494  }
495
496  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
497
498  return ((i != node_names.end()) ? i->second : 0);
499}
500
//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//
504
505EVT SPUTargetLowering::getSetCCResultType(EVT VT) const {
506  // i8, i16 and i32 are valid SETCC result types
507  MVT::SimpleValueType retval;
508
509  switch(VT.getSimpleVT().SimpleTy){
510    case MVT::i1:
511    case MVT::i8:
512      retval = MVT::i8; break;
513    case MVT::i16:
514      retval = MVT::i16; break;
515    case MVT::i32:
516    default:
517      retval = MVT::i32;
518  }
519  return retval;
520}
521
//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//
525
526#include "SPUGenCallingConv.inc"
527
//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//
531
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
548static SDValue
549LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
550  LoadSDNode *LN = cast<LoadSDNode>(Op);
551  SDValue the_chain = LN->getChain();
552  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
553  EVT InVT = LN->getMemoryVT();
554  EVT OutVT = Op.getValueType();
555  ISD::LoadExtType ExtType = LN->getExtensionType();
556  unsigned alignment = LN->getAlignment();
557  int pso = prefslotOffset(InVT);
558  DebugLoc dl = Op.getDebugLoc();
559  EVT vecVT = InVT.isVector()? InVT: EVT::getVectorVT(*DAG.getContext(), InVT,
560                                                  (128 / InVT.getSizeInBits()));
561
562  // two sanity checks
563  assert( LN->getAddressingMode() == ISD::UNINDEXED
564          && "we should get only UNINDEXED adresses");
565  // clean aligned loads can be selected as-is
566  if (InVT.getSizeInBits() == 128 && (alignment%16) == 0)
567    return SDValue();
568
569  // Get pointerinfos to the memory chunk(s) that contain the data to load
570  uint64_t mpi_offset = LN->getPointerInfo().Offset;
571  mpi_offset -= mpi_offset%16;
572  MachinePointerInfo lowMemPtr(LN->getPointerInfo().V, mpi_offset);
573  MachinePointerInfo highMemPtr(LN->getPointerInfo().V, mpi_offset+16);
574
575  SDValue result;
576  SDValue basePtr = LN->getBasePtr();
577  SDValue rotate;
578
579  if ((alignment%16) == 0) {
580    ConstantSDNode *CN;
581
582    // Special cases for a known aligned load to simplify the base pointer
583    // and the rotation amount:
584    if (basePtr.getOpcode() == ISD::ADD
585        && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
586      // Known offset into basePtr
587      int64_t offset = CN->getSExtValue();
588      int64_t rotamt = int64_t((offset & 0xf) - pso);
589
590      if (rotamt < 0)
591        rotamt += 16;
592
593      rotate = DAG.getConstant(rotamt, MVT::i16);
594
595      // Simplify the base pointer for this case:
596      basePtr = basePtr.getOperand(0);
597      if ((offset & ~0xf) > 0) {
598        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
599                              basePtr,
600                              DAG.getConstant((offset & ~0xf), PtrVT));
601      }
602    } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
603               || (basePtr.getOpcode() == SPUISD::IndirectAddr
604                   && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
605                   && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
606      // Plain aligned a-form address: rotate into preferred slot
607      // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
608      int64_t rotamt = -pso;
609      if (rotamt < 0)
610        rotamt += 16;
611      rotate = DAG.getConstant(rotamt, MVT::i16);
612    } else {
613      // Offset the rotate amount by the basePtr and the preferred slot
614      // byte offset
615      int64_t rotamt = -pso;
616      if (rotamt < 0)
617        rotamt += 16;
618      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
619                           basePtr,
620                           DAG.getConstant(rotamt, PtrVT));
621    }
622  } else {
623    // Unaligned load: must be more pessimistic about addressing modes:
624    if (basePtr.getOpcode() == ISD::ADD) {
625      MachineFunction &MF = DAG.getMachineFunction();
626      MachineRegisterInfo &RegInfo = MF.getRegInfo();
627      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
628      SDValue Flag;
629
630      SDValue Op0 = basePtr.getOperand(0);
631      SDValue Op1 = basePtr.getOperand(1);
632
633      if (isa<ConstantSDNode>(Op1)) {
634        // Convert the (add <ptr>, <const>) to an indirect address contained
635        // in a register. Note that this is done because we need to avoid
636        // creating a 0(reg) d-form address due to the SPU's block loads.
637        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
638        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
639        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
640      } else {
641        // Convert the (add <arg1>, <arg2>) to an indirect address, which
642        // will likely be lowered as a reg(reg) x-form address.
643        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
644      }
645    } else {
646      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
647                            basePtr,
648                            DAG.getConstant(0, PtrVT));
649   }
650
651    // Offset the rotate amount by the basePtr and the preferred slot
652    // byte offset
653    rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
654                         basePtr,
655                         DAG.getConstant(-pso, PtrVT));
656  }
657
658  // Do the load as a i128 to allow possible shifting
659  SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr,
660                       lowMemPtr,
661                       LN->isVolatile(), LN->isNonTemporal(), 16);
662
663  // When the size is not greater than alignment we get all data with just
664  // one load
665  if (alignment >= InVT.getSizeInBits()/8) {
666    // Update the chain
667    the_chain = low.getValue(1);
668
669    // Rotate into the preferred slot:
670    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::i128,
671                         low.getValue(0), rotate);
672
673    // Convert the loaded v16i8 vector to the appropriate vector type
674    // specified by the operand:
675    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
676                                 InVT, (128 / InVT.getSizeInBits()));
677    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
678                         DAG.getNode(ISD::BITCAST, dl, vecVT, result));
679  }
680  // When alignment is less than the size, we might need (known only at
681  // run-time) two loads
682  // TODO: if the memory address is composed only from constants, we have
683  // extra kowledge, and might avoid the second load
684  else {
685    // storage position offset from lower 16 byte aligned memory chunk
686    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
687                                  basePtr, DAG.getConstant( 0xf, MVT::i32 ) );
688    // get a registerfull of ones. (this implementation is a workaround: LLVM
689    // cannot handle 128 bit signed int constants)
690    SDValue ones = DAG.getConstant(-1, MVT::v4i32 );
691    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);
692
693    SDValue high = DAG.getLoad(MVT::i128, dl, the_chain,
694                               DAG.getNode(ISD::ADD, dl, PtrVT,
695                                           basePtr,
696                                           DAG.getConstant(16, PtrVT)),
697                               highMemPtr,
698                               LN->isVolatile(), LN->isNonTemporal(), 16);
699
700    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
701                                                              high.getValue(1));
702
703    // Shift the (possible) high part right to compensate the misalignemnt.
704    // if there is no highpart (i.e. value is i64 and offset is 4), this
705    // will zero out the high value.
706    high = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, high,
707                                     DAG.getNode(ISD::SUB, dl, MVT::i32,
708                                                 DAG.getConstant( 16, MVT::i32),
709                                                 offset
710                                                ));
711
712    // Shift the low similarly
713    // TODO: add SPUISD::SHL_BYTES
714    low = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, low, offset );
715
716    // Merge the two parts
717    result = DAG.getNode(ISD::BITCAST, dl, vecVT,
718                          DAG.getNode(ISD::OR, dl, MVT::i128, low, high));
719
720    if (!InVT.isVector()) {
721      result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT, result );
722     }
723
724  }
725    // Handle extending loads by extending the scalar result:
726    if (ExtType == ISD::SEXTLOAD) {
727      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
728    } else if (ExtType == ISD::ZEXTLOAD) {
729      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
730    } else if (ExtType == ISD::EXTLOAD) {
731      unsigned NewOpc = ISD::ANY_EXTEND;
732
733      if (OutVT.isFloatingPoint())
734        NewOpc = ISD::FP_EXTEND;
735
736      result = DAG.getNode(NewOpc, dl, OutVT, result);
737    }
738
739    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
740    SDValue retops[2] = {
741      result,
742      the_chain
743    };
744
745    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
746                         retops, sizeof(retops) / sizeof(retops[0]));
747    return result;
748}
749
/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  // For truncating stores, StVT is the (narrower) type actually written to
  // memory; otherwise it equals the value type.
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();
  SDValue result;
  // A 128-bit vector type covering the stored type: scalars are widened to a
  // full quadword's worth of elements of the scalar type.
  EVT vecVT = StVT.isVector()? StVT: EVT::getVectorVT(*DAG.getContext(), StVT,
                                                 (128 / StVT.getSizeInBits()));
  // Get pointerinfos to the 16-byte aligned memory chunk(s) this store will
  // read, merge into, and write back
  uint64_t mpi_offset = SN->getPointerInfo().Offset;
  mpi_offset -= mpi_offset%16;
  MachinePointerInfo lowMemPtr(SN->getPointerInfo().V, mpi_offset);
  MachinePointerInfo highMemPtr(SN->getPointerInfo().V, mpi_offset+16);


  // two sanity checks
  assert( SN->getAddressingMode() == ISD::UNINDEXED
          && "we should get only UNINDEXED adresses");
  // clean aligned loads can be selected as-is
  if (StVT.getSizeInBits() == 128 && (alignment%16) == 0)
    return SDValue();

  SDValue alignLoadVec;
  SDValue basePtr = SN->getBasePtr();
  SDValue the_chain = SN->getChain();
  SDValue insertEltOffs;

  if ((alignment%16) == 0) {
    ConstantSDNode *CN;
    // Special cases for a known aligned load to simplify the base pointer
    // and insertion byte:
    if (basePtr.getOpcode() == ISD::ADD
        && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
      // Known offset into basePtr
      int64_t offset = CN->getSExtValue();

      // Simplify the base pointer for this case:
      basePtr = basePtr.getOperand(0);
      // The insertion byte is the sub-quadword part of the offset.
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant((offset & 0xf), PtrVT));

      // Fold the quadword-aligned remainder of the offset back into the
      // base pointer.
      if ((offset & ~0xf) > 0) {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant((offset & ~0xf), PtrVT));
      }
    } else {
      // Otherwise, assume it's at byte 0 of basePtr
      insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }
  } else {
    // Unaligned load: must be more pessimistic about addressing modes:
    if (basePtr.getOpcode() == ISD::ADD) {
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &RegInfo = MF.getRegInfo();
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
      SDValue Flag;

      SDValue Op0 = basePtr.getOperand(0);
      SDValue Op1 = basePtr.getOperand(1);

      if (isa<ConstantSDNode>(Op1)) {
        // Convert the (add <ptr>, <const>) to an indirect address contained
        // in a register. Note that this is done because we need to avoid
        // creating a 0(reg) d-form address due to the SPU's block loads.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
        basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
      } else {
        // Convert the (add <arg1>, <arg2>) to an indirect address, which
        // will likely be lowered as a reg(reg) x-form address.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
      }
    } else {
      basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                            basePtr,
                            DAG.getConstant(0, PtrVT));
    }

    // Insertion point is solely determined by basePtr's contents
    insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                basePtr,
                                DAG.getConstant(0, PtrVT));
  }

  // Load the lower part of the memory to which to store.
  SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr,
                          lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16);

  // if we don't need to store over the 16 byte boundary, one store suffices
  if (alignment >= StVT.getSizeInBits()/8) {
    // Update the chain
    the_chain = low.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(low);
    SDValue theValue = SN->getValue();

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "CellSPU LowerSTORE: basePtr = ";
        basePtr.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

    // Build a shuffle mask that drops the scalar into the right byte slot,
    // splat the value into a vector, then shuffle it into the quadword that
    // was just loaded.
    SDValue insertEltOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT,
                                      insertEltOffs);
    SDValue vectorizeOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT,
                                      theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, low,
                         DAG.getNode(ISD::BITCAST, dl,
                                     MVT::v4i32, insertEltOp));

    // Write the merged quadword back to memory.
    result = DAG.getStore(the_chain, dl, result, basePtr,
                          lowMemPtr,
                          LN->isVolatile(), LN->isNonTemporal(),
                          16);

  }
  // do the store when it might cross the 16 byte memory access boundary.
  else {
    // TODO issue a warning if SN->isVolatile()== true? This is likely not
    // what the user wanted.

    // address offset from nearest lower 16byte aligned address
    SDValue offset = DAG.getNode(ISD::AND, dl, MVT::i32,
                                    SN->getBasePtr(),
                                    DAG.getConstant(0xf, MVT::i32));
    // 16 - offset
    SDValue offset_compl = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                           DAG.getConstant( 16, MVT::i32),
                                           offset);
    // 16 - sizeof(Value)
    SDValue surplus = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                     DAG.getConstant( 16, MVT::i32),
                                     DAG.getConstant( VT.getSizeInBits()/8,
                                                      MVT::i32));
    // get a registerfull of ones
    SDValue ones = DAG.getConstant(-1, MVT::v4i32);
    ones = DAG.getNode(ISD::BITCAST, dl, MVT::i128, ones);

    // Create the 128 bit masks that have ones where the data to store is
    // located.
    SDValue lowmask, himask;
    // if the value to store doesn't fill up an entire 128 bits, zero
    // out the last bits of the mask so that only the value we want to store
    // is masked.
    // this is e.g. in the case of store i32, align 2
    if (!VT.isVector()){
      Value = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, Value);
      lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, ones, surplus);
      lowmask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                                                               surplus);
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
      Value = DAG.getNode(ISD::AND, dl, MVT::i128, Value, lowmask);

    }
    else {
      lowmask = ones;
      Value = DAG.getNode(ISD::BITCAST, dl, MVT::i128, Value);
    }
    // this will zero, if there are no data that goes to the high quad
    himask = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, lowmask,
                                                            offset_compl);
    lowmask = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, lowmask,
                                                             offset);

    // Load in the old data and zero out the parts that will be overwritten with
    // the new data to store.
    SDValue hi = DAG.getLoad(MVT::i128, dl, the_chain,
                               DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                           DAG.getConstant( 16, PtrVT)),
                               highMemPtr,
                               SN->isVolatile(), SN->isNonTemporal(), 16);
    the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1),
                                                              hi.getValue(1));

    // Clear the bytes of the old quadwords that the new value will occupy.
    low = DAG.getNode(ISD::AND, dl, MVT::i128,
                        DAG.getNode( ISD::BITCAST, dl, MVT::i128, low),
                        DAG.getNode( ISD::XOR, dl, MVT::i128, lowmask, ones));
    hi = DAG.getNode(ISD::AND, dl, MVT::i128,
                        DAG.getNode( ISD::BITCAST, dl, MVT::i128, hi),
                        DAG.getNode( ISD::XOR, dl, MVT::i128, himask, ones));

    // Shift the Value to store into place. rlow contains the parts that go to
    // the lower memory chunk, rhi has the parts that go to the upper one.
    SDValue rlow = DAG.getNode(SPUISD::SRL_BYTES, dl, MVT::i128, Value, offset);
    rlow = DAG.getNode(ISD::AND, dl, MVT::i128, rlow, lowmask);
    SDValue rhi = DAG.getNode(SPUISD::SHL_BYTES, dl, MVT::i128, Value,
                                                            offset_compl);

    // Merge the old data and the new data and store the results
    // Need to convert vectors here to integer as 'OR'ing floats assert
    rlow = DAG.getNode(ISD::OR, dl, MVT::i128,
                          DAG.getNode(ISD::BITCAST, dl, MVT::i128, low),
                          DAG.getNode(ISD::BITCAST, dl, MVT::i128, rlow));
    rhi = DAG.getNode(ISD::OR, dl, MVT::i128,
                         DAG.getNode(ISD::BITCAST, dl, MVT::i128, hi),
                         DAG.getNode(ISD::BITCAST, dl, MVT::i128, rhi));

    low = DAG.getStore(the_chain, dl, rlow, basePtr,
                          lowMemPtr,
                          SN->isVolatile(), SN->isNonTemporal(), 16);
    hi  = DAG.getStore(the_chain, dl, rhi,
                            DAG.getNode(ISD::ADD, dl, PtrVT, basePtr,
                                        DAG.getConstant( 16, PtrVT)),
                            highMemPtr,
                            SN->isVolatile(), SN->isNonTemporal(), 16);
    result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(0),
                                                           hi.getValue(0));
  }

  return result;
}
994
995//! Generate the address of a constant pool entry.
996static SDValue
997LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
998  EVT PtrVT = Op.getValueType();
999  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1000  const Constant *C = CP->getConstVal();
1001  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
1002  SDValue Zero = DAG.getConstant(0, PtrVT);
1003  const TargetMachine &TM = DAG.getTarget();
1004  // FIXME there is no actual debug info here
1005  DebugLoc dl = Op.getDebugLoc();
1006
1007  if (TM.getRelocationModel() == Reloc::Static) {
1008    if (!ST->usingLargeMem()) {
1009      // Just return the SDValue with the constant pool address in it.
1010      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
1011    } else {
1012      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
1013      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
1014      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1015    }
1016  }
1017
1018  llvm_unreachable("LowerConstantPool: Relocation model other than static"
1019                   " not supported.");
1020  return SDValue();
1021}
1022
1023//! Alternate entry point for generating the address of a constant pool entry
1024SDValue
1025SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
1026  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
1027}
1028
1029static SDValue
1030LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1031  EVT PtrVT = Op.getValueType();
1032  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1033  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
1034  SDValue Zero = DAG.getConstant(0, PtrVT);
1035  const TargetMachine &TM = DAG.getTarget();
1036  // FIXME there is no actual debug info here
1037  DebugLoc dl = Op.getDebugLoc();
1038
1039  if (TM.getRelocationModel() == Reloc::Static) {
1040    if (!ST->usingLargeMem()) {
1041      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
1042    } else {
1043      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
1044      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
1045      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1046    }
1047  }
1048
1049  llvm_unreachable("LowerJumpTable: Relocation model other than static"
1050                   " not supported.");
1051  return SDValue();
1052}
1053
1054static SDValue
1055LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
1056  EVT PtrVT = Op.getValueType();
1057  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
1058  const GlobalValue *GV = GSDN->getGlobal();
1059  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
1060                                          PtrVT, GSDN->getOffset());
1061  const TargetMachine &TM = DAG.getTarget();
1062  SDValue Zero = DAG.getConstant(0, PtrVT);
1063  // FIXME there is no actual debug info here
1064  DebugLoc dl = Op.getDebugLoc();
1065
1066  if (TM.getRelocationModel() == Reloc::Static) {
1067    if (!ST->usingLargeMem()) {
1068      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
1069    } else {
1070      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
1071      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
1072      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
1073    }
1074  } else {
1075    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
1076                      "not supported.");
1077    /*NOTREACHED*/
1078  }
1079
1080  return SDValue();
1081}
1082
1083//! Custom lower double precision floating point constants
1084static SDValue
1085LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
1086  EVT VT = Op.getValueType();
1087  // FIXME there is no actual debug info here
1088  DebugLoc dl = Op.getDebugLoc();
1089
1090  if (VT == MVT::f64) {
1091    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1092
1093    assert((FP != 0) &&
1094           "LowerConstantFP: Node is not ConstantFPSDNode");
1095
1096    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1097    SDValue T = DAG.getConstant(dbits, MVT::i64);
1098    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1099    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1100                       DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Tvec));
1101  }
1102
1103  return SDValue();
1104}
1105
1106SDValue
1107SPUTargetLowering::LowerFormalArguments(SDValue Chain,
1108                                        CallingConv::ID CallConv, bool isVarArg,
1109                                        const SmallVectorImpl<ISD::InputArg>
1110                                          &Ins,
1111                                        DebugLoc dl, SelectionDAG &DAG,
1112                                        SmallVectorImpl<SDValue> &InVals)
1113                                          const {
1114
1115  MachineFunction &MF = DAG.getMachineFunction();
1116  MachineFrameInfo *MFI = MF.getFrameInfo();
1117  MachineRegisterInfo &RegInfo = MF.getRegInfo();
1118  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();
1119
1120  unsigned ArgOffset = SPUFrameLowering::minStackSize();
1121  unsigned ArgRegIdx = 0;
1122  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();
1123
1124  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1125
1126  SmallVector<CCValAssign, 16> ArgLocs;
1127  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1128		 getTargetMachine(), ArgLocs, *DAG.getContext());
1129  // FIXME: allow for other calling conventions
1130  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);
1131
1132  // Add DAG nodes to load the arguments or copy them out of registers.
1133  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
1134    EVT ObjectVT = Ins[ArgNo].VT;
1135    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
1136    SDValue ArgVal;
1137    CCValAssign &VA = ArgLocs[ArgNo];
1138
1139    if (VA.isRegLoc()) {
1140      const TargetRegisterClass *ArgRegClass;
1141
1142      switch (ObjectVT.getSimpleVT().SimpleTy) {
1143      default:
1144        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
1145                           Twine(ObjectVT.getEVTString()));
1146      case MVT::i8:
1147        ArgRegClass = &SPU::R8CRegClass;
1148        break;
1149      case MVT::i16:
1150        ArgRegClass = &SPU::R16CRegClass;
1151        break;
1152      case MVT::i32:
1153        ArgRegClass = &SPU::R32CRegClass;
1154        break;
1155      case MVT::i64:
1156        ArgRegClass = &SPU::R64CRegClass;
1157        break;
1158      case MVT::i128:
1159        ArgRegClass = &SPU::GPRCRegClass;
1160        break;
1161      case MVT::f32:
1162        ArgRegClass = &SPU::R32FPRegClass;
1163        break;
1164      case MVT::f64:
1165        ArgRegClass = &SPU::R64FPRegClass;
1166        break;
1167      case MVT::v2f64:
1168      case MVT::v4f32:
1169      case MVT::v2i64:
1170      case MVT::v4i32:
1171      case MVT::v8i16:
1172      case MVT::v16i8:
1173        ArgRegClass = &SPU::VECREGRegClass;
1174        break;
1175      }
1176
1177      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
1178      RegInfo.addLiveIn(VA.getLocReg(), VReg);
1179      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
1180      ++ArgRegIdx;
1181    } else {
1182      // We need to load the argument to a virtual register if we determined
1183      // above that we ran out of physical registers of the appropriate type
1184      // or we're forced to do vararg
1185      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
1186      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1187      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
1188                           false, false, 0);
1189      ArgOffset += StackSlotSize;
1190    }
1191
1192    InVals.push_back(ArgVal);
1193    // Update the chain
1194    Chain = ArgVal.getOperand(0);
1195  }
1196
1197  // vararg handling:
1198  if (isVarArg) {
1199    // FIXME: we should be able to query the argument registers from
1200    //        tablegen generated code.
1201    static const unsigned ArgRegs[] = {
1202      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
1203      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
1204      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
1205      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
1206      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
1207      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
1208      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
1209      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
1210      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
1211      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
1212      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
1213    };
1214    // size of ArgRegs array
1215    unsigned NumArgRegs = 77;
1216
1217    // We will spill (79-3)+1 registers to the stack
1218    SmallVector<SDValue, 79-3+1> MemOps;
1219
1220    // Create the frame slot
1221    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
1222      FuncInfo->setVarArgsFrameIndex(
1223        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
1224      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
1225      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::VECREGRegClass);
1226      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
1227      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, MachinePointerInfo(),
1228                                   false, false, 0);
1229      Chain = Store.getOperand(0);
1230      MemOps.push_back(Store);
1231
1232      // Increment address by stack slot size for the next stored argument
1233      ArgOffset += StackSlotSize;
1234    }
1235    if (!MemOps.empty())
1236      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1237                          &MemOps[0], MemOps.size());
1238  }
1239
1240  return Chain;
1241}
1242
1243/// isLSAAddress - Return the immediate to use if the specified
1244/// value is representable as a LSA address.
1245static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1246  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1247  if (!C) return 0;
1248
1249  int Addr = C->getZExtValue();
1250  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1251      (Addr << 14 >> 14) != Addr)
1252    return 0;  // Top 14 bits have to be sext of immediate.
1253
1254  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1255}
1256
/*!
 Lower an outgoing call for CellSPU.

 Arguments are assigned locations by the SPU calling convention; those
 given registers are accumulated in RegsToPass, the rest are stored to
 16-byte stack slots in the outgoing-argument area.  The callee address
 is rewritten into the appropriate SPU addressing form (PC-relative or
 A-form in small-memory mode, indirect in large-memory mode), and return
 values are copied back out of the convention-assigned registers into
 InVals.  Tail calls are not supported.
 */
SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps     = Outs.size();
  unsigned StackSlotSize = SPUFrameLowering::stackSlotSize();

  // Assign argument locations per the SPU calling convention.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();


  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameLowering::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Every supported type goes either into its assigned register or,
      // once registers run out, into a 16-byte stack slot.
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameLowering::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
        Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // Now handle the return value(s)
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, CCC_SPU);


  // If the call has results, copy the values out of the ret val registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                                     InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);
    InVals.push_back(Val);
   }

  return Chain;
}
1453
1454SDValue
1455SPUTargetLowering::LowerReturn(SDValue Chain,
1456                               CallingConv::ID CallConv, bool isVarArg,
1457                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1458                               const SmallVectorImpl<SDValue> &OutVals,
1459                               DebugLoc dl, SelectionDAG &DAG) const {
1460
1461  SmallVector<CCValAssign, 16> RVLocs;
1462  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1463		 getTargetMachine(), RVLocs, *DAG.getContext());
1464  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1465
1466  // If this is the first return lowered for this function, add the regs to the
1467  // liveout set for the function.
1468  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1469    for (unsigned i = 0; i != RVLocs.size(); ++i)
1470      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1471  }
1472
1473  SDValue Flag;
1474
1475  // Copy the result values into the output registers.
1476  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1477    CCValAssign &VA = RVLocs[i];
1478    assert(VA.isRegLoc() && "Can only return in registers!");
1479    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1480                             OutVals[i], Flag);
1481    Flag = Chain.getValue(1);
1482  }
1483
1484  if (Flag.getNode())
1485    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1486  else
1487    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1488}
1489
1490
1491//===----------------------------------------------------------------------===//
1492// Vector related lowering:
1493//===----------------------------------------------------------------------===//
1494
1495static ConstantSDNode *
1496getVecImm(SDNode *N) {
1497  SDValue OpVal(0, 0);
1498
1499  // Check to see if this buildvec has a single non-undef value in its elements.
1500  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1501    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1502    if (OpVal.getNode() == 0)
1503      OpVal = N->getOperand(i);
1504    else if (OpVal != N->getOperand(i))
1505      return 0;
1506  }
1507
1508  if (OpVal.getNode() != 0) {
1509    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1510      return CN;
1511    }
1512  }
1513
1514  return 0;
1515}
1516
1517/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1518/// and the value fits into an unsigned 18-bit constant, and if so, return the
1519/// constant
1520SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1521                              EVT ValueType) {
1522  if (ConstantSDNode *CN = getVecImm(N)) {
1523    uint64_t Value = CN->getZExtValue();
1524    if (ValueType == MVT::i64) {
1525      uint64_t UValue = CN->getZExtValue();
1526      uint32_t upper = uint32_t(UValue >> 32);
1527      uint32_t lower = uint32_t(UValue);
1528      if (upper != lower)
1529        return SDValue();
1530      Value = Value >> 32;
1531    }
1532    if (Value <= 0x3ffff)
1533      return DAG.getTargetConstant(Value, ValueType);
1534  }
1535
1536  return SDValue();
1537}
1538
1539/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1540/// and the value fits into a signed 16-bit constant, and if so, return the
1541/// constant
1542SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1543                              EVT ValueType) {
1544  if (ConstantSDNode *CN = getVecImm(N)) {
1545    int64_t Value = CN->getSExtValue();
1546    if (ValueType == MVT::i64) {
1547      uint64_t UValue = CN->getZExtValue();
1548      uint32_t upper = uint32_t(UValue >> 32);
1549      uint32_t lower = uint32_t(UValue);
1550      if (upper != lower)
1551        return SDValue();
1552      Value = Value >> 32;
1553    }
1554    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1555      return DAG.getTargetConstant(Value, ValueType);
1556    }
1557  }
1558
1559  return SDValue();
1560}
1561
1562/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1563/// and the value fits into a signed 10-bit constant, and if so, return the
1564/// constant
1565SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1566                              EVT ValueType) {
1567  if (ConstantSDNode *CN = getVecImm(N)) {
1568    int64_t Value = CN->getSExtValue();
1569    if (ValueType == MVT::i64) {
1570      uint64_t UValue = CN->getZExtValue();
1571      uint32_t upper = uint32_t(UValue >> 32);
1572      uint32_t lower = uint32_t(UValue);
1573      if (upper != lower)
1574        return SDValue();
1575      Value = Value >> 32;
1576    }
1577    if (isInt<10>(Value))
1578      return DAG.getTargetConstant(Value, ValueType);
1579  }
1580
1581  return SDValue();
1582}
1583
1584/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1585/// and the value fits into a signed 8-bit constant, and if so, return the
1586/// constant.
1587///
1588/// @note: The incoming vector is v16i8 because that's the only way we can load
1589/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1590/// same value.
1591SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1592                             EVT ValueType) {
1593  if (ConstantSDNode *CN = getVecImm(N)) {
1594    int Value = (int) CN->getZExtValue();
1595    if (ValueType == MVT::i16
1596        && Value <= 0xffff                 /* truncated from uint64_t */
1597        && ((short) Value >> 8) == ((short) Value & 0xff))
1598      return DAG.getTargetConstant(Value & 0xff, ValueType);
1599    else if (ValueType == MVT::i8
1600             && (Value & 0xff) == Value)
1601      return DAG.getTargetConstant(Value, ValueType);
1602  }
1603
1604  return SDValue();
1605}
1606
1607/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1608/// and the value fits into a signed 16-bit constant, and if so, return the
1609/// constant
1610SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1611                               EVT ValueType) {
1612  if (ConstantSDNode *CN = getVecImm(N)) {
1613    uint64_t Value = CN->getZExtValue();
1614    if ((ValueType == MVT::i32
1615          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1616        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1617      return DAG.getTargetConstant(Value >> 16, ValueType);
1618  }
1619
1620  return SDValue();
1621}
1622
1623/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1624SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1625  if (ConstantSDNode *CN = getVecImm(N)) {
1626    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1627  }
1628
1629  return SDValue();
1630}
1631
1632/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1633SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1634  if (ConstantSDNode *CN = getVecImm(N)) {
1635    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1636  }
1637
1638  return SDValue();
1639}
1640
//! Lower a BUILD_VECTOR instruction creatively:
//!
//! Only constant-splat vectors are handled here. The splat is rebuilt as the
//! cheapest equivalent DAG for the type: FP splats are constructed as integer
//! splats and bitcast back (so no FP constant load is needed), v16i8 splats
//! are widened to a v8i16 splat, and v2i64 splats are delegated to
//! SPU::LowerV2I64Splat. Returns an empty SDValue when the node is not a
//! usable constant splat.
static SDValue
LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  EVT EltVT = VT.getVectorElementType();
  DebugLoc dl = Op.getDebugLoc();
  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
  unsigned minSplatBits = EltVT.getSizeInBits();

  // Require at least a 16-bit splat pattern; 8-bit elements are handled by
  // widening below.
  if (minSplatBits < 16)
    minSplatBits = 16;

  APInt APSplatBits, APSplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;

  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
                            HasAnyUndefs, minSplatBits)
      || minSplatBits < SplatBitSize)
    return SDValue();   // Wasn't a constant vector or splat exceeded min

  uint64_t SplatBits = APSplatBits.getZExtValue();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
                       Twine(VT.getEVTString()));
    /*NOTREACHED*/
  case MVT::v4f32: {
    uint32_t Value32 = uint32_t(SplatBits);
    assert(SplatBitSize == 32
           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(Value32, MVT::i32);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v4f32,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
    break;
  }
  case MVT::v2f64: {
    uint64_t f64val = uint64_t(SplatBits);
    assert(SplatBitSize == 64
           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
    SDValue T = DAG.getConstant(f64val, MVT::i64);
    return DAG.getNode(ISD::BITCAST, dl, MVT::v2f64,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
    break;
  }
  case MVT::v16i8: {
   // 8-bit constants have to be expanded to 16-bits
   unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
   SmallVector<SDValue, 8> Ops;

   // Build as a v8i16 splat and bitcast back to the v16i8 result type.
   Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
   return DAG.getNode(ISD::BITCAST, dl, VT,
                      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
  }
  case MVT::v8i16: {
    unsigned short Value16 = SplatBits;
    SDValue T = DAG.getConstant(Value16, EltVT);
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, T);
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
  }
  case MVT::v4i32: {
    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
  }
  case MVT::v2i64: {
    // 64-bit splats need the upper/lower-half analysis in LowerV2I64Splat.
    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
  }
  }

  return SDValue();
}
1718
1719/*!
1720 */
1721SDValue
1722SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1723                     DebugLoc dl) {
1724  uint32_t upper = uint32_t(SplatVal >> 32);
1725  uint32_t lower = uint32_t(SplatVal);
1726
1727  if (upper == lower) {
1728    // Magic constant that can be matched by IL, ILA, et. al.
1729    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1730    return DAG.getNode(ISD::BITCAST, dl, OpVT,
1731                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732                                   Val, Val, Val, Val));
1733  } else {
1734    bool upper_special, lower_special;
1735
1736    // NOTE: This code creates common-case shuffle masks that can be easily
1737    // detected as common expressions. It is not attempting to create highly
1738    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1739
1740    // Detect if the upper or lower half is a special shuffle mask pattern:
1741    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1742    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1743
1744    // Both upper and lower are special, lower to a constant pool load:
1745    if (lower_special && upper_special) {
1746      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1747      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1748                         SplatValCN, SplatValCN);
1749    }
1750
1751    SDValue LO32;
1752    SDValue HI32;
1753    SmallVector<SDValue, 16> ShufBytes;
1754    SDValue Result;
1755
1756    // Create lower vector if not a special pattern
1757    if (!lower_special) {
1758      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1759      LO32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1760                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1761                                     LO32C, LO32C, LO32C, LO32C));
1762    }
1763
1764    // Create upper vector if not a special pattern
1765    if (!upper_special) {
1766      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1767      HI32 = DAG.getNode(ISD::BITCAST, dl, OpVT,
1768                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1769                                     HI32C, HI32C, HI32C, HI32C));
1770    }
1771
1772    // If either upper or lower are special, then the two input operands are
1773    // the same (basically, one of them is a "don't care")
1774    if (lower_special)
1775      LO32 = HI32;
1776    if (upper_special)
1777      HI32 = LO32;
1778
1779    for (int i = 0; i < 4; ++i) {
1780      uint64_t val = 0;
1781      for (int j = 0; j < 4; ++j) {
1782        SDValue V;
1783        bool process_upper, process_lower;
1784        val <<= 8;
1785        process_upper = (upper_special && (i & 1) == 0);
1786        process_lower = (lower_special && (i & 1) == 1);
1787
1788        if (process_upper || process_lower) {
1789          if ((process_upper && upper == 0)
1790                  || (process_lower && lower == 0))
1791            val |= 0x80;
1792          else if ((process_upper && upper == 0xffffffff)
1793                  || (process_lower && lower == 0xffffffff))
1794            val |= 0xc0;
1795          else if ((process_upper && upper == 0x80000000)
1796                  || (process_lower && lower == 0x80000000))
1797            val |= (j == 0 ? 0xe0 : 0x80);
1798        } else
1799          val |= i * 4 + j + ((i & 1) * 16);
1800      }
1801
1802      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1803    }
1804
1805    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1806                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1807                                   &ShufBytes[0], ShufBytes.size()));
1808  }
1809}
1810
/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
/// which the Cell can operate. The code inspects V3 to ascertain whether the
/// permutation vector, V3, is monotonically increasing with one "exception"
/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
/// In either case, the net result is going to eventually invoke SHUFB to
/// permute/shuffle the bytes from V1 and V2.
/// \note
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
/// control word for byte/halfword/word insertion. This takes care of a single
/// element move from V2 into V1.
/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  // An undef V2 behaves like V1 for mask-analysis purposes.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element, and the source
  // slot of the element to move must be the same as the destination.
  EVT VecVT = V1.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;      // Number of mask elements drawn from V2
  unsigned V2EltOffset = 0;     // Byte offset of the single V2 element
  unsigned V2EltIdx0 = 0;       // First mask index that refers into V2
  unsigned CurrElt = 0;         // Expected next index for the monotonic scan
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;         // Previous mask index seen by the rotate scan
  bool monotonic = true;        // Mask is identity except one V2 element?
  bool rotate = true;           // Mask is a cyclic rotation of V1?
  int rotamt=0;                 // Rotation amount in elements (fixed up below)
  EVT maskVT;             // which of the c?d instructions to use

  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
    maskVT = MVT::v16i8;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
    maskVT = MVT::v8i16;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
    maskVT = MVT::v4i32;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
    maskVT = MVT::v2i64;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Single pass over the mask, tracking both candidate patterns at once.
  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;                 // Undef lanes match any pattern.

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // TODO: optimize for the monotonic case when several consecutive
        // elements are taken form V2. Do we ever get such a case?
        if (EltsFromV2 == 0 && CurrElt == (SrcElt - V2EltIdx0))
          V2EltOffset = (SrcElt - V2EltIdx0) * (EltVT.getSizeInBits()/8);
        else
          monotonic = false;
        ++EltsFromV2;
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        // Indices must increase by one, modulo MaxElts, to stay a rotation.
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
        } else {
          rotate = false;
        }
      } else if (i == 0 || (PrevElt==0 && SrcElt==1)) {
        // First time or after a "wrap around"
        // NOTE: unsigned SrcElt-i may wrap; negative amounts are fixed up
        // at use below.
        rotamt = SrcElt-i;
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation, takes elements from vector 2
        rotate = false;
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
    // R1 ($sp) is used here only as it is guaranteed to have last bits zero
    SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                DAG.getRegister(SPU::R1, PtrVT),
                                DAG.getConstant(V2EltOffset, MVT::i32));
    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
                                     maskVT, Pointer);

    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    // Normalize the rotation amount and convert elements to bytes.
    if (rotamt < 0)
      rotamt +=MaxElts;
    rotamt *= EltVT.getSizeInBits()/8;
    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
   // Convert the SHUFFLE_VECTOR mask's input element units to the
   // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    // General case: build an explicit byte-level permute mask for SHUFB.
    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }
    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
1943
1944static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1945  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1946  DebugLoc dl = Op.getDebugLoc();
1947
1948  if (Op0.getNode()->getOpcode() == ISD::Constant) {
1949    // For a constant, build the appropriate constant vector, which will
1950    // eventually simplify to a vector register load.
1951
1952    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1953    SmallVector<SDValue, 16> ConstVecValues;
1954    EVT VT;
1955    size_t n_copies;
1956
1957    // Create a constant vector:
1958    switch (Op.getValueType().getSimpleVT().SimpleTy) {
1959    default: llvm_unreachable("Unexpected constant value type in "
1960                              "LowerSCALAR_TO_VECTOR");
1961    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1962    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1963    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1964    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1965    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1966    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1967    }
1968
1969    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1970    for (size_t j = 0; j < n_copies; ++j)
1971      ConstVecValues.push_back(CValue);
1972
1973    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1974                       &ConstVecValues[0], ConstVecValues.size());
1975  } else {
1976    // Otherwise, copy the value from one register to another:
1977    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1978    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1979    case MVT::i8:
1980    case MVT::i16:
1981    case MVT::i32:
1982    case MVT::i64:
1983    case MVT::f32:
1984    case MVT::f64:
1985      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1986    }
1987  }
1988
1989  return SDValue();
1990}
1991
//! Lower EXTRACT_VECTOR_ELT. For a constant index, a SHUFB mask is built that
//! moves the requested element into the preferred slot, then VEC2PREFSLOT
//! reads it out (element 0 of i32/i64 vectors needs no shuffle). For a
//! variable index, the vector is byte-shifted so the element lands at byte 0
//! and a replication mask broadcasts it across the register.
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;   // Preferred-slot byte range
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    unsigned int ShufBytes[16] = {
      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    };
    for (int i = 0; i < 16; ++i) {
      // zero fill uppper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        // 0x80 in a shufb control byte produces a zero byte; otherwise
        // select the element's bytes from the source vector.
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        // Replicate the preferred-slot pattern into the remaining bytes.
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    // Pack the 16 control bytes into four i32 constants for BUILD_VECTOR.
    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    EVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector()) {
      report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                        "vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    APInt scaleFactor =
            APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    // Shift the requested element's bytes down to byte 0 of the vector.
    vecShift = DAG.getNode(SPUISD::SHL_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT().SimpleTy) {
    default:
      report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
                        "type");
      /*NOTREACHED*/
    case MVT::i8: {
      // Broadcast byte 0 to every byte position.
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      // Broadcast bytes 0-1 to every halfword position.
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      // Broadcast bytes 0-3 to every word position.
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      // Broadcast bytes 0-7 to both doubleword positions.
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
2155
2156static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2157  SDValue VecOp = Op.getOperand(0);
2158  SDValue ValOp = Op.getOperand(1);
2159  SDValue IdxOp = Op.getOperand(2);
2160  DebugLoc dl = Op.getDebugLoc();
2161  EVT VT = Op.getValueType();
2162  EVT eltVT = ValOp.getValueType();
2163
2164  // use 0 when the lane to insert to is 'undef'
2165  int64_t Offset=0;
2166  if (IdxOp.getOpcode() != ISD::UNDEF) {
2167    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2168    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2169    Offset = (CN->getSExtValue()) * eltVT.getSizeInBits()/8;
2170  }
2171
2172  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2173  // Use $sp ($1) because it's always 16-byte aligned and it's available:
2174  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2175                                DAG.getRegister(SPU::R1, PtrVT),
2176                                DAG.getConstant(Offset, PtrVT));
2177  // widen the mask when dealing with half vectors
2178  EVT maskVT = EVT::getVectorVT(*(DAG.getContext()), VT.getVectorElementType(),
2179                                128/ VT.getVectorElementType().getSizeInBits());
2180  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, maskVT, Pointer);
2181
2182  SDValue result =
2183    DAG.getNode(SPUISD::SHUFB, dl, VT,
2184                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2185                VecOp,
2186                DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ShufMask));
2187
2188  return result;
2189}
2190
//! Lower i8 arithmetic by promoting the operands to i16, performing the
//! operation at 16 bits, and truncating back to i8.
//!
//! ADD/SUB/SRA/MUL sign-extend the operands; SHL/SRL/ROTL/ROTR zero-extend
//! the value operand. Shift/rotate amounts are first coerced to the target's
//! shift-amount type (ShiftVT).
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  DebugLoc dl = Op.getDebugLoc();
  EVT ShiftVT = TLI.getShiftAmountTy(N0.getValueType());

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    llvm_unreachable("Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::ADD: {
    // 8-bit addition: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));

  }

  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    // Coerce the rotate amount to ShiftVT (widen or narrow as needed).
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
                       ? ISD::ZERO_EXTEND
                       : ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    // Replicate lower 8-bits into upper 8:
    // (so a 16-bit rotate produces the same bytes an 8-bit rotate would)
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, dl, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));

    // Truncate back down to i8
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    // Logical shifts zero-extend the value being shifted.
    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::ZERO_EXTEND;

      if (N1.getValueType().bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;

      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    SDValue N1 = Op.getOperand(1);
    EVT N1VT = N1.getValueType();

    // Arithmetic shift sign-extends so the i16 sign bit matches the i8 one.
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    if (!N1VT.bitsEq(ShiftVT)) {
      unsigned N1Opc = ISD::SIGN_EXTEND;

      if (N1VT.bitsGT(ShiftVT))
        N1Opc = ISD::TRUNCATE;
      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
    }

    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);

    // The low 8 bits of the product are the same for sign- or zero-extended
    // operands; sign-extension is used here.
    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
    break;
  }
  }

  return SDValue();
}
2294
2295//! Lower byte immediate operations for v16i8 vectors:
2296static SDValue
2297LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2298  SDValue ConstVec;
2299  SDValue Arg;
2300  EVT VT = Op.getValueType();
2301  DebugLoc dl = Op.getDebugLoc();
2302
2303  ConstVec = Op.getOperand(0);
2304  Arg = Op.getOperand(1);
2305  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2306    if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2307      ConstVec = ConstVec.getOperand(0);
2308    } else {
2309      ConstVec = Op.getOperand(1);
2310      Arg = Op.getOperand(0);
2311      if (ConstVec.getNode()->getOpcode() == ISD::BITCAST) {
2312        ConstVec = ConstVec.getOperand(0);
2313      }
2314    }
2315  }
2316
2317  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2318    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2319    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2320
2321    APInt APSplatBits, APSplatUndef;
2322    unsigned SplatBitSize;
2323    bool HasAnyUndefs;
2324    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2325
2326    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2327                              HasAnyUndefs, minSplatBits)
2328        && minSplatBits <= SplatBitSize) {
2329      uint64_t SplatBits = APSplatBits.getZExtValue();
2330      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2331
2332      SmallVector<SDValue, 16> tcVec;
2333      tcVec.assign(16, tc);
2334      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2335                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2336    }
2337  }
2338
2339  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2340  // lowered.  Return the operation, rather than a null SDValue.
2341  return Op;
2342}
2343
2344//! Custom lowering for CTPOP (count population)
2345/*!
2346  Custom lowering code that counts the number ones in the input
2347  operand. SPU has such an instruction, but it counts the number of
2348  ones per byte, which then have to be accumulated.
2349*/
2350static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2351  EVT VT = Op.getValueType();
2352  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2353                               VT, (128 / VT.getSizeInBits()));
2354  DebugLoc dl = Op.getDebugLoc();
2355
2356  switch (VT.getSimpleVT().SimpleTy) {
2357  default:
2358    assert(false && "Invalid value type!");
2359  case MVT::i8: {
2360    SDValue N = Op.getOperand(0);
2361    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2362
2363    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2364    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2365
2366    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2367  }
2368
2369  case MVT::i16: {
2370    MachineFunction &MF = DAG.getMachineFunction();
2371    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2372
2373    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2374
2375    SDValue N = Op.getOperand(0);
2376    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2377    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2378    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2379
2380    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2381    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2382
2383    // CNTB_result becomes the chain to which all of the virtual registers
2384    // CNTB_reg, SUM1_reg become associated:
2385    SDValue CNTB_result =
2386      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2387
2388    SDValue CNTB_rescopy =
2389      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2390
2391    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2392
2393    return DAG.getNode(ISD::AND, dl, MVT::i16,
2394                       DAG.getNode(ISD::ADD, dl, MVT::i16,
2395                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
2396                                               Tmp1, Shift1),
2397                                   Tmp1),
2398                       Mask0);
2399  }
2400
2401  case MVT::i32: {
2402    MachineFunction &MF = DAG.getMachineFunction();
2403    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2404
2405    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2406    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2407
2408    SDValue N = Op.getOperand(0);
2409    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2410    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2411    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2412    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2413
2414    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2415    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2416
2417    // CNTB_result becomes the chain to which all of the virtual registers
2418    // CNTB_reg, SUM1_reg become associated:
2419    SDValue CNTB_result =
2420      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2421
2422    SDValue CNTB_rescopy =
2423      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2424
2425    SDValue Comp1 =
2426      DAG.getNode(ISD::SRL, dl, MVT::i32,
2427                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2428                  Shift1);
2429
2430    SDValue Sum1 =
2431      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2432                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2433
2434    SDValue Sum1_rescopy =
2435      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2436
2437    SDValue Comp2 =
2438      DAG.getNode(ISD::SRL, dl, MVT::i32,
2439                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2440                  Shift2);
2441    SDValue Sum2 =
2442      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2443                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2444
2445    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2446  }
2447
2448  case MVT::i64:
2449    break;
2450  }
2451
2452  return SDValue();
2453}
2454
2455//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2456/*!
2457 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2458 All conversions to i64 are expanded to a libcall.
2459 */
2460static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2461                              const SPUTargetLowering &TLI) {
2462  EVT OpVT = Op.getValueType();
2463  SDValue Op0 = Op.getOperand(0);
2464  EVT Op0VT = Op0.getValueType();
2465
2466  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2467      || OpVT == MVT::i64) {
2468    // Convert f32 / f64 to i32 / i64 via libcall.
2469    RTLIB::Libcall LC =
2470            (Op.getOpcode() == ISD::FP_TO_SINT)
2471             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2472             : RTLIB::getFPTOUINT(Op0VT, OpVT);
2473    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2474    SDValue Dummy;
2475    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2476  }
2477
2478  return Op;
2479}
2480
2481//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2482/*!
2483 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2484 All conversions from i64 are expanded to a libcall.
2485 */
2486static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2487                              const SPUTargetLowering &TLI) {
2488  EVT OpVT = Op.getValueType();
2489  SDValue Op0 = Op.getOperand(0);
2490  EVT Op0VT = Op0.getValueType();
2491
2492  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2493      || Op0VT == MVT::i64) {
2494    // Convert i32, i64 to f64 via libcall:
2495    RTLIB::Libcall LC =
2496            (Op.getOpcode() == ISD::SINT_TO_FP)
2497             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2498             : RTLIB::getUINTTOFP(Op0VT, OpVT);
2499    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2500    SDValue Dummy;
2501    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2502  }
2503
2504  return Op;
2505}
2506
2507//! Lower ISD::SETCC
2508/*!
2509 This handles MVT::f64 (double floating point) condition lowering
2510 */
2511static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2512                          const TargetLowering &TLI) {
2513  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2514  DebugLoc dl = Op.getDebugLoc();
2515  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2516
2517  SDValue lhs = Op.getOperand(0);
2518  SDValue rhs = Op.getOperand(1);
2519  EVT lhsVT = lhs.getValueType();
2520  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2521
2522  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2523  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2524  EVT IntVT(MVT::i64);
2525
2526  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2527  // selected to a NOP:
2528  SDValue i64lhs = DAG.getNode(ISD::BITCAST, dl, IntVT, lhs);
2529  SDValue lhsHi32 =
2530          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2531                      DAG.getNode(ISD::SRL, dl, IntVT,
2532                                  i64lhs, DAG.getConstant(32, MVT::i32)));
2533  SDValue lhsHi32abs =
2534          DAG.getNode(ISD::AND, dl, MVT::i32,
2535                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2536  SDValue lhsLo32 =
2537          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2538
2539  // SETO and SETUO only use the lhs operand:
2540  if (CC->get() == ISD::SETO) {
2541    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2542    // SETUO
2543    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2544    return DAG.getNode(ISD::XOR, dl, ccResultVT,
2545                       DAG.getSetCC(dl, ccResultVT,
2546                                    lhs, DAG.getConstantFP(0.0, lhsVT),
2547                                    ISD::SETUO),
2548                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2549  } else if (CC->get() == ISD::SETUO) {
2550    // Evaluates to true if Op0 is [SQ]NaN
2551    return DAG.getNode(ISD::AND, dl, ccResultVT,
2552                       DAG.getSetCC(dl, ccResultVT,
2553                                    lhsHi32abs,
2554                                    DAG.getConstant(0x7ff00000, MVT::i32),
2555                                    ISD::SETGE),
2556                       DAG.getSetCC(dl, ccResultVT,
2557                                    lhsLo32,
2558                                    DAG.getConstant(0, MVT::i32),
2559                                    ISD::SETGT));
2560  }
2561
2562  SDValue i64rhs = DAG.getNode(ISD::BITCAST, dl, IntVT, rhs);
2563  SDValue rhsHi32 =
2564          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2565                      DAG.getNode(ISD::SRL, dl, IntVT,
2566                                  i64rhs, DAG.getConstant(32, MVT::i32)));
2567
2568  // If a value is negative, subtract from the sign magnitude constant:
2569  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2570
2571  // Convert the sign-magnitude representation into 2's complement:
2572  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2573                                      lhsHi32, DAG.getConstant(31, MVT::i32));
2574  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2575  SDValue lhsSelect =
2576          DAG.getNode(ISD::SELECT, dl, IntVT,
2577                      lhsSelectMask, lhsSignMag2TC, i64lhs);
2578
2579  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2580                                      rhsHi32, DAG.getConstant(31, MVT::i32));
2581  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2582  SDValue rhsSelect =
2583          DAG.getNode(ISD::SELECT, dl, IntVT,
2584                      rhsSelectMask, rhsSignMag2TC, i64rhs);
2585
2586  unsigned compareOp;
2587
2588  switch (CC->get()) {
2589  case ISD::SETOEQ:
2590  case ISD::SETUEQ:
2591    compareOp = ISD::SETEQ; break;
2592  case ISD::SETOGT:
2593  case ISD::SETUGT:
2594    compareOp = ISD::SETGT; break;
2595  case ISD::SETOGE:
2596  case ISD::SETUGE:
2597    compareOp = ISD::SETGE; break;
2598  case ISD::SETOLT:
2599  case ISD::SETULT:
2600    compareOp = ISD::SETLT; break;
2601  case ISD::SETOLE:
2602  case ISD::SETULE:
2603    compareOp = ISD::SETLE; break;
2604  case ISD::SETUNE:
2605  case ISD::SETONE:
2606    compareOp = ISD::SETNE; break;
2607  default:
2608    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2609  }
2610
2611  SDValue result =
2612          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2613                       (ISD::CondCode) compareOp);
2614
2615  if ((CC->get() & 0x8) == 0) {
2616    // Ordered comparison:
2617    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2618                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
2619                                  ISD::SETO);
2620    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2621                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
2622                                  ISD::SETO);
2623    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2624
2625    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2626  }
2627
2628  return result;
2629}
2630
2631//! Lower ISD::SELECT_CC
2632/*!
2633  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2634  SELB instruction.
2635
2636  \note Need to revisit this in the future: if the code path through the true
2637  and false value computations is longer than the latency of a branch (6
2638  cycles), then it would be more advantageous to branch and insert a new basic
2639  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
2641 */
2642
2643static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2644                              const TargetLowering &TLI) {
2645  EVT VT = Op.getValueType();
2646  SDValue lhs = Op.getOperand(0);
2647  SDValue rhs = Op.getOperand(1);
2648  SDValue trueval = Op.getOperand(2);
2649  SDValue falseval = Op.getOperand(3);
2650  SDValue condition = Op.getOperand(4);
2651  DebugLoc dl = Op.getDebugLoc();
2652
2653  // NOTE: SELB's arguments: $rA, $rB, $mask
2654  //
2655  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2656  // where bits in $mask are 1. CCond will be inverted, having 1s where the
2657  // condition was true and 0s where the condition was false. Hence, the
2658  // arguments to SELB get reversed.
2659
2660  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2661  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2662  // with another "cannot select select_cc" assert:
2663
2664  SDValue compare = DAG.getNode(ISD::SETCC, dl,
2665                                TLI.getSetCCResultType(Op.getValueType()),
2666                                lhs, rhs, condition);
2667  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2668}
2669
2670//! Custom lower ISD::TRUNCATE
2671static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2672{
2673  // Type to truncate to
2674  EVT VT = Op.getValueType();
2675  MVT simpleVT = VT.getSimpleVT();
2676  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2677                               VT, (128 / VT.getSizeInBits()));
2678  DebugLoc dl = Op.getDebugLoc();
2679
2680  // Type to truncate from
2681  SDValue Op0 = Op.getOperand(0);
2682  EVT Op0VT = Op0.getValueType();
2683
2684  if (Op0VT == MVT::i128 && simpleVT == MVT::i64) {
2685    // Create shuffle mask, least significant doubleword of quadword
2686    unsigned maskHigh = 0x08090a0b;
2687    unsigned maskLow = 0x0c0d0e0f;
2688    // Use a shuffle to perform the truncation
2689    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2690                                   DAG.getConstant(maskHigh, MVT::i32),
2691                                   DAG.getConstant(maskLow, MVT::i32),
2692                                   DAG.getConstant(maskHigh, MVT::i32),
2693                                   DAG.getConstant(maskLow, MVT::i32));
2694
2695    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2696                                       Op0, Op0, shufMask);
2697
2698    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2699  }
2700
2701  return SDValue();             // Leave the truncate unmolested
2702}
2703
2704/*!
2705 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2706 * algorithm is to duplicate the sign bit using rotmai to generate at
2707 * least one byte full of sign bits. Then propagate the "sign-byte" into
2708 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2709 *
2710 * @param Op The sext operand
2711 * @param DAG The current DAG
2712 * @return The SDValue with the entire instruction sequence
2713 */
2714static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2715{
2716  DebugLoc dl = Op.getDebugLoc();
2717
2718  // Type to extend to
2719  MVT OpVT = Op.getValueType().getSimpleVT();
2720
2721  // Type to extend from
2722  SDValue Op0 = Op.getOperand(0);
2723  MVT Op0VT = Op0.getValueType().getSimpleVT();
2724
2725  // extend i8 & i16 via i32
2726  if (Op0VT == MVT::i8 || Op0VT == MVT::i16) {
2727    Op0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Op0);
2728    Op0VT = MVT::i32;
2729  }
2730
2731  // The type to extend to needs to be a i128 and
2732  // the type to extend from needs to be i64 or i32.
2733  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2734          "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2735  (void)OpVT;
2736
2737  // Create shuffle mask
2738  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2739  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2740  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2741  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2742                                 DAG.getConstant(mask1, MVT::i32),
2743                                 DAG.getConstant(mask1, MVT::i32),
2744                                 DAG.getConstant(mask2, MVT::i32),
2745                                 DAG.getConstant(mask3, MVT::i32));
2746
2747  // Word wise arithmetic right shift to generate at least one byte
2748  // that contains sign bits.
2749  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2750  SDValue sraVal = DAG.getNode(ISD::SRA,
2751                 dl,
2752                 mvt,
2753                 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2754                 DAG.getConstant(31, MVT::i32));
2755
2756  // reinterpret as a i128 (SHUFB requires it). This gets lowered away.
2757  SDValue extended = SDValue(DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
2758                                        dl, Op0VT, Op0,
2759                                        DAG.getTargetConstant(
2760                                                  SPU::GPRCRegClass.getID(),
2761                                                  MVT::i32)), 0);
2762  // Shuffle bytes - Copy the sign bits into the upper 64 bits
2763  // and the input value into the lower 64 bits.
2764  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2765        extended, sraVal, shufMask);
2766  return DAG.getNode(ISD::BITCAST, dl, MVT::i128, extShuffle);
2767}
2768
2769//! Custom (target-specific) lowering entry point
2770/*!
2771  This is where LLVM's DAG selection process calls to do target-specific
2772  lowering of nodes.
2773 */
2774SDValue
2775SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2776{
2777  unsigned Opc = (unsigned) Op.getOpcode();
2778  EVT VT = Op.getValueType();
2779
2780  switch (Opc) {
2781  default: {
2782#ifndef NDEBUG
2783    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2784    errs() << "Op.getOpcode() = " << Opc << "\n";
2785    errs() << "*Op.getNode():\n";
2786    Op.getNode()->dump();
2787#endif
2788    llvm_unreachable(0);
2789  }
2790  case ISD::LOAD:
2791  case ISD::EXTLOAD:
2792  case ISD::SEXTLOAD:
2793  case ISD::ZEXTLOAD:
2794    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2795  case ISD::STORE:
2796    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2797  case ISD::ConstantPool:
2798    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2799  case ISD::GlobalAddress:
2800    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2801  case ISD::JumpTable:
2802    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2803  case ISD::ConstantFP:
2804    return LowerConstantFP(Op, DAG);
2805
2806  // i8, i64 math ops:
2807  case ISD::ADD:
2808  case ISD::SUB:
2809  case ISD::ROTR:
2810  case ISD::ROTL:
2811  case ISD::SRL:
2812  case ISD::SHL:
2813  case ISD::SRA: {
2814    if (VT == MVT::i8)
2815      return LowerI8Math(Op, DAG, Opc, *this);
2816    break;
2817  }
2818
2819  case ISD::FP_TO_SINT:
2820  case ISD::FP_TO_UINT:
2821    return LowerFP_TO_INT(Op, DAG, *this);
2822
2823  case ISD::SINT_TO_FP:
2824  case ISD::UINT_TO_FP:
2825    return LowerINT_TO_FP(Op, DAG, *this);
2826
2827  // Vector-related lowering.
2828  case ISD::BUILD_VECTOR:
2829    return LowerBUILD_VECTOR(Op, DAG);
2830  case ISD::SCALAR_TO_VECTOR:
2831    return LowerSCALAR_TO_VECTOR(Op, DAG);
2832  case ISD::VECTOR_SHUFFLE:
2833    return LowerVECTOR_SHUFFLE(Op, DAG);
2834  case ISD::EXTRACT_VECTOR_ELT:
2835    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2836  case ISD::INSERT_VECTOR_ELT:
2837    return LowerINSERT_VECTOR_ELT(Op, DAG);
2838
2839  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2840  case ISD::AND:
2841  case ISD::OR:
2842  case ISD::XOR:
2843    return LowerByteImmed(Op, DAG);
2844
2845  // Vector and i8 multiply:
2846  case ISD::MUL:
2847    if (VT == MVT::i8)
2848      return LowerI8Math(Op, DAG, Opc, *this);
2849
2850  case ISD::CTPOP:
2851    return LowerCTPOP(Op, DAG);
2852
2853  case ISD::SELECT_CC:
2854    return LowerSELECT_CC(Op, DAG, *this);
2855
2856  case ISD::SETCC:
2857    return LowerSETCC(Op, DAG, *this);
2858
2859  case ISD::TRUNCATE:
2860    return LowerTRUNCATE(Op, DAG);
2861
2862  case ISD::SIGN_EXTEND:
2863    return LowerSIGN_EXTEND(Op, DAG);
2864  }
2865
2866  return SDValue();
2867}
2868
//! Replace illegal-typed node results -- currently a no-op for this target.
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const
{
  // Nothing is appended to Results, so the legalizer proceeds with the
  // node's original values. The disabled block below is a diagnostic
  // skeleton kept for debugging any future cases that need handling here.
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  EVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    errs() << "Op.getOpcode() = " << Opc << "\n";
    errs() << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
2891
2892//===----------------------------------------------------------------------===//
2893// Target Optimization Hooks
2894//===----------------------------------------------------------------------===//
2895
//! Target-specific DAG combines: address-arithmetic folds on SPUindirect
//! nodes, degenerate-shift elimination, and prefslot round-trip removal.
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    // Fold constant additions into an existing SPUindirect address node.
    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    // Extending a value that was just pulled out of the preferred slot is a
    // no-op when the types already agree.
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHL_BITS:
  case SPUISD::SHL_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    // Remove redundant round-trips through the preferred slot.
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}
3080
3081//===----------------------------------------------------------------------===//
3082// Inline Assembly Support
3083//===----------------------------------------------------------------------===//
3084
3085/// getConstraintType - Given a constraint letter, return the type of
3086/// constraint it is for this target.
3087SPUTargetLowering::ConstraintType
3088SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3089  if (ConstraintLetter.size() == 1) {
3090    switch (ConstraintLetter[0]) {
3091    default: break;
3092    case 'b':
3093    case 'r':
3094    case 'f':
3095    case 'v':
3096    case 'y':
3097      return C_RegisterClass;
3098    }
3099  }
3100  return TargetLowering::getConstraintType(ConstraintLetter);
3101}
3102
3103/// Examine constraint type and operand type and determine a weight value.
3104/// This object must already have been set up with the operand type
3105/// and the current alternative constraint selected.
3106TargetLowering::ConstraintWeight
3107SPUTargetLowering::getSingleConstraintMatchWeight(
3108    AsmOperandInfo &info, const char *constraint) const {
3109  ConstraintWeight weight = CW_Invalid;
3110  Value *CallOperandVal = info.CallOperandVal;
3111    // If we don't have a value, we can't do a match,
3112    // but allow it at the lowest weight.
3113  if (CallOperandVal == NULL)
3114    return CW_Default;
3115  // Look at the constraint type.
3116  switch (*constraint) {
3117  default:
3118    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
3119    break;
3120    //FIXME: Seems like the supported constraint letters were just copied
3121    // from PPC, as the following doesn't correspond to the GCC docs.
3122    // I'm leaving it so until someone adds the corresponding lowering support.
3123  case 'b':
3124  case 'r':
3125  case 'f':
3126  case 'd':
3127  case 'v':
3128  case 'y':
3129    weight = CW_Register;
3130    break;
3131  }
3132  return weight;
3133}
3134
3135std::pair<unsigned, const TargetRegisterClass*>
3136SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
3137                                                EVT VT) const
3138{
3139  if (Constraint.size() == 1) {
3140    // GCC RS6000 Constraint Letters
3141    switch (Constraint[0]) {
3142    case 'b':   // R1-R31
3143    case 'r':   // R0-R31
3144      if (VT == MVT::i64)
3145        return std::make_pair(0U, SPU::R64CRegisterClass);
3146      return std::make_pair(0U, SPU::R32CRegisterClass);
3147    case 'f':
3148      if (VT == MVT::f32)
3149        return std::make_pair(0U, SPU::R32FPRegisterClass);
3150      else if (VT == MVT::f64)
3151        return std::make_pair(0U, SPU::R64FPRegisterClass);
3152      break;
3153    case 'v':
3154      return std::make_pair(0U, SPU::GPRCRegisterClass);
3155    }
3156  }
3157
3158  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
3159}
3160
//! Compute used/known bits for a SPU operand
// Deliberately a no-op: KnownZero/KnownOne are left exactly as the caller
// set them, so the generic combiner learns nothing extra about SPU target
// nodes.  The '#if 0' region below sketches the target opcodes a future
// implementation might handle; it has never been compiled in.
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}
3193
3194unsigned
3195SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3196                                                   unsigned Depth) const {
3197  switch (Op.getOpcode()) {
3198  default:
3199    return 1;
3200
3201  case ISD::SETCC: {
3202    EVT VT = Op.getValueType();
3203
3204    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3205      VT = MVT::i32;
3206    }
3207    return VT.getSizeInBits();
3208  }
3209  }
3210}
3211
// LowerAsmOperandForConstraint
// No SPU-specific inline-asm operand lowering is implemented; this simply
// forwards to the generic TargetLowering handler.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                std::string &Constraint,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
3221
3222/// isLegalAddressImmediate - Return true if the integer value can be used
3223/// as the offset of the target addressing mode.
3224bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3225                                                Type *Ty) const {
3226  // SPU's addresses are 256K:
3227  return (V > -(1 << 18) && V < (1 << 18) - 1);
3228}
3229
// A global value is never directly usable as an addressing-mode immediate
// on SPU; it must be materialized into a register first.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}
3233
// Offsets are never folded into global-address nodes for this target.
bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}
3239
3240// can we compare to Imm without writing it into a register?
3241bool SPUTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
3242  //ceqi, cgti, etc. all take s10 operand
3243  return isInt<10>(Imm);
3244}
3245
3246bool
3247SPUTargetLowering::isLegalAddressingMode(const AddrMode &AM,
3248                                         Type * ) const{
3249
3250  // A-form: 18bit absolute address.
3251  if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && AM.BaseOffs == 0)
3252    return true;
3253
3254  // D-form: reg + 14bit offset
3255  if (AM.BaseGV ==0 && AM.HasBaseReg && AM.Scale == 0 && isInt<14>(AM.BaseOffs))
3256    return true;
3257
3258  // X-form: reg+reg
3259  if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 1 && AM.BaseOffs ==0)
3260    return true;
3261
3262  return false;
3263}
3264