SPUISelLowering.cpp revision 5427d71be8e907f1ef06f97b55b2431e703ef266
1//
2//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SPUTargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SPURegisterNames.h"
15#include "SPUISelLowering.h"
16#include "SPUTargetMachine.h"
17#include "SPUFrameInfo.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/ADT/VectorExtras.h"
20#include "llvm/CallingConv.h"
21#include "llvm/CodeGen/CallingConvLower.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineInstrBuilder.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/SelectionDAG.h"
27#include "llvm/Constants.h"
28#include "llvm/Function.h"
29#include "llvm/Intrinsics.h"
30#include "llvm/Support/Debug.h"
31#include "llvm/Support/ErrorHandling.h"
32#include "llvm/Support/MathExtras.h"
33#include "llvm/Support/raw_ostream.h"
34#include "llvm/Target/TargetOptions.h"
35
36#include <map>
37
38using namespace llvm;
39
// Used in getTargetNodeName() below
namespace {
  // Opcode -> printable-name map (see getTargetNodeName()).
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  /*!
   For each scalar value type, records the byte offset of that type's
   "preferred slot" within a 16-byte SPU register (e.g. an i8 sits at byte 3,
   an i16 at byte 2, an i32 at byte 0). The load lowering code below rotates
   quadwords by this amount so a scalar lands in its preferred slot.
   */
  struct valtype_map_s {
    const MVT   valtype;        //!< value type described by this entry
    const int   prefslot_byte;  //!< preferred-slot byte offset in a quadword
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  // Number of entries in valtype_map.
  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  //! Find the valtype_map entry describing \p VT.
  /*!
   Returns a pointer to the matching table entry, or null if \p VT is not in
   the table. In asserts builds an unknown type aborts compilation via
   llvm_report_error; release (NDEBUG) builds simply return null.
   */
  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    // Linear scan is fine here: the table has only a handful of entries.
    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.

   \note The \p Hi out-parameter is never written by this implementation.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    // Forward each operand of Op as a call argument; the sign/zero
    // extension attributes are both driven by isSigned.
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      MVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForMVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    // The call's result type is taken from Op's first result.
    const Type *RetTy =
                 Op.getNode()->getValueType(0).getTypeForMVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            0, CallingConv::C, false, Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}
126
SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Division by a power of two is cheap on SPU, so prefer it when the
  // optimizer has the choice.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  // (note: the upper bound is exclusive, so MVT::i128 itself is not visited)
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    // Expand truncating stores from VT to every narrower integer type.
    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Same treatment for the FP types. NOTE(review): with the exclusive upper
  // bound this loop only visits MVT::f32, not MVT::f64 — confirm intended.
  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8, i32 and i64 multiplications
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP. It does have i32 support CTLZ.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64, need
  // to expand to a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET,           MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  // NOTE(review): this overrides the earlier FP_TO_SINT i64 Expand setting
  // above; SINT_TO_FP i64 was already set to Custom.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  // NOTE(review): this overrides the earlier FP_TO_UINT i32 Custom setting.
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // mul is marked Legal for all vector types here (unlike scalar i8 MUL,
    // which is custom lowered above).
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  // Byte-wide logical ops get custom lowering on top of the generic
  // vector settings made above.
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}
497
498const char *
499SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
500{
501  if (node_names.empty()) {
502    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
503    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
504    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
505    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
506    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
507    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
508    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
509    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
510    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
511    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
512    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
513    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
514    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
515    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
516    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
517    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
518    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
519    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
520    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
521    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
522    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
523    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
524            "SPUISD::ROTBYTES_LEFT_BITS";
525    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
526    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
527    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
528    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
529    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
530  }
531
532  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
533
534  return ((i != node_names.end()) ? i->second : 0);
535}
536
537/// getFunctionAlignment - Return the Log2 alignment of this function.
538unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
539  return 3;
540}
541
542//===----------------------------------------------------------------------===//
543// Return the Cell SPU's SETCC result type
544//===----------------------------------------------------------------------===//
545
546MVT SPUTargetLowering::getSetCCResultType(MVT VT) const {
547  // i16 and i32 are valid SETCC result types
548  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
549}
550
551//===----------------------------------------------------------------------===//
552// Calling convention code:
553//===----------------------------------------------------------------------===//
554
555#include "SPUGenCallingConv.inc"
556
557//===----------------------------------------------------------------------===//
558//  LowerOperation implementation
559//===----------------------------------------------------------------------===//
560
/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();          // type actually held in memory
  MVT OutVT = Op.getValueType();         // result type (wider for ext loads)
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  // Table entry supplying the preferred-slot byte offset for InVT.
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;                      // byte-rotation amount (constant or node)

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        // Rotate so the byte at (offset mod 16) lands in the type's
        // preferred slot; normalize negative amounts into [0, 16).
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          // Re-apply only the 16-byte-aligned part of the offset.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        // Wrap the plain pointer in an IndirectAddr with a zero offset.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    // Bundle the scalar result and the (updated) chain in an LDRESULT node
    // so users of either value see this lowered load.
    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      // Indexed addressing modes are never produced for SPU; abort loudly.
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}
734
735/// Custom lower stores for CellSPU
736/*!
737 All CellSPU stores are aligned to 16-byte boundaries, so for elements
738 within a 16-byte block, we have to generate a shuffle to insert the
739 requested element into its place, then store the resulting block.
740 */
741static SDValue
742LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
743  StoreSDNode *SN = cast<StoreSDNode>(Op);
744  SDValue Value = SN->getValue();
745  MVT VT = Value.getValueType();
746  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
747  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
748  DebugLoc dl = Op.getDebugLoc();
749  unsigned alignment = SN->getAlignment();
750
751  switch (SN->getAddressingMode()) {
752  case ISD::UNINDEXED: {
753    // The vector type we really want to load from the 16-byte chunk.
754    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
755        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
756
757    SDValue alignLoadVec;
758    SDValue basePtr = SN->getBasePtr();
759    SDValue the_chain = SN->getChain();
760    SDValue insertEltOffs;
761
762    if (alignment == 16) {
763      ConstantSDNode *CN;
764
765      // Special cases for a known aligned load to simplify the base pointer
766      // and insertion byte:
767      if (basePtr.getOpcode() == ISD::ADD
768          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
769        // Known offset into basePtr
770        int64_t offset = CN->getSExtValue();
771
772        // Simplify the base pointer for this case:
773        basePtr = basePtr.getOperand(0);
774        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
775                                    basePtr,
776                                    DAG.getConstant((offset & 0xf), PtrVT));
777
778        if ((offset & ~0xf) > 0) {
779          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
780                                basePtr,
781                                DAG.getConstant((offset & ~0xf), PtrVT));
782        }
783      } else {
784        // Otherwise, assume it's at byte 0 of basePtr
785        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
786                                    basePtr,
787                                    DAG.getConstant(0, PtrVT));
788      }
789    } else {
790      // Unaligned load: must be more pessimistic about addressing modes:
791      if (basePtr.getOpcode() == ISD::ADD) {
792        MachineFunction &MF = DAG.getMachineFunction();
793        MachineRegisterInfo &RegInfo = MF.getRegInfo();
794        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
795        SDValue Flag;
796
797        SDValue Op0 = basePtr.getOperand(0);
798        SDValue Op1 = basePtr.getOperand(1);
799
800        if (isa<ConstantSDNode>(Op1)) {
801          // Convert the (add <ptr>, <const>) to an indirect address contained
802          // in a register. Note that this is done because we need to avoid
803          // creating a 0(reg) d-form address due to the SPU's block loads.
804          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
805          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
806          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
807        } else {
808          // Convert the (add <arg1>, <arg2>) to an indirect address, which
809          // will likely be lowered as a reg(reg) x-form address.
810          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
811        }
812      } else {
813        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
814                              basePtr,
815                              DAG.getConstant(0, PtrVT));
816      }
817
818      // Insertion point is solely determined by basePtr's contents
819      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
820                                  basePtr,
821                                  DAG.getConstant(0, PtrVT));
822    }
823
824    // Re-emit as a v16i8 vector load
825    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
826                               SN->getSrcValue(), SN->getSrcValueOffset(),
827                               SN->isVolatile(), 16);
828
829    // Update the chain
830    the_chain = alignLoadVec.getValue(1);
831
832    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
833    SDValue theValue = SN->getValue();
834    SDValue result;
835
836    if (StVT != VT
837        && (theValue.getOpcode() == ISD::AssertZext
838            || theValue.getOpcode() == ISD::AssertSext)) {
839      // Drill down and get the value for zero- and sign-extended
840      // quantities
841      theValue = theValue.getOperand(0);
842    }
843
844    // If the base pointer is already a D-form address, then just create
845    // a new D-form address with a slot offset and the orignal base pointer.
846    // Otherwise generate a D-form address with the slot offset relative
847    // to the stack pointer, which is always aligned.
848#if !defined(NDEBUG)
849      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
850        cerr << "CellSPU LowerSTORE: basePtr = ";
851        basePtr.getNode()->dump(&DAG);
852        cerr << "\n";
853      }
854#endif
855
856    SDValue insertEltOp =
857            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
858    SDValue vectorizeOp =
859            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);
860
861    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
862                         vectorizeOp, alignLoadVec,
863                         DAG.getNode(ISD::BIT_CONVERT, dl,
864                                     MVT::v4i32, insertEltOp));
865
866    result = DAG.getStore(the_chain, dl, result, basePtr,
867                          LN->getSrcValue(), LN->getSrcValueOffset(),
868                          LN->isVolatile(), LN->getAlignment());
869
870#if 0 && !defined(NDEBUG)
871    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
872      const SDValue &currentRoot = DAG.getRoot();
873
874      DAG.setRoot(result);
875      cerr << "------- CellSPU:LowerStore result:\n";
876      DAG.dump();
877      cerr << "-------\n";
878      DAG.setRoot(currentRoot);
879    }
880#endif
881
882    return result;
883    /*UNREACHED*/
884  }
885  case ISD::PRE_INC:
886  case ISD::PRE_DEC:
887  case ISD::POST_INC:
888  case ISD::POST_DEC:
889  case ISD::LAST_INDEXED_MODE:
890    {
891      std::string msg;
892      raw_string_ostream Msg(msg);
893      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
894            "UNINDEXED\n";
895      Msg << (unsigned) SN->getAddressingMode();
896      llvm_report_error(Msg.str());
897      /*NOTREACHED*/
898    }
899  }
900
901  return SDValue();
902}
903
904//! Generate the address of a constant pool entry.
905SDValue
906LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
907  MVT PtrVT = Op.getValueType();
908  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
909  Constant *C = CP->getConstVal();
910  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
911  SDValue Zero = DAG.getConstant(0, PtrVT);
912  const TargetMachine &TM = DAG.getTarget();
913  // FIXME there is no actual debug info here
914  DebugLoc dl = Op.getDebugLoc();
915
916  if (TM.getRelocationModel() == Reloc::Static) {
917    if (!ST->usingLargeMem()) {
918      // Just return the SDValue with the constant pool address in it.
919      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
920    } else {
921      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
922      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
923      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
924    }
925  }
926
927  llvm_unreachable("LowerConstantPool: Relocation model other than static"
928                   " not supported.");
929  return SDValue();
930}
931
932//! Alternate entry point for generating the address of a constant pool entry
933SDValue
934SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
935  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
936}
937
938static SDValue
939LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
940  MVT PtrVT = Op.getValueType();
941  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
942  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
943  SDValue Zero = DAG.getConstant(0, PtrVT);
944  const TargetMachine &TM = DAG.getTarget();
945  // FIXME there is no actual debug info here
946  DebugLoc dl = Op.getDebugLoc();
947
948  if (TM.getRelocationModel() == Reloc::Static) {
949    if (!ST->usingLargeMem()) {
950      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
951    } else {
952      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
953      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
954      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
955    }
956  }
957
958  llvm_unreachable("LowerJumpTable: Relocation model other than static"
959                   " not supported.");
960  return SDValue();
961}
962
963static SDValue
964LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
965  MVT PtrVT = Op.getValueType();
966  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
967  GlobalValue *GV = GSDN->getGlobal();
968  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
969  const TargetMachine &TM = DAG.getTarget();
970  SDValue Zero = DAG.getConstant(0, PtrVT);
971  // FIXME there is no actual debug info here
972  DebugLoc dl = Op.getDebugLoc();
973
974  if (TM.getRelocationModel() == Reloc::Static) {
975    if (!ST->usingLargeMem()) {
976      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
977    } else {
978      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
979      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
980      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
981    }
982  } else {
983    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
984                      "not supported.");
985    /*NOTREACHED*/
986  }
987
988  return SDValue();
989}
990
991//! Custom lower double precision floating point constants
992static SDValue
993LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
994  MVT VT = Op.getValueType();
995  // FIXME there is no actual debug info here
996  DebugLoc dl = Op.getDebugLoc();
997
998  if (VT == MVT::f64) {
999    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
1000
1001    assert((FP != 0) &&
1002           "LowerConstantFP: Node is not ConstantFPSDNode");
1003
1004    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
1005    SDValue T = DAG.getConstant(dbits, MVT::i64);
1006    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
1007    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1008                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
1009  }
1010
1011  return SDValue();
1012}
1013
//! Lower incoming formal arguments for CellSPU.
/*!
 Each incoming argument is either copied out of one of the SPU argument
 registers into a fresh virtual register, or (once the argument registers
 are exhausted) loaded from a fixed stack slot.  For varargs functions,
 every remaining argument register is spilled to consecutive stack slots
 so the variadic portion can be walked in memory.  The chain is threaded
 through each copy/load so ordering is preserved.
 */
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  // Operand 2 of FORMAL_ARGUMENTS is the vararg flag.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;
  DebugLoc dl = Op.getDebugLoc();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Stack-passed arguments start just past the minimal frame area.
  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  // The last value of the node is the chain, hence getNumValues() - 1.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      // Register-passed argument: pick the register class for its type.
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT()) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      // Mark the physical register live-in and copy it into a vreg.
      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot

    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      // NOTE(review): VarArgsFrameIndex is overwritten each iteration, so it
      // ends up referring to the last spilled slot -- confirm callers expect
      // that.
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, dl, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, dl, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
1132
1133/// isLSAAddress - Return the immediate to use if the specified
1134/// value is representable as a LSA address.
1135static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1136  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1137  if (!C) return 0;
1138
1139  int Addr = C->getZExtValue();
1140  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1141      (Addr << 14 >> 14) != Addr)
1142    return 0;  // Top 14 bits have to be sext of immediate.
1143
1144  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1145}
1146
//! Lower an outgoing call for CellSPU.
/*!
 Splits the outgoing arguments between the SPU argument registers and
 stack slots, emits the CALLSEQ_START/CALLSEQ_END bracket, selects the
 call addressing form (A-form, PC-relative, or indirect, depending on
 memory model and whether the callee is a declaration), and finally
 copies any return values out of R3/R4.
 */
static SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee    = TheCall->getCallee();
  unsigned NumOps     = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
  DebugLoc dl = TheCall->getDebugLoc();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // All cases are handled identically: first NumArgRegs args go in
    // registers, the rest spill to the stack.
    switch (Arg.getValueType().getSimpleVT()) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
        Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  // Up to two actual results plus the chain.
  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      // Two i32 results come back in R4 (second) and R3 (first).
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::i128:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, TheCall->getValueType(0),
                                   InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults, dl);
  return Res.getValue(Op.getResNo());
}
1381
1382static SDValue
1383LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1384  SmallVector<CCValAssign, 16> RVLocs;
1385  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1386  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1387  DebugLoc dl = Op.getDebugLoc();
1388  CCState CCInfo(CC, isVarArg, TM, RVLocs, DAG.getContext());
1389  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1390
1391  // If this is the first return lowered for this function, add the regs to the
1392  // liveout set for the function.
1393  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1394    for (unsigned i = 0; i != RVLocs.size(); ++i)
1395      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1396  }
1397
1398  SDValue Chain = Op.getOperand(0);
1399  SDValue Flag;
1400
1401  // Copy the result values into the output registers.
1402  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1403    CCValAssign &VA = RVLocs[i];
1404    assert(VA.isRegLoc() && "Can only return in registers!");
1405    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1406                             Op.getOperand(i*2+1), Flag);
1407    Flag = Chain.getValue(1);
1408  }
1409
1410  if (Flag.getNode())
1411    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1412  else
1413    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1414}
1415
1416
1417//===----------------------------------------------------------------------===//
1418// Vector related lowering:
1419//===----------------------------------------------------------------------===//
1420
1421static ConstantSDNode *
1422getVecImm(SDNode *N) {
1423  SDValue OpVal(0, 0);
1424
1425  // Check to see if this buildvec has a single non-undef value in its elements.
1426  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1427    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1428    if (OpVal.getNode() == 0)
1429      OpVal = N->getOperand(i);
1430    else if (OpVal != N->getOperand(i))
1431      return 0;
1432  }
1433
1434  if (OpVal.getNode() != 0) {
1435    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1436      return CN;
1437    }
1438  }
1439
1440  return 0;
1441}
1442
1443/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1444/// and the value fits into an unsigned 18-bit constant, and if so, return the
1445/// constant
1446SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1447                              MVT ValueType) {
1448  if (ConstantSDNode *CN = getVecImm(N)) {
1449    uint64_t Value = CN->getZExtValue();
1450    if (ValueType == MVT::i64) {
1451      uint64_t UValue = CN->getZExtValue();
1452      uint32_t upper = uint32_t(UValue >> 32);
1453      uint32_t lower = uint32_t(UValue);
1454      if (upper != lower)
1455        return SDValue();
1456      Value = Value >> 32;
1457    }
1458    if (Value <= 0x3ffff)
1459      return DAG.getTargetConstant(Value, ValueType);
1460  }
1461
1462  return SDValue();
1463}
1464
1465/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1466/// and the value fits into a signed 16-bit constant, and if so, return the
1467/// constant
1468SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1469                              MVT ValueType) {
1470  if (ConstantSDNode *CN = getVecImm(N)) {
1471    int64_t Value = CN->getSExtValue();
1472    if (ValueType == MVT::i64) {
1473      uint64_t UValue = CN->getZExtValue();
1474      uint32_t upper = uint32_t(UValue >> 32);
1475      uint32_t lower = uint32_t(UValue);
1476      if (upper != lower)
1477        return SDValue();
1478      Value = Value >> 32;
1479    }
1480    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1481      return DAG.getTargetConstant(Value, ValueType);
1482    }
1483  }
1484
1485  return SDValue();
1486}
1487
1488/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1489/// and the value fits into a signed 10-bit constant, and if so, return the
1490/// constant
1491SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1492                              MVT ValueType) {
1493  if (ConstantSDNode *CN = getVecImm(N)) {
1494    int64_t Value = CN->getSExtValue();
1495    if (ValueType == MVT::i64) {
1496      uint64_t UValue = CN->getZExtValue();
1497      uint32_t upper = uint32_t(UValue >> 32);
1498      uint32_t lower = uint32_t(UValue);
1499      if (upper != lower)
1500        return SDValue();
1501      Value = Value >> 32;
1502    }
1503    if (isS10Constant(Value))
1504      return DAG.getTargetConstant(Value, ValueType);
1505  }
1506
1507  return SDValue();
1508}
1509
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    // i16 request: accept a halfword whose high and low bytes agree and
    // return the low byte.  NOTE(review): the left-hand side arithmetic-
    // shifts a *signed* short, so a value with bit 15 set (e.g. 0x8080)
    // yields a negative shift result that never equals the masked low byte
    // -- confirm the signedness here is intentional.
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    // i8 request: the value must already fit in an unsigned byte.
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      return DAG.getTargetConstant(Value, ValueType);
  }

  return SDValue();
}
1532
1533/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1534/// and the value fits into a signed 16-bit constant, and if so, return the
1535/// constant
1536SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1537                               MVT ValueType) {
1538  if (ConstantSDNode *CN = getVecImm(N)) {
1539    uint64_t Value = CN->getZExtValue();
1540    if ((ValueType == MVT::i32
1541          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1542        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1543      return DAG.getTargetConstant(Value >> 16, ValueType);
1544  }
1545
1546  return SDValue();
1547}
1548
1549/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1550SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1551  if (ConstantSDNode *CN = getVecImm(N)) {
1552    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1553  }
1554
1555  return SDValue();
1556}
1557
1558/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1559SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1560  if (ConstantSDNode *CN = getVecImm(N)) {
1561    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1562  }
1563
1564  return SDValue();
1565}
1566
1567//! Lower a BUILD_VECTOR instruction creatively:
1568SDValue
1569LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1570  MVT VT = Op.getValueType();
1571  MVT EltVT = VT.getVectorElementType();
1572  DebugLoc dl = Op.getDebugLoc();
1573  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1574  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1575  unsigned minSplatBits = EltVT.getSizeInBits();
1576
1577  if (minSplatBits < 16)
1578    minSplatBits = 16;
1579
1580  APInt APSplatBits, APSplatUndef;
1581  unsigned SplatBitSize;
1582  bool HasAnyUndefs;
1583
1584  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1585                            HasAnyUndefs, minSplatBits)
1586      || minSplatBits < SplatBitSize)
1587    return SDValue();   // Wasn't a constant vector or splat exceeded min
1588
1589  uint64_t SplatBits = APSplatBits.getZExtValue();
1590
1591  switch (VT.getSimpleVT()) {
1592  default: {
1593    std::string msg;
1594    raw_string_ostream Msg(msg);
1595    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1596         << VT.getMVTString();
1597    llvm_report_error(Msg.str());
1598    /*NOTREACHED*/
1599  }
1600  case MVT::v4f32: {
1601    uint32_t Value32 = uint32_t(SplatBits);
1602    assert(SplatBitSize == 32
1603           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1604    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1605    SDValue T = DAG.getConstant(Value32, MVT::i32);
1606    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1607                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1608    break;
1609  }
1610  case MVT::v2f64: {
1611    uint64_t f64val = uint64_t(SplatBits);
1612    assert(SplatBitSize == 64
1613           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1614    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1615    SDValue T = DAG.getConstant(f64val, MVT::i64);
1616    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1617                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1618    break;
1619  }
1620  case MVT::v16i8: {
1621   // 8-bit constants have to be expanded to 16-bits
1622   unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1623   SmallVector<SDValue, 8> Ops;
1624
1625   Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1626   return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1627                      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1628  }
1629  case MVT::v8i16: {
1630    unsigned short Value16 = SplatBits;
1631    SDValue T = DAG.getConstant(Value16, EltVT);
1632    SmallVector<SDValue, 8> Ops;
1633
1634    Ops.assign(8, T);
1635    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1636  }
1637  case MVT::v4i32: {
1638    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1639    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1640  }
1641  case MVT::v2i32: {
1642    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1643    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1644  }
1645  case MVT::v2i64: {
1646    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1647  }
1648  }
1649
1650  return SDValue();
1651}
1652
1653/*!
1654 */
1655SDValue
1656SPU::LowerV2I64Splat(MVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1657                     DebugLoc dl) {
1658  uint32_t upper = uint32_t(SplatVal >> 32);
1659  uint32_t lower = uint32_t(SplatVal);
1660
1661  if (upper == lower) {
1662    // Magic constant that can be matched by IL, ILA, et. al.
1663    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1664    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1665                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1666                                   Val, Val, Val, Val));
1667  } else {
1668    bool upper_special, lower_special;
1669
1670    // NOTE: This code creates common-case shuffle masks that can be easily
1671    // detected as common expressions. It is not attempting to create highly
1672    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1673
1674    // Detect if the upper or lower half is a special shuffle mask pattern:
1675    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1676    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1677
1678    // Both upper and lower are special, lower to a constant pool load:
1679    if (lower_special && upper_special) {
1680      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1681      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1682                         SplatValCN, SplatValCN);
1683    }
1684
1685    SDValue LO32;
1686    SDValue HI32;
1687    SmallVector<SDValue, 16> ShufBytes;
1688    SDValue Result;
1689
1690    // Create lower vector if not a special pattern
1691    if (!lower_special) {
1692      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1693      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1694                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1695                                     LO32C, LO32C, LO32C, LO32C));
1696    }
1697
1698    // Create upper vector if not a special pattern
1699    if (!upper_special) {
1700      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1701      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1702                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1703                                     HI32C, HI32C, HI32C, HI32C));
1704    }
1705
1706    // If either upper or lower are special, then the two input operands are
1707    // the same (basically, one of them is a "don't care")
1708    if (lower_special)
1709      LO32 = HI32;
1710    if (upper_special)
1711      HI32 = LO32;
1712
1713    for (int i = 0; i < 4; ++i) {
1714      uint64_t val = 0;
1715      for (int j = 0; j < 4; ++j) {
1716        SDValue V;
1717        bool process_upper, process_lower;
1718        val <<= 8;
1719        process_upper = (upper_special && (i & 1) == 0);
1720        process_lower = (lower_special && (i & 1) == 1);
1721
1722        if (process_upper || process_lower) {
1723          if ((process_upper && upper == 0)
1724                  || (process_lower && lower == 0))
1725            val |= 0x80;
1726          else if ((process_upper && upper == 0xffffffff)
1727                  || (process_lower && lower == 0xffffffff))
1728            val |= 0xc0;
1729          else if ((process_upper && upper == 0x80000000)
1730                  || (process_lower && lower == 0x80000000))
1731            val |= (j == 0 ? 0xe0 : 0x80);
1732        } else
1733          val |= i * 4 + j + ((i & 1) * 16);
1734      }
1735
1736      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1737    }
1738
1739    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1740                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1741                                   &ShufBytes[0], ShufBytes.size()));
1742  }
1743}
1744
1745/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1746/// which the Cell can operate. The code inspects V3 to ascertain whether the
1747/// permutation vector, V3, is monotonically increasing with one "exception"
1748/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1749/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1750/// In either case, the net result is going to eventually invoke SHUFB to
1751/// permute/shuffle the bytes from V1 and V2.
1752/// \note
1753/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1754/// control word for byte/halfword/word insertion. This takes care of a single
1755/// element move from V2 into V1.
1756/// \note
1757/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();

  // An undef V2 may as well be V1; indices into "V2" then read V1's bytes.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;   // how many mask indices reference V2
  unsigned V2Elt = 0;        // offset recorded for the single V2 element
  unsigned V2EltIdx0 = 0;    // first mask index value that refers to V2
  unsigned CurrElt = 0;      // expected next index for the monotonic check
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;      // previous mask index (rotation detection)
  unsigned V0Elt = 0;        // position where source index 0 appears
  bool monotonic = true;     // eligible for SHUFFLE_MASK + SHUFB path?
  bool rotate = true;        // eligible for ROTBYTES_LEFT path?

  // The element count of a 128-bit register fixes where V2's indices begin
  // in the combined shuffle-mask index space.
  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");

  for (unsigned i = 0; i != MaxElts; ++i) {
    if (SVN->getMaskElt(i) < 0)
      continue;   // undef slot: compatible with every pattern

    unsigned SrcElt = SVN->getMaskElt(i);

    if (monotonic) {
      if (SrcElt >= V2EltIdx0) {
        // Element taken from V2: record the first (and only allowed) one.
        // NOTE(review): SrcElt >= V2EltIdx0 here, so V2EltIdx0 - SrcElt is
        // zero or wraps around before the << 2 -- confirm this shouldn't be
        // (SrcElt - V2EltIdx0) scaled by the element byte size.
        if (1 >= (++EltsFromV2)) {
          V2Elt = (V2EltIdx0 - SrcElt) << 2;
        }
      } else if (CurrElt != SrcElt) {
        monotonic = false;
      }

      ++CurrElt;
    }

    if (rotate) {
      if (PrevElt > 0 && SrcElt < MaxElts) {
        // Still a rotation if each index follows its predecessor, wrapping
        // from MaxElts-1 back to 0.
        if ((PrevElt == SrcElt - 1)
            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
          PrevElt = SrcElt;
          if (SrcElt == 0)
            V0Elt = i;
        } else {
          rotate = false;
        }
      } else if (PrevElt == 0) {
        // First time through, need to keep track of previous element
        PrevElt = SrcElt;
      } else {
        // This isn't a rotation, takes elements from vector 2
        rotate = false;
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Compute mask and shuffle
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
                       ShufMaskOp);
  } else if (rotate) {
    // Pure rotation: rotate left by the byte distance that brings the
    // element holding source index 0 into slot 0.
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
   // Convert the SHUFFLE_VECTOR mask's input element units to the
   // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);

      for (unsigned j = 0; j < BytesPerElement; ++j)
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
  }
}
1870
1871static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1872  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1873  DebugLoc dl = Op.getDebugLoc();
1874
1875  if (Op0.getNode()->getOpcode() == ISD::Constant) {
1876    // For a constant, build the appropriate constant vector, which will
1877    // eventually simplify to a vector register load.
1878
1879    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1880    SmallVector<SDValue, 16> ConstVecValues;
1881    MVT VT;
1882    size_t n_copies;
1883
1884    // Create a constant vector:
1885    switch (Op.getValueType().getSimpleVT()) {
1886    default: llvm_unreachable("Unexpected constant value type in "
1887                              "LowerSCALAR_TO_VECTOR");
1888    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1889    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1890    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1891    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1892    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1893    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1894    }
1895
1896    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1897    for (size_t j = 0; j < n_copies; ++j)
1898      ConstVecValues.push_back(CValue);
1899
1900    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1901                       &ConstVecValues[0], ConstVecValues.size());
1902  } else {
1903    // Otherwise, copy the value from one register to another:
1904    switch (Op0.getValueType().getSimpleVT()) {
1905    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1906    case MVT::i8:
1907    case MVT::i16:
1908    case MVT::i32:
1909    case MVT::i64:
1910    case MVT::f32:
1911    case MVT::f64:
1912      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1913    }
1914  }
1915
1916  return SDValue();
1917}
1918
//! Custom lowering for EXTRACT_VECTOR_ELT.
/*!
 A constant index is handled by shuffling the requested element into the
 preferred slot and reading it back with VEC2PREFSLOT.  A variable index
 first shifts the requested element down to byte 0, then replicates it
 across the register before the VEC2PREFSLOT read.
 */
static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  SDValue N = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  DebugLoc dl = Op.getDebugLoc();
  SDValue retval;

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
    // Constant argument:
    int EltNo = (int) C->getZExtValue();

    // sanity checks:
    // NOTE(review): the i32/i64 messages overstate the limit by one -- the
    // conditions reject slots > 3 and > 1 respectively.  Strings left as-is.
    if (VT == MVT::i8 && EltNo >= 16)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
    else if (VT == MVT::i16 && EltNo >= 8)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");

    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
      // i32 and i64: Element 0 is the preferred slot
      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
    }

    // Need to generate shuffle mask and extract:
    int prefslot_begin = -1, prefslot_end = -1;
    int elt_byte = EltNo * VT.getSizeInBits() / 8;

    // Byte range of the preferred slot for this element width (cf. the
    // valtype_map table at the top of this file).
    switch (VT.getSimpleVT()) {
    default:
      assert(false && "Invalid value type!");
    case MVT::i8: {
      prefslot_begin = prefslot_end = 3;
      break;
    }
    case MVT::i16: {
      prefslot_begin = 2; prefslot_end = 3;
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      prefslot_begin = 0; prefslot_end = 3;
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      prefslot_begin = 0; prefslot_end = 7;
      break;
    }
    }

    assert(prefslot_begin != -1 && prefslot_end != -1 &&
           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");

    // Build a byte-level mask that copies the requested element's bytes into
    // the preferred slot (0x80 zero-fills the bytes before it); the pattern
    // for bytes 0..prefslot_end is then repeated across the register.
    unsigned int ShufBytes[16];
    for (int i = 0; i < 16; ++i) {
      // zero fill uppper part of preferred slot, don't care about the
      // other slots:
      unsigned int mask_val;
      if (i <= prefslot_end) {
        mask_val =
          ((i < prefslot_begin)
           ? 0x80
           : elt_byte + (i - prefslot_begin));

        ShufBytes[i] = mask_val;
      } else
        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
    }

    // Pack the sixteen control bytes, four per i32 BUILD_VECTOR operand.
    SDValue ShufMask[4];
    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
      unsigned bidx = i * 4;
      unsigned int bits = ((ShufBytes[bidx] << 24) |
                           (ShufBytes[bidx+1] << 16) |
                           (ShufBytes[bidx+2] << 8) |
                           ShufBytes[bidx+3]);
      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
    }

    SDValue ShufMaskVec =
      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
                                     N, N, ShufMaskVec));
  } else {
    // Variable index: Rotate the requested element into slot 0, then replicate
    // slot 0 across the vector
    MVT VecVT = N.getValueType();
    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      // NOTE(review): the two adjacent literals concatenate without a space
      // ("128-bitvector type!").  String left untouched.
      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
                        "vector type!");
    }

    // Make life easier by making sure the index is zero-extended to i32
    if (Elt.getValueType() != MVT::i32)
      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);

    // Scale the index to a bit/byte shift quantity
    // (16 / NumElements) bytes per element; logBase2 yields the shift count.
    APInt scaleFactor =
            APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
    unsigned scaleShift = scaleFactor.logBase2();
    SDValue vecShift;

    if (scaleShift > 0) {
      // Scale the shift factor:
      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
                        DAG.getConstant(scaleShift, MVT::i32));
    }

    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);

    // Replicate the bytes starting at byte 0 across the entire vector (for
    // consistency with the notion of a unified register set)
    SDValue replicate;

    switch (VT.getSimpleVT()) {
    default:
      // NOTE(review): "varable" typo and missing space in this message.
      // String left untouched.
      llvm_report_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
                        "type");
      /*NOTREACHED*/
    case MVT::i8: {
      // Replicate byte 0 everywhere.
      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i16: {
      // Replicate bytes {0,1} into every halfword.
      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i32:
    case MVT::f32: {
      // Replicate bytes {0..3} into every word.
      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              factor, factor, factor, factor);
      break;
    }
    case MVT::i64:
    case MVT::f64: {
      // Replicate bytes {0..7} into both doublewords.
      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
                              loFactor, hiFactor, loFactor, hiFactor);
      break;
    }
    }

    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
                                     vecShift, vecShift, replicate));
  }

  return retval;
}
2080
2081static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2082  SDValue VecOp = Op.getOperand(0);
2083  SDValue ValOp = Op.getOperand(1);
2084  SDValue IdxOp = Op.getOperand(2);
2085  DebugLoc dl = Op.getDebugLoc();
2086  MVT VT = Op.getValueType();
2087
2088  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2089  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2090
2091  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2092  // Use $sp ($1) because it's always 16-byte aligned and it's available:
2093  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2094                                DAG.getRegister(SPU::R1, PtrVT),
2095                                DAG.getConstant(CN->getSExtValue(), PtrVT));
2096  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2097
2098  SDValue result =
2099    DAG.getNode(SPUISD::SHUFB, dl, VT,
2100                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2101                VecOp,
2102                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2103
2104  return result;
2105}
2106
2107static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2108                           const TargetLowering &TLI)
2109{
2110  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2111  DebugLoc dl = Op.getDebugLoc();
2112  MVT ShiftVT = TLI.getShiftAmountTy();
2113
2114  assert(Op.getValueType() == MVT::i8);
2115  switch (Opc) {
2116  default:
2117    llvm_unreachable("Unhandled i8 math operator");
2118    /*NOTREACHED*/
2119    break;
2120  case ISD::ADD: {
2121    // 8-bit addition: Promote the arguments up to 16-bits and truncate
2122    // the result:
2123    SDValue N1 = Op.getOperand(1);
2124    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2125    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2126    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2127                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2128
2129  }
2130
2131  case ISD::SUB: {
2132    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2133    // the result:
2134    SDValue N1 = Op.getOperand(1);
2135    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2136    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2137    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2138                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2139  }
2140  case ISD::ROTR:
2141  case ISD::ROTL: {
2142    SDValue N1 = Op.getOperand(1);
2143    MVT N1VT = N1.getValueType();
2144
2145    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2146    if (!N1VT.bitsEq(ShiftVT)) {
2147      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2148                       ? ISD::ZERO_EXTEND
2149                       : ISD::TRUNCATE;
2150      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2151    }
2152
2153    // Replicate lower 8-bits into upper 8:
2154    SDValue ExpandArg =
2155      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2156                  DAG.getNode(ISD::SHL, dl, MVT::i16,
2157                              N0, DAG.getConstant(8, MVT::i32)));
2158
2159    // Truncate back down to i8
2160    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2161                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2162  }
2163  case ISD::SRL:
2164  case ISD::SHL: {
2165    SDValue N1 = Op.getOperand(1);
2166    MVT N1VT = N1.getValueType();
2167
2168    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2169    if (!N1VT.bitsEq(ShiftVT)) {
2170      unsigned N1Opc = ISD::ZERO_EXTEND;
2171
2172      if (N1.getValueType().bitsGT(ShiftVT))
2173        N1Opc = ISD::TRUNCATE;
2174
2175      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2176    }
2177
2178    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2179                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2180  }
2181  case ISD::SRA: {
2182    SDValue N1 = Op.getOperand(1);
2183    MVT N1VT = N1.getValueType();
2184
2185    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2186    if (!N1VT.bitsEq(ShiftVT)) {
2187      unsigned N1Opc = ISD::SIGN_EXTEND;
2188
2189      if (N1VT.bitsGT(ShiftVT))
2190        N1Opc = ISD::TRUNCATE;
2191      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2192    }
2193
2194    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2195                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2196  }
2197  case ISD::MUL: {
2198    SDValue N1 = Op.getOperand(1);
2199
2200    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2201    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2202    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2203                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2204    break;
2205  }
2206  }
2207
2208  return SDValue();
2209}
2210
2211//! Lower byte immediate operations for v16i8 vectors:
2212static SDValue
2213LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2214  SDValue ConstVec;
2215  SDValue Arg;
2216  MVT VT = Op.getValueType();
2217  DebugLoc dl = Op.getDebugLoc();
2218
2219  ConstVec = Op.getOperand(0);
2220  Arg = Op.getOperand(1);
2221  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2222    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2223      ConstVec = ConstVec.getOperand(0);
2224    } else {
2225      ConstVec = Op.getOperand(1);
2226      Arg = Op.getOperand(0);
2227      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2228        ConstVec = ConstVec.getOperand(0);
2229      }
2230    }
2231  }
2232
2233  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2234    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2235    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2236
2237    APInt APSplatBits, APSplatUndef;
2238    unsigned SplatBitSize;
2239    bool HasAnyUndefs;
2240    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2241
2242    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2243                              HasAnyUndefs, minSplatBits)
2244        && minSplatBits <= SplatBitSize) {
2245      uint64_t SplatBits = APSplatBits.getZExtValue();
2246      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2247
2248      SmallVector<SDValue, 16> tcVec;
2249      tcVec.assign(16, tc);
2250      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2251                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2252    }
2253  }
2254
2255  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2256  // lowered.  Return the operation, rather than a null SDValue.
2257  return Op;
2258}
2259
2260//! Custom lowering for CTPOP (count population)
2261/*!
2262  Custom lowering code that counts the number ones in the input
2263  operand. SPU has such an instruction, but it counts the number of
2264  ones per byte, which then have to be accumulated.
2265*/
2266static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2267  MVT VT = Op.getValueType();
2268  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2269  DebugLoc dl = Op.getDebugLoc();
2270
2271  switch (VT.getSimpleVT()) {
2272  default:
2273    assert(false && "Invalid value type!");
2274  case MVT::i8: {
2275    SDValue N = Op.getOperand(0);
2276    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2277
2278    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2279    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2280
2281    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2282  }
2283
2284  case MVT::i16: {
2285    MachineFunction &MF = DAG.getMachineFunction();
2286    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2287
2288    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2289
2290    SDValue N = Op.getOperand(0);
2291    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2292    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2293    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2294
2295    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2296    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2297
2298    // CNTB_result becomes the chain to which all of the virtual registers
2299    // CNTB_reg, SUM1_reg become associated:
2300    SDValue CNTB_result =
2301      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2302
2303    SDValue CNTB_rescopy =
2304      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2305
2306    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2307
2308    return DAG.getNode(ISD::AND, dl, MVT::i16,
2309                       DAG.getNode(ISD::ADD, dl, MVT::i16,
2310                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
2311                                               Tmp1, Shift1),
2312                                   Tmp1),
2313                       Mask0);
2314  }
2315
2316  case MVT::i32: {
2317    MachineFunction &MF = DAG.getMachineFunction();
2318    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2319
2320    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2321    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2322
2323    SDValue N = Op.getOperand(0);
2324    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2325    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2326    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2327    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2328
2329    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2330    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2331
2332    // CNTB_result becomes the chain to which all of the virtual registers
2333    // CNTB_reg, SUM1_reg become associated:
2334    SDValue CNTB_result =
2335      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2336
2337    SDValue CNTB_rescopy =
2338      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2339
2340    SDValue Comp1 =
2341      DAG.getNode(ISD::SRL, dl, MVT::i32,
2342                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2343                  Shift1);
2344
2345    SDValue Sum1 =
2346      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2347                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2348
2349    SDValue Sum1_rescopy =
2350      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2351
2352    SDValue Comp2 =
2353      DAG.getNode(ISD::SRL, dl, MVT::i32,
2354                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2355                  Shift2);
2356    SDValue Sum2 =
2357      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2358                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2359
2360    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2361  }
2362
2363  case MVT::i64:
2364    break;
2365  }
2366
2367  return SDValue();
2368}
2369
2370//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2371/*!
2372 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2373 All conversions to i64 are expanded to a libcall.
2374 */
2375static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2376                              SPUTargetLowering &TLI) {
2377  MVT OpVT = Op.getValueType();
2378  SDValue Op0 = Op.getOperand(0);
2379  MVT Op0VT = Op0.getValueType();
2380
2381  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2382      || OpVT == MVT::i64) {
2383    // Convert f32 / f64 to i32 / i64 via libcall.
2384    RTLIB::Libcall LC =
2385            (Op.getOpcode() == ISD::FP_TO_SINT)
2386             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2387             : RTLIB::getFPTOUINT(Op0VT, OpVT);
2388    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd fp-to-int conversion!");
2389    SDValue Dummy;
2390    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2391  }
2392
2393  return Op;
2394}
2395
2396//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2397/*!
2398 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2399 All conversions from i64 are expanded to a libcall.
2400 */
2401static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2402                              SPUTargetLowering &TLI) {
2403  MVT OpVT = Op.getValueType();
2404  SDValue Op0 = Op.getOperand(0);
2405  MVT Op0VT = Op0.getValueType();
2406
2407  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2408      || Op0VT == MVT::i64) {
2409    // Convert i32, i64 to f64 via libcall:
2410    RTLIB::Libcall LC =
2411            (Op.getOpcode() == ISD::SINT_TO_FP)
2412             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2413             : RTLIB::getUINTTOFP(Op0VT, OpVT);
2414    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpectd int-to-fp conversion!");
2415    SDValue Dummy;
2416    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2417  }
2418
2419  return Op;
2420}
2421
2422//! Lower ISD::SETCC
2423/*!
2424 This handles MVT::f64 (double floating point) condition lowering
2425 */
2426static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2427                          const TargetLowering &TLI) {
2428  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2429  DebugLoc dl = Op.getDebugLoc();
2430  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2431
2432  SDValue lhs = Op.getOperand(0);
2433  SDValue rhs = Op.getOperand(1);
2434  MVT lhsVT = lhs.getValueType();
2435  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::64\n");
2436
2437  MVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2438  APInt ccResultOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2439  MVT IntVT(MVT::i64);
2440
2441  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2442  // selected to a NOP:
2443  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2444  SDValue lhsHi32 =
2445          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2446                      DAG.getNode(ISD::SRL, dl, IntVT,
2447                                  i64lhs, DAG.getConstant(32, MVT::i32)));
2448  SDValue lhsHi32abs =
2449          DAG.getNode(ISD::AND, dl, MVT::i32,
2450                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2451  SDValue lhsLo32 =
2452          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2453
2454  // SETO and SETUO only use the lhs operand:
2455  if (CC->get() == ISD::SETO) {
2456    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2457    // SETUO
2458    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2459    return DAG.getNode(ISD::XOR, dl, ccResultVT,
2460                       DAG.getSetCC(dl, ccResultVT,
2461                                    lhs, DAG.getConstantFP(0.0, lhsVT),
2462                                    ISD::SETUO),
2463                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2464  } else if (CC->get() == ISD::SETUO) {
2465    // Evaluates to true if Op0 is [SQ]NaN
2466    return DAG.getNode(ISD::AND, dl, ccResultVT,
2467                       DAG.getSetCC(dl, ccResultVT,
2468                                    lhsHi32abs,
2469                                    DAG.getConstant(0x7ff00000, MVT::i32),
2470                                    ISD::SETGE),
2471                       DAG.getSetCC(dl, ccResultVT,
2472                                    lhsLo32,
2473                                    DAG.getConstant(0, MVT::i32),
2474                                    ISD::SETGT));
2475  }
2476
2477  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2478  SDValue rhsHi32 =
2479          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2480                      DAG.getNode(ISD::SRL, dl, IntVT,
2481                                  i64rhs, DAG.getConstant(32, MVT::i32)));
2482
2483  // If a value is negative, subtract from the sign magnitude constant:
2484  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2485
2486  // Convert the sign-magnitude representation into 2's complement:
2487  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2488                                      lhsHi32, DAG.getConstant(31, MVT::i32));
2489  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2490  SDValue lhsSelect =
2491          DAG.getNode(ISD::SELECT, dl, IntVT,
2492                      lhsSelectMask, lhsSignMag2TC, i64lhs);
2493
2494  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2495                                      rhsHi32, DAG.getConstant(31, MVT::i32));
2496  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2497  SDValue rhsSelect =
2498          DAG.getNode(ISD::SELECT, dl, IntVT,
2499                      rhsSelectMask, rhsSignMag2TC, i64rhs);
2500
2501  unsigned compareOp;
2502
2503  switch (CC->get()) {
2504  case ISD::SETOEQ:
2505  case ISD::SETUEQ:
2506    compareOp = ISD::SETEQ; break;
2507  case ISD::SETOGT:
2508  case ISD::SETUGT:
2509    compareOp = ISD::SETGT; break;
2510  case ISD::SETOGE:
2511  case ISD::SETUGE:
2512    compareOp = ISD::SETGE; break;
2513  case ISD::SETOLT:
2514  case ISD::SETULT:
2515    compareOp = ISD::SETLT; break;
2516  case ISD::SETOLE:
2517  case ISD::SETULE:
2518    compareOp = ISD::SETLE; break;
2519  case ISD::SETUNE:
2520  case ISD::SETONE:
2521    compareOp = ISD::SETNE; break;
2522  default:
2523    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2524  }
2525
2526  SDValue result =
2527          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2528                       (ISD::CondCode) compareOp);
2529
2530  if ((CC->get() & 0x8) == 0) {
2531    // Ordered comparison:
2532    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2533                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
2534                                  ISD::SETO);
2535    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2536                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
2537                                  ISD::SETO);
2538    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2539
2540    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2541  }
2542
2543  return result;
2544}
2545
2546//! Lower ISD::SELECT_CC
2547/*!
2548  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2549  SELB instruction.
2550
2551  \note Need to revisit this in the future: if the code path through the true
2552  and false value computations is longer than the latency of a branch (6
2553  cycles), then it would be more advantageous to branch and insert a new basic
2554  block and branch on the condition. However, this code does not make that
2555  assumption, given the simplisitc uses so far.
2556 */
2557
2558static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2559                              const TargetLowering &TLI) {
2560  MVT VT = Op.getValueType();
2561  SDValue lhs = Op.getOperand(0);
2562  SDValue rhs = Op.getOperand(1);
2563  SDValue trueval = Op.getOperand(2);
2564  SDValue falseval = Op.getOperand(3);
2565  SDValue condition = Op.getOperand(4);
2566  DebugLoc dl = Op.getDebugLoc();
2567
2568  // NOTE: SELB's arguments: $rA, $rB, $mask
2569  //
2570  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2571  // where bits in $mask are 1. CCond will be inverted, having 1s where the
2572  // condition was true and 0s where the condition was false. Hence, the
2573  // arguments to SELB get reversed.
2574
2575  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2576  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2577  // with another "cannot select select_cc" assert:
2578
2579  SDValue compare = DAG.getNode(ISD::SETCC, dl,
2580                                TLI.getSetCCResultType(Op.getValueType()),
2581                                lhs, rhs, condition);
2582  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2583}
2584
2585//! Custom lower ISD::TRUNCATE
2586static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2587{
2588  // Type to truncate to
2589  MVT VT = Op.getValueType();
2590  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2591  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2592  DebugLoc dl = Op.getDebugLoc();
2593
2594  // Type to truncate from
2595  SDValue Op0 = Op.getOperand(0);
2596  MVT Op0VT = Op0.getValueType();
2597
2598  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2599    // Create shuffle mask, least significant doubleword of quadword
2600    unsigned maskHigh = 0x08090a0b;
2601    unsigned maskLow = 0x0c0d0e0f;
2602    // Use a shuffle to perform the truncation
2603    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2604                                   DAG.getConstant(maskHigh, MVT::i32),
2605                                   DAG.getConstant(maskLow, MVT::i32),
2606                                   DAG.getConstant(maskHigh, MVT::i32),
2607                                   DAG.getConstant(maskLow, MVT::i32));
2608
2609    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2610                                       Op0, Op0, shufMask);
2611
2612    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2613  }
2614
2615  return SDValue();             // Leave the truncate unmolested
2616}
2617
2618//! Custom (target-specific) lowering entry point
2619/*!
2620  This is where LLVM's DAG selection process calls to do target-specific
2621  lowering of nodes.
2622 */
2623SDValue
2624SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2625{
2626  unsigned Opc = (unsigned) Op.getOpcode();
2627  MVT VT = Op.getValueType();
2628
2629  switch (Opc) {
2630  default: {
2631#ifndef NDEBUG
2632    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2633    cerr << "Op.getOpcode() = " << Opc << "\n";
2634    cerr << "*Op.getNode():\n";
2635    Op.getNode()->dump();
2636#endif
2637    llvm_unreachable(0);
2638  }
2639  case ISD::LOAD:
2640  case ISD::EXTLOAD:
2641  case ISD::SEXTLOAD:
2642  case ISD::ZEXTLOAD:
2643    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2644  case ISD::STORE:
2645    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2646  case ISD::ConstantPool:
2647    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2648  case ISD::GlobalAddress:
2649    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2650  case ISD::JumpTable:
2651    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2652  case ISD::ConstantFP:
2653    return LowerConstantFP(Op, DAG);
2654  case ISD::FORMAL_ARGUMENTS:
2655    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2656  case ISD::CALL:
2657    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2658  case ISD::RET:
2659    return LowerRET(Op, DAG, getTargetMachine());
2660
2661  // i8, i64 math ops:
2662  case ISD::ADD:
2663  case ISD::SUB:
2664  case ISD::ROTR:
2665  case ISD::ROTL:
2666  case ISD::SRL:
2667  case ISD::SHL:
2668  case ISD::SRA: {
2669    if (VT == MVT::i8)
2670      return LowerI8Math(Op, DAG, Opc, *this);
2671    break;
2672  }
2673
2674  case ISD::FP_TO_SINT:
2675  case ISD::FP_TO_UINT:
2676    return LowerFP_TO_INT(Op, DAG, *this);
2677
2678  case ISD::SINT_TO_FP:
2679  case ISD::UINT_TO_FP:
2680    return LowerINT_TO_FP(Op, DAG, *this);
2681
2682  // Vector-related lowering.
2683  case ISD::BUILD_VECTOR:
2684    return LowerBUILD_VECTOR(Op, DAG);
2685  case ISD::SCALAR_TO_VECTOR:
2686    return LowerSCALAR_TO_VECTOR(Op, DAG);
2687  case ISD::VECTOR_SHUFFLE:
2688    return LowerVECTOR_SHUFFLE(Op, DAG);
2689  case ISD::EXTRACT_VECTOR_ELT:
2690    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2691  case ISD::INSERT_VECTOR_ELT:
2692    return LowerINSERT_VECTOR_ELT(Op, DAG);
2693
2694  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2695  case ISD::AND:
2696  case ISD::OR:
2697  case ISD::XOR:
2698    return LowerByteImmed(Op, DAG);
2699
2700  // Vector and i8 multiply:
2701  case ISD::MUL:
2702    if (VT == MVT::i8)
2703      return LowerI8Math(Op, DAG, Opc, *this);
2704
2705  case ISD::CTPOP:
2706    return LowerCTPOP(Op, DAG);
2707
2708  case ISD::SELECT_CC:
2709    return LowerSELECT_CC(Op, DAG, *this);
2710
2711  case ISD::SETCC:
2712    return LowerSETCC(Op, DAG, *this);
2713
2714  case ISD::TRUNCATE:
2715    return LowerTRUNCATE(Op, DAG);
2716  }
2717
2718  return SDValue();
2719}
2720
//! Replace results of nodes with illegal result types (currently a no-op).
/*!
  No SPU node currently requires custom result expansion, so all results
  are left for the common legalizer. The disabled skeleton below is kept
  as a template should that change.
 */
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
2743
2744//===----------------------------------------------------------------------===//
2745// Target Optimization Hooks
2746//===----------------------------------------------------------------------===//
2747
//! Target-specific DAG combines for Cell SPU.
/*!
  Simplifies SPU-specific node patterns after legalization:
  - folds constants into SPUindirect address arithmetic,
  - drops redundant extends of VEC2PREFSLOT extractions,
  - collapses degenerate (zero-amount) vector shifts,
  - cancels PREFSLOT2VEC/VEC2PREFSLOT round trips.
  Returns the replacement value, or an empty SDValue for "no change".
 */
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    // Fold an add of a SPUindirect with a constant into the SPUindirect's
    // offset operand.
    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    // A-form addresses need no indirection when the displacement is zero
    // (only when not using large memory model).
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }
#endif

  return Result;
}
2935
2936//===----------------------------------------------------------------------===//
2937// Inline Assembly Support
2938//===----------------------------------------------------------------------===//
2939
2940/// getConstraintType - Given a constraint letter, return the type of
2941/// constraint it is for this target.
2942SPUTargetLowering::ConstraintType
2943SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
2944  if (ConstraintLetter.size() == 1) {
2945    switch (ConstraintLetter[0]) {
2946    default: break;
2947    case 'b':
2948    case 'r':
2949    case 'f':
2950    case 'v':
2951    case 'y':
2952      return C_RegisterClass;
2953    }
2954  }
2955  return TargetLowering::getConstraintType(ConstraintLetter);
2956}
2957
2958std::pair<unsigned, const TargetRegisterClass*>
2959SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
2960                                                MVT VT) const
2961{
2962  if (Constraint.size() == 1) {
2963    // GCC RS6000 Constraint Letters
2964    switch (Constraint[0]) {
2965    case 'b':   // R1-R31
2966    case 'r':   // R0-R31
2967      if (VT == MVT::i64)
2968        return std::make_pair(0U, SPU::R64CRegisterClass);
2969      return std::make_pair(0U, SPU::R32CRegisterClass);
2970    case 'f':
2971      if (VT == MVT::f32)
2972        return std::make_pair(0U, SPU::R32FPRegisterClass);
2973      else if (VT == MVT::f64)
2974        return std::make_pair(0U, SPU::R64FPRegisterClass);
2975      break;
2976    case 'v':
2977      return std::make_pair(0U, SPU::GPRCRegisterClass);
2978    }
2979  }
2980
2981  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
2982}
2983
//! Compute used/known bits for a SPU operand
/*!
  Currently a no-op: no SPU target node contributes known-bit information
  (KnownZero/KnownOne are left untouched). The disabled switch below
  enumerates the candidate opcodes for a future implementation.
 */
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}
3019
3020unsigned
3021SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3022                                                   unsigned Depth) const {
3023  switch (Op.getOpcode()) {
3024  default:
3025    return 1;
3026
3027  case ISD::SETCC: {
3028    MVT VT = Op.getValueType();
3029
3030    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3031      VT = MVT::i32;
3032    }
3033    return VT.getSizeInBits();
3034  }
3035  }
3036}
3037
// LowerAsmOperandForConstraint - Lower an inline-asm operand for the given
// single-letter constraint. No SPU constraint currently needs custom
// operand lowering, so this is a pure delegation to the base class.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
3049
3050/// isLegalAddressImmediate - Return true if the integer value can be used
3051/// as the offset of the target addressing mode.
3052bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3053                                                const Type *Ty) const {
3054  // SPU's addresses are 256K:
3055  return (V > -(1 << 18) && V < (1 << 18) - 1);
3056}
3057
3058bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
3059  return false;
3060}
3061
3062bool
3063SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
3064  // The SPU target isn't yet aware of offsets.
3065  return false;
3066}
3067