SPUISelLowering.cpp revision 23b9b19b1a5a00faa9fce0788155c7dbfd00bfb1
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const EVT   valtype;
    const int   prefslot_byte;
  };

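  // The "preferred slot" is the portion of a 128-bit SPU register in which
  // the ABI keeps a scalar: byte 3 for i1/i8, bytes 2-3 for i16, and the
  // first word (starting at byte 0) for 32-bit and wider types.
  // prefslot_byte records the slot's first byte so that loads can rotate a
  // quadword into position.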
  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "getValueTypeMapEntry returns NULL for "
          << VT.getEVTString();
      llvm_report_error(Msg.str());
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */
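  //
  // (Behavioral note: the node's operands become the libcall arguments and
  // the node's first value type becomes the call's return type, so callers
  // only need to choose the RTLIB entry.)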

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
                Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            0, CallingConv::C, false,
                            /*isReturnValueUsed=*/true,
                            Callee, Args, DAG,
                            Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Treat division by a power of two as cheap, so the DAG combiner does not
  // expand it into a shift sequence.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");
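  // (FDIV on f64 is marked Expand below, so f64 division legalizes to the
  // DIV_F64 libcall, which the line above redirects to __fast_divdf3.)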

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8 multiplication; i32 and i64 multiplies are legal
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP, but it does support CTLZ for i32.
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but for f64 it
  // needs to be expanded into a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // to libcall

  // SPU has [U|S]INT_TO_FP for f32->i32, but not for f64->i32, f64->i64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // mul is likewise legal for the supported vector VT's.
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
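  // (i.e., SETCC produces 0 for false and all-ones (-1) for true, matching
  // the all-ones masks generated by the SPU compare instructions.)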

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(SchedulingForRegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
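/// (A return value of 3 means 2^3 = 8-byte alignment.)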
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
    VT.getSimpleVT().SimpleTy :
    MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);
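        // Worked example (hypothetical numbers): an i32 load from
        // basePtr+0x16 yields rotamt = (0x16 & 0xf) - 0 = 6, so the quadword
        // is rotated left 6 bytes and basePtr is advanced by 0x10 below.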

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
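    // (For example, a 32-bit InVT yields 128/32 = 4 lanes, i.e. v4i32.)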
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
      Msg << (unsigned) LN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits())),
        stVecVT = EVT::getVectorVT(*DAG.getContext(),
                                   StVT, (128 / StVT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "CellSPU LowerSTORE: basePtr = ";
        basePtr.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif
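    // The store is a read-modify-write of the containing quadword:
    // SHUFFLE_MASK encodes the insertion byte offset, SCALAR_TO_VECTOR
    // splats the scalar, and SHUFB merges it into the loaded block, which
    // is then written back out below.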

    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      std::string msg;
      raw_string_ostream Msg(msg);
895      Msg << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
896            "UNINDEXED\n";
      Msg << (unsigned) SN->getAddressingMode();
      llvm_report_error(Msg.str());
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG,
                       const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
    llvm_report_error("LowerGlobalAddress: Relocation model other than static"
                      " not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
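/*!
 Sketch of the approach: the f64's 64-bit pattern is splatted into a v2i64
 BUILD_VECTOR, bitcast to v2f64, and the preferred slot is extracted, so no
 constant-pool load is needed.
 */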
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        unsigned CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals) {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default: {
        std::string msg;
        raw_string_ostream Msg(msg);
        Msg << "LowerFormalArguments Unhandled argument type: "
            << ObjectVT.getEVTString();
        llvm_report_error(Msg.str());
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot

    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as an LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.
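  // That is, Addr must be a 4-byte-aligned value whose top 14 bits are the
  // sign-extension of an 18-bit field; the constant returned below is the
  // corresponding word index (Addr >> 2).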
1141
1142  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1143}
1144
1145SDValue
1146SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
1147                             unsigned CallConv, bool isVarArg,
1148                             bool isTailCall,
1149                             const SmallVectorImpl<ISD::OutputArg> &Outs,
1150                             const SmallVectorImpl<ISD::InputArg> &Ins,
1151                             DebugLoc dl, SelectionDAG &DAG,
1152                             SmallVectorImpl<SDValue> &InVals) {
1153
1154  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
1155  unsigned NumOps     = Outs.size();
1156  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
1157  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
1158  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
1159
1160  // Handy pointer type
1161  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1162
1163  // Accumulate how many bytes are to be pushed on the stack, including the
1164  // linkage area, and parameter passing area.  According to the SPU ABI,
1165  // we minimally need space for [LR] and [SP]
1166  unsigned NumStackBytes = SPUFrameInfo::minStackSize();
1167
1168  // Set up a copy of the stack pointer for use loading and storing any
1169  // arguments that may not fit in the registers available for argument
1170  // passing.
1171  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);
1172
1173  // Figure out which arguments are going to go in registers, and which in
1174  // memory.
1175  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
1176  unsigned ArgRegIdx = 0;
1177
1178  // Keep track of registers passing arguments
1179  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
1180  // And the arguments passed on the stack
1181  SmallVector<SDValue, 8> MemOpChains;
1182
1183  for (unsigned i = 0; i != NumOps; ++i) {
1184    SDValue Arg = Outs[i].Val;
1185
1186    // PtrOff will be used to store the current argument to the stack if a
1187    // register cannot be found for it.
1188    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
1189    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
1190
1191    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
1192    default: llvm_unreachable("Unexpected ValueType for argument!");
1193    case MVT::i8:
1194    case MVT::i16:
1195    case MVT::i32:
1196    case MVT::i64:
1197    case MVT::i128:
1198      if (ArgRegIdx != NumArgRegs) {
1199        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1200      } else {
1201        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1202        ArgOffset += StackSlotSize;
1203      }
1204      break;
1205    case MVT::f32:
1206    case MVT::f64:
1207      if (ArgRegIdx != NumArgRegs) {
1208        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1209      } else {
1210        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1211        ArgOffset += StackSlotSize;
1212      }
1213      break;
1214    case MVT::v2i64:
1215    case MVT::v2f64:
1216    case MVT::v4f32:
1217    case MVT::v4i32:
1218    case MVT::v8i16:
1219    case MVT::v16i8:
1220      if (ArgRegIdx != NumArgRegs) {
1221        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
1222      } else {
1223        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0));
1224        ArgOffset += StackSlotSize;
1225      }
1226      break;
1227    }
1228  }
1229
1230  // Update number of stack bytes actually used, insert a call sequence start
1231  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
1232  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
1233                                                            true));
1234
1235  if (!MemOpChains.empty()) {
1236    // Adjust the stack pointer for the stack arguments.
1237    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
1238                        &MemOpChains[0], MemOpChains.size());
1239  }
1240
1241  // Build a sequence of copy-to-reg nodes chained together with token chain
1242  // and flag operands which copy the outgoing args into the appropriate regs.
1243  SDValue InFlag;
1244  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
1245    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
1246                             RegsToPass[i].second, InFlag);
1247    InFlag = Chain.getValue(1);
1248  }
1249
1250  SmallVector<SDValue, 8> Ops;
1251  unsigned CallOpc = SPUISD::CALL;
1252
1253  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
1254  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
1255  // node so that legalize doesn't hack it.
1256  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
1257    GlobalValue *GV = G->getGlobal();
1258    EVT CalleeVT = Callee.getValueType();
1259    SDValue Zero = DAG.getConstant(0, PtrVT);
1260    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);
1261
1262    if (!ST->usingLargeMem()) {
1263      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
1264      // style calls, otherwise, external symbols are BRASL calls. This assumes
1265      // that declared/defined symbols are in the same compilation unit and can
1266      // be reached through PC-relative jumps.
1267      //
1268      // NOTE:
1269      // This may be an unsafe assumption for JIT and really large compilation
1270      // units.
1271      if (GV->isDeclaration()) {
1272        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
1273      } else {
1274        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
1275      }
1276    } else {
1277      // "Large memory" mode: Turn all calls into indirect calls with a X-form
1278      // address pairs:
1279      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
1280    }
1281  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
1282    EVT CalleeVT = Callee.getValueType();
1283    SDValue Zero = DAG.getConstant(0, PtrVT);
1284    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
1285        Callee.getValueType());
1286
1287    if (!ST->usingLargeMem()) {
1288      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
1289    } else {
1290      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
1291    }
1292  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
1293    // If this is an absolute destination address that appears to be a legal
1294    // local store address, use the munged value.
1295    Callee = SDValue(Dest, 0);
1296  }
1297
1298  Ops.push_back(Chain);
1299  Ops.push_back(Callee);
1300
1301  // Add argument registers to the end of the list so that they are known live
1302  // into the call.
1303  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
1304    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
1305                                  RegsToPass[i].second.getValueType()));
1306
1307  if (InFlag.getNode())
1308    Ops.push_back(InFlag);
1309  // Returns a chain and a flag for retval copy to use.
1310  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
1311                      &Ops[0], Ops.size());
1312  InFlag = Chain.getValue(1);
1313
1314  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
1315                             DAG.getIntPtrConstant(0, true), InFlag);
1316  if (!Ins.empty())
1317    InFlag = Chain.getValue(1);
1318
1319  // If the function returns void, just return the chain.
1320  if (Ins.empty())
1321    return Chain;
1322
1323  // If the call has results, copy the values out of the ret val registers.
1324  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
1325  default: llvm_unreachable("Unexpected ret value!");
1326  case MVT::Other: break;
1327  case MVT::i32:
1328    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
1329      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
1330                                 MVT::i32, InFlag).getValue(1);
1331      InVals.push_back(Chain.getValue(0));
1332      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1333                                 Chain.getValue(2)).getValue(1);
1334      InVals.push_back(Chain.getValue(0));
1335    } else {
1336      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
1337                                 InFlag).getValue(1);
1338      InVals.push_back(Chain.getValue(0));
1339    }
1340    break;
1341  case MVT::i64:
1342    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i64,
1343                               InFlag).getValue(1);
1344    InVals.push_back(Chain.getValue(0));
1345    break;
1346  case MVT::i128:
1347    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i128,
1348                               InFlag).getValue(1);
1349    InVals.push_back(Chain.getValue(0));
1350    break;
1351  case MVT::f32:
1352  case MVT::f64:
1353    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
1354                               InFlag).getValue(1);
1355    InVals.push_back(Chain.getValue(0));
1356    break;
1357  case MVT::v2f64:
1358  case MVT::v2i64:
1359  case MVT::v4f32:
1360  case MVT::v4i32:
1361  case MVT::v8i16:
1362  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
1365    InVals.push_back(Chain.getValue(0));
1366    break;
1367  }
1368
1369  return Chain;
1370}
1371
1372SDValue
1373SPUTargetLowering::LowerReturn(SDValue Chain,
1374                               unsigned CallConv, bool isVarArg,
1375                               const SmallVectorImpl<ISD::OutputArg> &Outs,
1376                               DebugLoc dl, SelectionDAG &DAG) {
1377
1378  SmallVector<CCValAssign, 16> RVLocs;
1379  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
1380                 RVLocs, *DAG.getContext());
1381  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);
1382
1383  // If this is the first return lowered for this function, add the regs to the
1384  // liveout set for the function.
1385  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1386    for (unsigned i = 0; i != RVLocs.size(); ++i)
1387      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1388  }
1389
1390  SDValue Flag;
1391
1392  // Copy the result values into the output registers.
1393  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1394    CCValAssign &VA = RVLocs[i];
1395    assert(VA.isRegLoc() && "Can only return in registers!");
1396    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
1397                             Outs[i].Val, Flag);
1398    Flag = Chain.getValue(1);
1399  }
1400
1401  if (Flag.getNode())
1402    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
1403  else
1404    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
1405}
1406
1407
1408//===----------------------------------------------------------------------===//
1409// Vector related lowering:
1410//===----------------------------------------------------------------------===//
1411
1412static ConstantSDNode *
1413getVecImm(SDNode *N) {
1414  SDValue OpVal(0, 0);
1415
1416  // Check to see if this buildvec has a single non-undef value in its elements.
1417  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1418    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1419    if (OpVal.getNode() == 0)
1420      OpVal = N->getOperand(i);
1421    else if (OpVal != N->getOperand(i))
1422      return 0;
1423  }
1424
1425  if (OpVal.getNode() != 0) {
1426    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1427      return CN;
1428    }
1429  }
1430
1431  return 0;
1432}
1433
/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
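/// For illustration: a v2i64 splat of 0x0001234500012345 has equal 32-bit
/// halves, so it is accepted and yields the constant 0x12345, whereas
/// 0x0000000100000002 is rejected because its halves differ.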
1437SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1438                              EVT ValueType) {
1439  if (ConstantSDNode *CN = getVecImm(N)) {
1440    uint64_t Value = CN->getZExtValue();
1441    if (ValueType == MVT::i64) {
1442      uint64_t UValue = CN->getZExtValue();
1443      uint32_t upper = uint32_t(UValue >> 32);
1444      uint32_t lower = uint32_t(UValue);
1445      if (upper != lower)
1446        return SDValue();
1447      Value = Value >> 32;
1448    }
1449    if (Value <= 0x3ffff)
1450      return DAG.getTargetConstant(Value, ValueType);
1451  }
1452
1453  return SDValue();
1454}
1455
1456/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1457/// and the value fits into a signed 16-bit constant, and if so, return the
1458/// constant
1459SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1460                              EVT ValueType) {
1461  if (ConstantSDNode *CN = getVecImm(N)) {
1462    int64_t Value = CN->getSExtValue();
1463    if (ValueType == MVT::i64) {
1464      uint64_t UValue = CN->getZExtValue();
1465      uint32_t upper = uint32_t(UValue >> 32);
1466      uint32_t lower = uint32_t(UValue);
1467      if (upper != lower)
1468        return SDValue();
1469      Value = Value >> 32;
1470    }
1471    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1472      return DAG.getTargetConstant(Value, ValueType);
1473    }
1474  }
1475
1476  return SDValue();
1477}
1478
1479/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1480/// and the value fits into a signed 10-bit constant, and if so, return the
1481/// constant
1482SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1483                              EVT ValueType) {
1484  if (ConstantSDNode *CN = getVecImm(N)) {
1485    int64_t Value = CN->getSExtValue();
1486    if (ValueType == MVT::i64) {
1487      uint64_t UValue = CN->getZExtValue();
1488      uint32_t upper = uint32_t(UValue >> 32);
1489      uint32_t lower = uint32_t(UValue);
1490      if (upper != lower)
1491        return SDValue();
1492      Value = Value >> 32;
1493    }
1494    if (isS10Constant(Value))
1495      return DAG.getTargetConstant(Value, ValueType);
1496  }
1497
1498  return SDValue();
1499}
1500
1501/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1502/// and the value fits into a signed 8-bit constant, and if so, return the
1503/// constant.
1504///
1505/// @note: The incoming vector is v16i8 because that's the only way we can load
1506/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1507/// same value.
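/// For illustration: a 16-bit splat value of 0x4141 has matching upper and
/// lower bytes and yields the 8-bit constant 0x41; 0x4142 is rejected.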
1508SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1509                             EVT ValueType) {
1510  if (ConstantSDNode *CN = getVecImm(N)) {
1511    int Value = (int) CN->getZExtValue();
1512    if (ValueType == MVT::i16
1513        && Value <= 0xffff                 /* truncated from uint64_t */
1514        && ((short) Value >> 8) == ((short) Value & 0xff))
1515      return DAG.getTargetConstant(Value & 0xff, ValueType);
1516    else if (ValueType == MVT::i8
1517             && (Value & 0xff) == Value)
1518      return DAG.getTargetConstant(Value, ValueType);
1519  }
1520
1521  return SDValue();
1522}
1523
1524/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1525/// and the value fits into a signed 16-bit constant, and if so, return the
1526/// constant
1527SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1528                               EVT ValueType) {
1529  if (ConstantSDNode *CN = getVecImm(N)) {
1530    uint64_t Value = CN->getZExtValue();
1531    if ((ValueType == MVT::i32
1532          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1533        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1534      return DAG.getTargetConstant(Value >> 16, ValueType);
1535  }
1536
1537  return SDValue();
1538}
1539
1540/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1541SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1542  if (ConstantSDNode *CN = getVecImm(N)) {
1543    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1544  }
1545
1546  return SDValue();
1547}
1548
/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1550SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1551  if (ConstantSDNode *CN = getVecImm(N)) {
    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1553  }
1554
1555  return SDValue();
1556}
1557
//! Lower a BUILD_VECTOR node, matching constant splats to SPU immediate forms:
1559static SDValue
1560LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1561  EVT VT = Op.getValueType();
1562  EVT EltVT = VT.getVectorElementType();
1563  DebugLoc dl = Op.getDebugLoc();
1564  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1565  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1566  unsigned minSplatBits = EltVT.getSizeInBits();
1567
1568  if (minSplatBits < 16)
1569    minSplatBits = 16;
1570
1571  APInt APSplatBits, APSplatUndef;
1572  unsigned SplatBitSize;
1573  bool HasAnyUndefs;
1574
1575  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1576                            HasAnyUndefs, minSplatBits)
1577      || minSplatBits < SplatBitSize)
1578    return SDValue();   // Wasn't a constant vector or splat exceeded min
1579
1580  uint64_t SplatBits = APSplatBits.getZExtValue();
1581
1582  switch (VT.getSimpleVT().SimpleTy) {
1583  default: {
1584    std::string msg;
1585    raw_string_ostream Msg(msg);
1586    Msg << "CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = "
1587         << VT.getEVTString();
1588    llvm_report_error(Msg.str());
1589    /*NOTREACHED*/
1590  }
1591  case MVT::v4f32: {
1592    uint32_t Value32 = uint32_t(SplatBits);
1593    assert(SplatBitSize == 32
1594           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1595    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1596    SDValue T = DAG.getConstant(Value32, MVT::i32);
1597    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1598                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1600  }
1601  case MVT::v2f64: {
1602    uint64_t f64val = uint64_t(SplatBits);
1603    assert(SplatBitSize == 64
1604           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1605    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1606    SDValue T = DAG.getConstant(f64val, MVT::i64);
1607    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1608                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1610  }
1611  case MVT::v16i8: {
    // 8-bit constants have to be expanded to 16 bits
    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
    SmallVector<SDValue, 8> Ops;

    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16,
                                   &Ops[0], Ops.size()));
1619  }
1620  case MVT::v8i16: {
1621    unsigned short Value16 = SplatBits;
1622    SDValue T = DAG.getConstant(Value16, EltVT);
1623    SmallVector<SDValue, 8> Ops;
1624
1625    Ops.assign(8, T);
1626    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1627  }
1628  case MVT::v4i32: {
1629    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1630    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1631  }
1632  case MVT::v2i32: {
1633    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1634    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1635  }
1636  case MVT::v2i64: {
1637    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1638  }
1639  }
1640
1641  return SDValue();
1642}
1643
/*!
 Lower a splatted v2i64 (or v2f64) constant: if both 32-bit halves match,
 emit a v4i32 splat; if both halves are "special" patterns, fall back to a
 constant pool load; otherwise synthesize the value with a SHUFB byte mask.
 */
1646SDValue
1647SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1648                     DebugLoc dl) {
1649  uint32_t upper = uint32_t(SplatVal >> 32);
1650  uint32_t lower = uint32_t(SplatVal);
1651
1652  if (upper == lower) {
    // Magic constant that can be matched by IL, ILA, et al.
1654    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1655    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1656                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1657                                   Val, Val, Val, Val));
1658  } else {
1659    bool upper_special, lower_special;
1660
1661    // NOTE: This code creates common-case shuffle masks that can be easily
1662    // detected as common expressions. It is not attempting to create highly
1663    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1664
1665    // Detect if the upper or lower half is a special shuffle mask pattern:
1666    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1667    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1668
1669    // Both upper and lower are special, lower to a constant pool load:
1670    if (lower_special && upper_special) {
1671      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1672      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1673                         SplatValCN, SplatValCN);
1674    }
1675
1676    SDValue LO32;
1677    SDValue HI32;
1678    SmallVector<SDValue, 16> ShufBytes;
1679    SDValue Result;
1680
1681    // Create lower vector if not a special pattern
1682    if (!lower_special) {
1683      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1684      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686                                     LO32C, LO32C, LO32C, LO32C));
1687    }
1688
1689    // Create upper vector if not a special pattern
1690    if (!upper_special) {
1691      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1692      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1693                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1694                                     HI32C, HI32C, HI32C, HI32C));
1695    }
1696
1697    // If either upper or lower are special, then the two input operands are
1698    // the same (basically, one of them is a "don't care")
1699    if (lower_special)
1700      LO32 = HI32;
1701    if (upper_special)
1702      HI32 = LO32;
1703
1704    for (int i = 0; i < 4; ++i) {
1705      uint64_t val = 0;
1706      for (int j = 0; j < 4; ++j) {
1707        SDValue V;
1708        bool process_upper, process_lower;
1709        val <<= 8;
1710        process_upper = (upper_special && (i & 1) == 0);
1711        process_lower = (lower_special && (i & 1) == 1);
1712
1713        if (process_upper || process_lower) {
1714          if ((process_upper && upper == 0)
1715                  || (process_lower && lower == 0))
1716            val |= 0x80;
1717          else if ((process_upper && upper == 0xffffffff)
1718                  || (process_lower && lower == 0xffffffff))
1719            val |= 0xc0;
1720          else if ((process_upper && upper == 0x80000000)
1721                  || (process_lower && lower == 0x80000000))
1722            val |= (j == 0 ? 0xe0 : 0x80);
1723        } else
1724          val |= i * 4 + j + ((i & 1) * 16);
1725      }
1726
1727      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1728    }
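    // For illustration (a worked example, not generated code): splatting
    // 0x0000000012345678 has upper == 0 (special) and a non-special lower,
    // so LO32 = splat(0x12345678), HI32 = LO32, and the mask words become
    //   0x80808080 0x14151617 0x80808080 0x1c1d1e1f
    // zeroing the upper word of each doubleword and pulling bytes 4..7 of
    // the second SHUFB operand into the lower word.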
1729
1730    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1731                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1732                                   &ShufBytes[0], ShufBytes.size()));
1733  }
1734}
1735
1736/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1737/// which the Cell can operate. The code inspects V3 to ascertain whether the
1738/// permutation vector, V3, is monotonically increasing with one "exception"
1739/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1740/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1741/// In either case, the net result is going to eventually invoke SHUFB to
1742/// permute/shuffle the bytes from V1 and V2.
1743/// \note
/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
/// generate the control word for byte/halfword/word insertion. This takes
/// care of a single element move from V2 into V1.
1747/// \note
/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
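/// For illustration: on v4i32, the mask (0, 1, 5, 3) is monotonic with one
/// element (index 5, i.e., element 1 of V2) taken from V2, so a single
/// insertion mask suffices; the mask (1, 2, 3, 0) is recognized as a rotation
/// and becomes a 4-byte ROTBYTES_LEFT instead.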
1749static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1750  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1751  SDValue V1 = Op.getOperand(0);
1752  SDValue V2 = Op.getOperand(1);
1753  DebugLoc dl = Op.getDebugLoc();
1754
1755  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1756
  // If we have a single element being moved from V2 into V1, this can be
  // handled using the C*[DX] compute mask instructions, but the vector
  // elements have to be monotonically increasing with one exception element.
1760  EVT VecVT = V1.getValueType();
1761  EVT EltVT = VecVT.getVectorElementType();
1762  unsigned EltsFromV2 = 0;
1763  unsigned V2Elt = 0;
1764  unsigned V2EltIdx0 = 0;
1765  unsigned CurrElt = 0;
1766  unsigned MaxElts = VecVT.getVectorNumElements();
1767  unsigned PrevElt = 0;
1768  unsigned V0Elt = 0;
1769  bool monotonic = true;
1770  bool rotate = true;
1771
1772  if (EltVT == MVT::i8) {
1773    V2EltIdx0 = 16;
1774  } else if (EltVT == MVT::i16) {
1775    V2EltIdx0 = 8;
1776  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1777    V2EltIdx0 = 4;
1778  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1779    V2EltIdx0 = 2;
1780  } else
1781    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1782
1783  for (unsigned i = 0; i != MaxElts; ++i) {
1784    if (SVN->getMaskElt(i) < 0)
1785      continue;
1786
1787    unsigned SrcElt = SVN->getMaskElt(i);
1788
1789    if (monotonic) {
1790      if (SrcElt >= V2EltIdx0) {
        if (++EltsFromV2 <= 1) {
1792          V2Elt = (V2EltIdx0 - SrcElt) << 2;
1793        }
1794      } else if (CurrElt != SrcElt) {
1795        monotonic = false;
1796      }
1797
1798      ++CurrElt;
1799    }
1800
1801    if (rotate) {
1802      if (PrevElt > 0 && SrcElt < MaxElts) {
1803        if ((PrevElt == SrcElt - 1)
1804            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1805          PrevElt = SrcElt;
1806          if (SrcElt == 0)
1807            V0Elt = i;
1808        } else {
1809          rotate = false;
1810        }
1811      } else if (PrevElt == 0) {
1812        // First time through, need to keep track of previous element
1813        PrevElt = SrcElt;
1814      } else {
        // This isn't a rotation; it takes elements from vector 2.
1816        rotate = false;
1817      }
1818    }
1819  }
1820
1821  if (EltsFromV2 == 1 && monotonic) {
1822    // Compute mask and shuffle
1823    MachineFunction &MF = DAG.getMachineFunction();
1824    MachineRegisterInfo &RegInfo = MF.getRegInfo();
1825    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1826    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1827    // Initialize temporary register to 0
1828    SDValue InitTempReg =
1829      DAG.getCopyToReg(DAG.getEntryNode(), dl, VReg, DAG.getConstant(0, PtrVT));
1830    // Copy register's contents as index in SHUFFLE_MASK:
1831    SDValue ShufMaskOp =
1832      DAG.getNode(SPUISD::SHUFFLE_MASK, dl, MVT::v4i32,
1833                  DAG.getTargetConstant(V2Elt, MVT::i32),
1834                  DAG.getCopyFromReg(InitTempReg, dl, VReg, PtrVT));
1835    // Use shuffle mask in SHUFB synthetic instruction:
1836    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1837                       ShufMaskOp);
1838  } else if (rotate) {
1839    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1840
1841    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1842                       V1, DAG.getConstant(rotamt, MVT::i16));
1843  } else {
    // Convert the VECTOR_SHUFFLE mask's input element units to the
    // actual bytes.
1846    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1847
1848    SmallVector<SDValue, 16> ResultMask;
1849    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1850      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1851
1852      for (unsigned j = 0; j < BytesPerElement; ++j)
1853        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1854    }
1855
1856    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1857                                    &ResultMask[0], ResultMask.size());
1858    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1859  }
1860}
1861
1862static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1863  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1864  DebugLoc dl = Op.getDebugLoc();
1865
1866  if (Op0.getNode()->getOpcode() == ISD::Constant) {
1867    // For a constant, build the appropriate constant vector, which will
1868    // eventually simplify to a vector register load.
1869
1870    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1871    SmallVector<SDValue, 16> ConstVecValues;
1872    EVT VT;
1873    size_t n_copies;
1874
1875    // Create a constant vector:
1876    switch (Op.getValueType().getSimpleVT().SimpleTy) {
1877    default: llvm_unreachable("Unexpected constant value type in "
1878                              "LowerSCALAR_TO_VECTOR");
1879    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1880    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1881    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1882    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1883    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1884    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1885    }
1886
1887    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1888    for (size_t j = 0; j < n_copies; ++j)
1889      ConstVecValues.push_back(CValue);
1890
1891    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1892                       &ConstVecValues[0], ConstVecValues.size());
1893  } else {
1894    // Otherwise, copy the value from one register to another:
1895    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1896    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1897    case MVT::i8:
1898    case MVT::i16:
1899    case MVT::i32:
1900    case MVT::i64:
1901    case MVT::f32:
1902    case MVT::f64:
1903      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1904    }
1905  }
1906
1907  return SDValue();
1908}
1909
1910static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1911  EVT VT = Op.getValueType();
1912  SDValue N = Op.getOperand(0);
1913  SDValue Elt = Op.getOperand(1);
1914  DebugLoc dl = Op.getDebugLoc();
1915  SDValue retval;
1916
1917  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1918    // Constant argument:
1919    int EltNo = (int) C->getZExtValue();
1920
1921    // sanity checks:
1922    if (VT == MVT::i8 && EltNo >= 16)
1923      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1924    else if (VT == MVT::i16 && EltNo >= 8)
1925      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
    else if (VT == MVT::i32 && EltNo >= 4)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
    else if (VT == MVT::i64 && EltNo >= 2)
      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
1930
1931    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1932      // i32 and i64: Element 0 is the preferred slot
1933      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1934    }
1935
1936    // Need to generate shuffle mask and extract:
1937    int prefslot_begin = -1, prefslot_end = -1;
1938    int elt_byte = EltNo * VT.getSizeInBits() / 8;
1939
1940    switch (VT.getSimpleVT().SimpleTy) {
1941    default:
1942      assert(false && "Invalid value type!");
1943    case MVT::i8: {
1944      prefslot_begin = prefslot_end = 3;
1945      break;
1946    }
1947    case MVT::i16: {
1948      prefslot_begin = 2; prefslot_end = 3;
1949      break;
1950    }
1951    case MVT::i32:
1952    case MVT::f32: {
1953      prefslot_begin = 0; prefslot_end = 3;
1954      break;
1955    }
1956    case MVT::i64:
1957    case MVT::f64: {
1958      prefslot_begin = 0; prefslot_end = 7;
1959      break;
1960    }
1961    }
1962
1963    assert(prefslot_begin != -1 && prefslot_end != -1 &&
1964           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1965
1966    unsigned int ShufBytes[16];
1967    for (int i = 0; i < 16; ++i) {
      // zero fill upper part of preferred slot, don't care about the
      // other slots:
1970      unsigned int mask_val;
1971      if (i <= prefslot_end) {
1972        mask_val =
1973          ((i < prefslot_begin)
1974           ? 0x80
1975           : elt_byte + (i - prefslot_begin));
1976
1977        ShufBytes[i] = mask_val;
1978      } else
1979        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1980    }
1981
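    // For illustration (a worked example, not generated code): extracting
    // i16 element 2 gives elt_byte == 4, prefslot_begin == 2 and
    // prefslot_end == 3, so the 16 mask bytes become
    //   80 80 04 05  80 80 04 05  80 80 04 05  80 80 04 05
    // i.e., source bytes 4..5 land in the preferred halfword slot of every
    // word, zero-filled (0x80) above.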
1982    SDValue ShufMask[4];
1983    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1984      unsigned bidx = i * 4;
1985      unsigned int bits = ((ShufBytes[bidx] << 24) |
1986                           (ShufBytes[bidx+1] << 16) |
1987                           (ShufBytes[bidx+2] << 8) |
1988                           ShufBytes[bidx+3]);
1989      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1990    }
1991
1992    SDValue ShufMaskVec =
1993      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1994                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1995
1996    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1997                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1998                                     N, N, ShufMaskVec));
1999  } else {
2000    // Variable index: Rotate the requested element into slot 0, then replicate
2001    // slot 0 across the vector
2002    EVT VecVT = N.getValueType();
2003    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
      llvm_report_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit "
                        "vector type!");
2006    }
2007
2008    // Make life easier by making sure the index is zero-extended to i32
2009    if (Elt.getValueType() != MVT::i32)
2010      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2011
2012    // Scale the index to a bit/byte shift quantity
2013    APInt scaleFactor =
2014            APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2015    unsigned scaleShift = scaleFactor.logBase2();
2016    SDValue vecShift;
2017
2018    if (scaleShift > 0) {
2019      // Scale the shift factor:
2020      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2021                        DAG.getConstant(scaleShift, MVT::i32));
2022    }
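    // For illustration: a v4i32 vector has 16/4 == 4 bytes per element, so
    // scaleShift == 2 and the element index is shifted left by 2 to form a
    // byte offset (index 3 -> byte 12); for v16i8 the scale is 1 and no
    // shift is emitted.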
2023
2024    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2025
2026    // Replicate the bytes starting at byte 0 across the entire vector (for
2027    // consistency with the notion of a unified register set)
2028    SDValue replicate;
2029
2030    switch (VT.getSimpleVT().SimpleTy) {
    default:
      llvm_report_error("LowerEXTRACT_VECTOR_ELT(variable): Unhandled vector "
                        "type");
2034      /*NOTREACHED*/
2035    case MVT::i8: {
2036      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2037      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2038                              factor, factor, factor, factor);
2039      break;
2040    }
2041    case MVT::i16: {
2042      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2043      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2044                              factor, factor, factor, factor);
2045      break;
2046    }
2047    case MVT::i32:
2048    case MVT::f32: {
2049      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2050      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2051                              factor, factor, factor, factor);
2052      break;
2053    }
2054    case MVT::i64:
2055    case MVT::f64: {
2056      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2057      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2058      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2059                              loFactor, hiFactor, loFactor, hiFactor);
2060      break;
2061    }
2062    }
2063
2064    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2065                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2066                                     vecShift, vecShift, replicate));
2067  }
2068
2069  return retval;
2070}
2071
2072static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2073  SDValue VecOp = Op.getOperand(0);
2074  SDValue ValOp = Op.getOperand(1);
2075  SDValue IdxOp = Op.getOperand(2);
2076  DebugLoc dl = Op.getDebugLoc();
2077  EVT VT = Op.getValueType();
2078
2079  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2080  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2081
2082  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2083  // Use $sp ($1) because it's always 16-byte aligned and it's available:
2084  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2085                                DAG.getRegister(SPU::R1, PtrVT),
2086                                DAG.getConstant(CN->getSExtValue(), PtrVT));
2087  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2088
2089  SDValue result =
2090    DAG.getNode(SPUISD::SHUFB, dl, VT,
2091                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2092                VecOp,
2093                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2094
2095  return result;
2096}
2097
2098static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2099                           const TargetLowering &TLI)
2100{
2101  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2102  DebugLoc dl = Op.getDebugLoc();
2103  EVT ShiftVT = TLI.getShiftAmountTy();
2104
2105  assert(Op.getValueType() == MVT::i8);
2106  switch (Opc) {
2107  default:
2108    llvm_unreachable("Unhandled i8 math operator");
2109    /*NOTREACHED*/
2110    break;
2111  case ISD::ADD: {
2112    // 8-bit addition: Promote the arguments up to 16-bits and truncate
2113    // the result:
2114    SDValue N1 = Op.getOperand(1);
2115    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2116    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2117    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2118                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2119
2120  }
2121
2122  case ISD::SUB: {
2123    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2124    // the result:
2125    SDValue N1 = Op.getOperand(1);
2126    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2127    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2128    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2129                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2130  }
2131  case ISD::ROTR:
2132  case ISD::ROTL: {
2133    SDValue N1 = Op.getOperand(1);
2134    EVT N1VT = N1.getValueType();
2135
2136    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2137    if (!N1VT.bitsEq(ShiftVT)) {
2138      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2139                       ? ISD::ZERO_EXTEND
2140                       : ISD::TRUNCATE;
2141      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2142    }
2143
    // Replicate lower 8 bits into upper 8:
2145    SDValue ExpandArg =
2146      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2147                  DAG.getNode(ISD::SHL, dl, MVT::i16,
2148                              N0, DAG.getConstant(8, MVT::i32)));
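
    // For illustration (a sketch, not generated code): rotating 0xab left by
    // 3 first widens the doubled pattern to 0xabab; rotl16(0xabab, 3) ==
    // 0x5d5d, whose low byte 0x5d == rotl8(0xab, 3). Doubling the byte makes
    // the i16 rotate agree with the i8 rotate for any amount < 8.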
2149
2150    // Truncate back down to i8
2151    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2152                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2153  }
2154  case ISD::SRL:
2155  case ISD::SHL: {
2156    SDValue N1 = Op.getOperand(1);
2157    EVT N1VT = N1.getValueType();
2158
2159    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2160    if (!N1VT.bitsEq(ShiftVT)) {
2161      unsigned N1Opc = ISD::ZERO_EXTEND;
2162
2163      if (N1.getValueType().bitsGT(ShiftVT))
2164        N1Opc = ISD::TRUNCATE;
2165
2166      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2167    }
2168
2169    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2170                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2171  }
2172  case ISD::SRA: {
2173    SDValue N1 = Op.getOperand(1);
2174    EVT N1VT = N1.getValueType();
2175
2176    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2177    if (!N1VT.bitsEq(ShiftVT)) {
2178      unsigned N1Opc = ISD::SIGN_EXTEND;
2179
2180      if (N1VT.bitsGT(ShiftVT))
2181        N1Opc = ISD::TRUNCATE;
2182      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2183    }
2184
2185    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2186                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2187  }
2188  case ISD::MUL: {
2189    SDValue N1 = Op.getOperand(1);
2190
2191    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2192    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2193    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2194                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2196  }
2197  }
2198
2199  return SDValue();
2200}
2201
2202//! Lower byte immediate operations for v16i8 vectors:
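//!
//! For illustration: (or v16i8 x, splat(0x0f)) is rewritten here with the
//! splat rebuilt as sixteen i8 target constants, which instruction selection
//! can then match to ORBI x, 0x0f; AND and XOR map to ANDBI/XORBI likewise.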
2203static SDValue
2204LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2205  SDValue ConstVec;
2206  SDValue Arg;
2207  EVT VT = Op.getValueType();
2208  DebugLoc dl = Op.getDebugLoc();
2209
2210  ConstVec = Op.getOperand(0);
2211  Arg = Op.getOperand(1);
2212  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2213    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2214      ConstVec = ConstVec.getOperand(0);
2215    } else {
2216      ConstVec = Op.getOperand(1);
2217      Arg = Op.getOperand(0);
2218      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2219        ConstVec = ConstVec.getOperand(0);
2220      }
2221    }
2222  }
2223
2224  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2225    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2226    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2227
2228    APInt APSplatBits, APSplatUndef;
2229    unsigned SplatBitSize;
2230    bool HasAnyUndefs;
2231    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2232
2233    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2234                              HasAnyUndefs, minSplatBits)
2235        && minSplatBits <= SplatBitSize) {
2236      uint64_t SplatBits = APSplatBits.getZExtValue();
2237      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2238
2239      SmallVector<SDValue, 16> tcVec;
2240      tcVec.assign(16, tc);
2241      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2242                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2243    }
2244  }
2245
2246  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2247  // lowered.  Return the operation, rather than a null SDValue.
2248  return Op;
2249}
2250
2251//! Custom lowering for CTPOP (count population)
2252/*!
  Custom lowering code that counts the number of ones in the input
2254  operand. SPU has such an instruction, but it counts the number of
2255  ones per byte, which then have to be accumulated.
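
  For illustration (i16, a worked example): for x == 0x0103, CNTB yields
  per-byte counts 0x0102; then (0x0102 >> 8) + 0x0102 == 0x0103, and masking
  with 0x0f gives 3, the population count of 0x0103.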
2256*/
2257static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2258  EVT VT = Op.getValueType();
2259  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2260                               VT, (128 / VT.getSizeInBits()));
2261  DebugLoc dl = Op.getDebugLoc();
2262
2263  switch (VT.getSimpleVT().SimpleTy) {
2264  default:
2265    assert(false && "Invalid value type!");
2266  case MVT::i8: {
2267    SDValue N = Op.getOperand(0);
2268    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2269
2270    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2271    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2272
2273    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2274  }
2275
2276  case MVT::i16: {
2277    MachineFunction &MF = DAG.getMachineFunction();
2278    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2279
2280    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2281
2282    SDValue N = Op.getOperand(0);
2283    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2284    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2285    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2286
2287    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2288    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2289
2290    // CNTB_result becomes the chain to which all of the virtual registers
2291    // CNTB_reg, SUM1_reg become associated:
2292    SDValue CNTB_result =
2293      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2294
2295    SDValue CNTB_rescopy =
2296      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2297
2298    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2299
2300    return DAG.getNode(ISD::AND, dl, MVT::i16,
2301                       DAG.getNode(ISD::ADD, dl, MVT::i16,
2302                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
2303                                               Tmp1, Shift1),
2304                                   Tmp1),
2305                       Mask0);
2306  }
2307
2308  case MVT::i32: {
2309    MachineFunction &MF = DAG.getMachineFunction();
2310    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2311
2312    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2313    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2314
2315    SDValue N = Op.getOperand(0);
2316    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2317    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2318    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2319    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2320
2321    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2322    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2323
2324    // CNTB_result becomes the chain to which all of the virtual registers
2325    // CNTB_reg, SUM1_reg become associated:
2326    SDValue CNTB_result =
2327      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2328
2329    SDValue CNTB_rescopy =
2330      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2331
2332    SDValue Comp1 =
2333      DAG.getNode(ISD::SRL, dl, MVT::i32,
2334                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2335                  Shift1);
2336
2337    SDValue Sum1 =
2338      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2339                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2340
2341    SDValue Sum1_rescopy =
2342      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2343
2344    SDValue Comp2 =
2345      DAG.getNode(ISD::SRL, dl, MVT::i32,
2346                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2347                  Shift2);
2348    SDValue Sum2 =
2349      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2350                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2351
2352    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2353  }
2354
2355  case MVT::i64:
2356    break;
2357  }
2358
2359  return SDValue();
2360}
2361
2362//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2363/*!
2364 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2365 All conversions to i64 are expanded to a libcall.
2366 */
2367static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2368                              SPUTargetLowering &TLI) {
2369  EVT OpVT = Op.getValueType();
2370  SDValue Op0 = Op.getOperand(0);
2371  EVT Op0VT = Op0.getValueType();
2372
2373  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2374      || OpVT == MVT::i64) {
2375    // Convert f32 / f64 to i32 / i64 via libcall.
2376    RTLIB::Libcall LC =
2377            (Op.getOpcode() == ISD::FP_TO_SINT)
2378             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2379             : RTLIB::getFPTOUINT(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2381    SDValue Dummy;
2382    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2383  }
2384
2385  return Op;
2386}
2387
2388//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2389/*!
2390 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2391 All conversions from i64 are expanded to a libcall.
2392 */
2393static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2394                              SPUTargetLowering &TLI) {
2395  EVT OpVT = Op.getValueType();
2396  SDValue Op0 = Op.getOperand(0);
2397  EVT Op0VT = Op0.getValueType();
2398
2399  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2400      || Op0VT == MVT::i64) {
2401    // Convert i32, i64 to f64 via libcall:
2402    RTLIB::Libcall LC =
2403            (Op.getOpcode() == ISD::SINT_TO_FP)
2404             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2405             : RTLIB::getUINTTOFP(Op0VT, OpVT);
    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2407    SDValue Dummy;
2408    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2409  }
2410
2411  return Op;
2412}
2413
2414//! Lower ISD::SETCC
2415/*!
2416 This handles MVT::f64 (double floating point) condition lowering
2417 */
2418static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2419                          const TargetLowering &TLI) {
2420  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2421  DebugLoc dl = Op.getDebugLoc();
2422  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2423
2424  SDValue lhs = Op.getOperand(0);
2425  SDValue rhs = Op.getOperand(1);
2426  EVT lhsVT = lhs.getValueType();
  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2428
2429  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2431  EVT IntVT(MVT::i64);
2432
2433  // Take advantage of the fact that (truncate (sra arg, 32)) is efficiently
2434  // selected to a NOP:
2435  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2436  SDValue lhsHi32 =
2437          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2438                      DAG.getNode(ISD::SRL, dl, IntVT,
2439                                  i64lhs, DAG.getConstant(32, MVT::i32)));
2440  SDValue lhsHi32abs =
2441          DAG.getNode(ISD::AND, dl, MVT::i32,
2442                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2443  SDValue lhsLo32 =
2444          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2445
2446  // SETO and SETUO only use the lhs operand:
2447  if (CC->get() == ISD::SETO) {
2448    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2449    // SETUO
2450    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2451    return DAG.getNode(ISD::XOR, dl, ccResultVT,
2452                       DAG.getSetCC(dl, ccResultVT,
2453                                    lhs, DAG.getConstantFP(0.0, lhsVT),
2454                                    ISD::SETUO),
2455                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2456  } else if (CC->get() == ISD::SETUO) {
2457    // Evaluates to true if Op0 is [SQ]NaN
2458    return DAG.getNode(ISD::AND, dl, ccResultVT,
2459                       DAG.getSetCC(dl, ccResultVT,
2460                                    lhsHi32abs,
2461                                    DAG.getConstant(0x7ff00000, MVT::i32),
2462                                    ISD::SETGE),
2463                       DAG.getSetCC(dl, ccResultVT,
2464                                    lhsLo32,
2465                                    DAG.getConstant(0, MVT::i32),
2466                                    ISD::SETGT));
2467  }
2468
2469  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2470  SDValue rhsHi32 =
2471          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2472                      DAG.getNode(ISD::SRL, dl, IntVT,
2473                                  i64rhs, DAG.getConstant(32, MVT::i32)));
2474
2475  // If a value is negative, subtract from the sign magnitude constant:
2476  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2477
2478  // Convert the sign-magnitude representation into 2's complement:
2479  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2480                                      lhsHi32, DAG.getConstant(31, MVT::i32));
2481  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2482  SDValue lhsSelect =
2483          DAG.getNode(ISD::SELECT, dl, IntVT,
2484                      lhsSelectMask, lhsSignMag2TC, i64lhs);
2485
2486  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2487                                      rhsHi32, DAG.getConstant(31, MVT::i32));
2488  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2489  SDValue rhsSelect =
2490          DAG.getNode(ISD::SELECT, dl, IntVT,
2491                      rhsSelectMask, rhsSignMag2TC, i64rhs);
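
  // For illustration (a sketch of the mapping, not generated code): doubles
  // are sign-magnitude, so a negative value x maps to
  // 0x8000000000000000 - bits(x). E.g., -1.0 (0xbff0000000000000) becomes
  // 0xc010000000000000, which is signed-less-than +1.0 (0x3ff0000000000000,
  // left unchanged), matching the floating-point ordering.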
2492
2493  unsigned compareOp;
2494
2495  switch (CC->get()) {
2496  case ISD::SETOEQ:
2497  case ISD::SETUEQ:
2498    compareOp = ISD::SETEQ; break;
2499  case ISD::SETOGT:
2500  case ISD::SETUGT:
2501    compareOp = ISD::SETGT; break;
2502  case ISD::SETOGE:
2503  case ISD::SETUGE:
2504    compareOp = ISD::SETGE; break;
2505  case ISD::SETOLT:
2506  case ISD::SETULT:
2507    compareOp = ISD::SETLT; break;
2508  case ISD::SETOLE:
2509  case ISD::SETULE:
2510    compareOp = ISD::SETLE; break;
2511  case ISD::SETUNE:
2512  case ISD::SETONE:
2513    compareOp = ISD::SETNE; break;
2514  default:
2515    llvm_report_error("CellSPU ISel Select: unimplemented f64 condition");
2516  }
2517
2518  SDValue result =
2519          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2520                       (ISD::CondCode) compareOp);
2521
2522  if ((CC->get() & 0x8) == 0) {
2523    // Ordered comparison:
2524    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2525                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
2526                                  ISD::SETO);
2527    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2528                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
2529                                  ISD::SETO);
2530    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2531
2532    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2533  }
2534
2535  return result;
2536}
2537
2538//! Lower ISD::SELECT_CC
2539/*!
2540  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2541  SELB instruction.
2542
2543  \note Need to revisit this in the future: if the code path through the true
2544  and false value computations is longer than the latency of a branch (6
2545  cycles), then it would be more advantageous to branch and insert a new basic
2546  block and branch on the condition. However, this code does not make that
  assumption, given the simplistic uses so far.
2548 */
2549
2550static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2551                              const TargetLowering &TLI) {
2552  EVT VT = Op.getValueType();
2553  SDValue lhs = Op.getOperand(0);
2554  SDValue rhs = Op.getOperand(1);
2555  SDValue trueval = Op.getOperand(2);
2556  SDValue falseval = Op.getOperand(3);
2557  SDValue condition = Op.getOperand(4);
2558  DebugLoc dl = Op.getDebugLoc();
2559
2560  // NOTE: SELB's arguments: $rA, $rB, $mask
2561  //
2562  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
  // where bits in $mask are 1. The compare result has 1s where the condition
  // was true and 0s where it was false, so trueval must occupy the $rB slot;
  // hence the arguments to SELB get reversed.
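  //
  // Bitwise, that is (a minimal sketch, not generated code):
  //
  //   result = (rA & ~mask) | (rB & mask)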
2566
2567  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2568  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2569  // with another "cannot select select_cc" assert:
2570
2571  SDValue compare = DAG.getNode(ISD::SETCC, dl,
2572                                TLI.getSetCCResultType(Op.getValueType()),
2573                                lhs, rhs, condition);
2574  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2575}
2576
2577//! Custom lower ISD::TRUNCATE
2578static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2579{
2580  // Type to truncate to
2581  EVT VT = Op.getValueType();
2582  MVT simpleVT = VT.getSimpleVT();
2583  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2584                               VT, (128 / VT.getSizeInBits()));
2585  DebugLoc dl = Op.getDebugLoc();
2586
2587  // Type to truncate from
2588  SDValue Op0 = Op.getOperand(0);
2589  EVT Op0VT = Op0.getValueType();
2590
2591  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2592    // Create shuffle mask, least significant doubleword of quadword
2593    unsigned maskHigh = 0x08090a0b;
2594    unsigned maskLow = 0x0c0d0e0f;
2595    // Use a shuffle to perform the truncation
2596    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2597                                   DAG.getConstant(maskHigh, MVT::i32),
2598                                   DAG.getConstant(maskLow, MVT::i32),
2599                                   DAG.getConstant(maskHigh, MVT::i32),
2600                                   DAG.getConstant(maskLow, MVT::i32));
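
    // For illustration: bytes 8..15 are the least-significant doubleword of
    // the big-endian i128, so this mask copies them into byte positions 0..7
    // (and again into 8..15), leaving the i64 in the preferred slot.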
2601
2602    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2603                                       Op0, Op0, shufMask);
2604
2605    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2606  }
2607
2608  return SDValue();             // Leave the truncate unmolested
2609}
2610
2611//! Custom (target-specific) lowering entry point
2612/*!
2613  This is where LLVM's DAG selection process calls to do target-specific
2614  lowering of nodes.
2615 */
2616SDValue
2617SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2618{
2619  unsigned Opc = (unsigned) Op.getOpcode();
2620  EVT VT = Op.getValueType();
2621
2622  switch (Opc) {
2623  default: {
2624#ifndef NDEBUG
2625    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2626    cerr << "Op.getOpcode() = " << Opc << "\n";
2627    cerr << "*Op.getNode():\n";
2628    Op.getNode()->dump();
2629#endif
2630    llvm_unreachable(0);
2631  }
2632  case ISD::LOAD:
2633  case ISD::EXTLOAD:
2634  case ISD::SEXTLOAD:
2635  case ISD::ZEXTLOAD:
2636    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2637  case ISD::STORE:
2638    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2639  case ISD::ConstantPool:
2640    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2641  case ISD::GlobalAddress:
2642    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2643  case ISD::JumpTable:
2644    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2645  case ISD::ConstantFP:
2646    return LowerConstantFP(Op, DAG);
2647
2648  // i8, i64 math ops:
2649  case ISD::ADD:
2650  case ISD::SUB:
2651  case ISD::ROTR:
2652  case ISD::ROTL:
2653  case ISD::SRL:
2654  case ISD::SHL:
2655  case ISD::SRA: {
2656    if (VT == MVT::i8)
2657      return LowerI8Math(Op, DAG, Opc, *this);
2658    break;
2659  }
2660
2661  case ISD::FP_TO_SINT:
2662  case ISD::FP_TO_UINT:
2663    return LowerFP_TO_INT(Op, DAG, *this);
2664
2665  case ISD::SINT_TO_FP:
2666  case ISD::UINT_TO_FP:
2667    return LowerINT_TO_FP(Op, DAG, *this);
2668
2669  // Vector-related lowering.
2670  case ISD::BUILD_VECTOR:
2671    return LowerBUILD_VECTOR(Op, DAG);
2672  case ISD::SCALAR_TO_VECTOR:
2673    return LowerSCALAR_TO_VECTOR(Op, DAG);
2674  case ISD::VECTOR_SHUFFLE:
2675    return LowerVECTOR_SHUFFLE(Op, DAG);
2676  case ISD::EXTRACT_VECTOR_ELT:
2677    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2678  case ISD::INSERT_VECTOR_ELT:
2679    return LowerINSERT_VECTOR_ELT(Op, DAG);
2680
2681  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2682  case ISD::AND:
2683  case ISD::OR:
2684  case ISD::XOR:
2685    return LowerByteImmed(Op, DAG);
2686
2687  // Vector and i8 multiply:
2688  case ISD::MUL:
2689    if (VT == MVT::i8)
2690      return LowerI8Math(Op, DAG, Opc, *this);
2691
2692  case ISD::CTPOP:
2693    return LowerCTPOP(Op, DAG);
2694
2695  case ISD::SELECT_CC:
2696    return LowerSELECT_CC(Op, DAG, *this);
2697
2698  case ISD::SETCC:
2699    return LowerSETCC(Op, DAG, *this);
2700
2701  case ISD::TRUNCATE:
2702    return LowerTRUNCATE(Op, DAG);
2703  }
2704
2705  return SDValue();
2706}
2707
2708void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2709                                           SmallVectorImpl<SDValue>&Results,
2710                                           SelectionDAG &DAG)
2711{
2712#if 0
2713  unsigned Opc = (unsigned) N->getOpcode();
2714  EVT OpVT = N->getValueType(0);
2715
2716  switch (Opc) {
2717  default: {
2718    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2719    cerr << "Op.getOpcode() = " << Opc << "\n";
2720    cerr << "*Op.getNode():\n";
2721    N->dump();
2722    abort();
2723    /*NOTREACHED*/
2724  }
2725  }
2726#endif
2727
2728  /* Otherwise, return unchanged */
2729}
2730
2731//===----------------------------------------------------------------------===//
2732// Target Optimization Hooks
2733//===----------------------------------------------------------------------===//
2734
2735SDValue
2736SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2737{
2738#if 0
2739  TargetMachine &TM = getTargetMachine();
2740#endif
2741  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2742  SelectionDAG &DAG = DCI.DAG;
2743  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
2744  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's value type
2746  SDValue Result;                       // Initially, empty result
2747  DebugLoc dl = N->getDebugLoc();
2748
2749  switch (N->getOpcode()) {
2750  default: break;
2751  case ISD::ADD: {
2752    SDValue Op1 = N->getOperand(1);
2753
    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
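    // Fold away the redundant zero offsets that address lowering tends to
    // leave around A-form addresses and plain adds.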
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
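    // Re-inserting a value into the preferred slot right after extracting it
    // (possibly through an extension) is a round trip; reuse the original
    // vector instead.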
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // Otherwise, return Result unchanged; an empty SDValue means no combine.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

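/// getRegForInlineAsmConstraint - Map a single-letter register constraint to
/// the matching SPU register class for the given value type; the letters
/// follow the GCC RS6000/PowerPC convention. For example, an "r" operand in
/// inline asm such as "ai %0, %1, 4" is assigned to R32C when VT is i32.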
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
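  // Nothing is computed here yet: leaving KnownZero/KnownOne untouched tells
  // the DAG combiner that no bits of the result are known, which is always
  // conservatively correct.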
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
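    // SPU comparison instructions produce an all-ones or all-zeros result,
    // so every bit of the (possibly promoted) setcc result is a copy of the
    // sign bit.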
    EVT VT = Op.getValueType();

    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

/// LowerAsmOperandForConstraint - Lower an inline asm operand for the given
/// constraint letter; currently defers to the TargetLowering default.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // The SPU's local store is 256K (2^18 bytes), so the offset must lie
  // within it:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue *GV) const {
  // A GlobalValue is never a legal address immediate on SPU.
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}