SPUISelLowering.cpp revision d258c49589f3befd161a5ab27fd635b1dbdafc10
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "SPUMachineFunction.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! EVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    EVT   valtype;
    int   prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };
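  // The prefslot_byte entries record where a scalar sits in the SPU's
  // "preferred slot" of a 128-bit register: byte 3 for i8/i1, bytes 2-3 for
  // i16, bytes 0-3 for i32/f32, and offset 0 for the 64- and 128-bit types,
  // matching the table above.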

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(EVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      report_fatal_error("getValueTypeMapEntry returns NULL for " +
                         Twine(VT.getEVTString()));
    }
#endif

    return retval;
  }

  //! Expand a library call into an actual call DAG node
  /*!
   \note
   This code is taken from SelectionDAGLegalize, since it is not exposed as
   part of the LLVM SelectionDAG API.
   */

  SDValue
  ExpandLibCall(RTLIB::Libcall LC, SDValue Op, SelectionDAG &DAG,
                bool isSigned, SDValue &Hi, const SPUTargetLowering &TLI) {
    // The input chain to this libcall is the entry node of the function.
    // Legalizing the call will automatically add the previous call to the
    // dependence.
    SDValue InChain = DAG.getEntryNode();

    TargetLowering::ArgListTy Args;
    TargetLowering::ArgListEntry Entry;
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      EVT ArgVT = Op.getOperand(i).getValueType();
      const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
      Entry.Node = Op.getOperand(i);
      Entry.Ty = ArgTy;
      Entry.isSExt = isSigned;
      Entry.isZExt = !isSigned;
      Args.push_back(Entry);
    }
    SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                           TLI.getPointerTy());

    // Splice the libcall in wherever FindInputOutputChains tells us to.
    const Type *RetTy =
                Op.getNode()->getValueType(0).getTypeForEVT(*DAG.getContext());
    std::pair<SDValue, SDValue> CallInfo =
            TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
                            0, TLI.getLibcallCallingConv(LC), false,
                            /*isReturnValueUsed=*/true,
                            Callee, Args, DAG, Op.getDebugLoc());

    return CallInfo.first;
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()),
    SPUTM(TM) {
  // Division by a power of two is cheap on the SPU (it becomes a shift).
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set RTLIB libcall names as used by SPU:
  setLibcallName(RTLIB::DIV_F64, "__fast_divdf3");

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);

  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);

  setTruncStoreAction(MVT::i128, MVT::i64, Expand);
  setTruncStoreAction(MVT::i128, MVT::i32, Expand);
  setTruncStoreAction(MVT::i128, MVT::i16, Expand);
  setTruncStoreAction(MVT::i128, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::i128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);
    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType) sctype;

    setOperationAction(ISD::LOAD,   VT, Custom);
    setOperationAction(ISD::STORE,  VT, Custom);

    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
      MVT::SimpleValueType StoreVT = (MVT::SimpleValueType) stype;
      setTruncStoreAction(VT, StoreVT, Expand);
    }
  }

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);

  // Custom lower SELECT_CC for most cases, but expand by default
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no division/remainder instructions
  setOperationAction(ISD::SREM,    MVT::i8,   Expand);
  setOperationAction(ISD::UREM,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::UDIV,    MVT::i8,   Expand);
  setOperationAction(ISD::SDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::UDIVREM, MVT::i8,   Expand);
  setOperationAction(ISD::SREM,    MVT::i16,  Expand);
  setOperationAction(ISD::UREM,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i16,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i16,  Expand);
  setOperationAction(ISD::SREM,    MVT::i32,  Expand);
  setOperationAction(ISD::UREM,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i32,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32,  Expand);
  setOperationAction(ISD::SREM,    MVT::i64,  Expand);
  setOperationAction(ISD::UREM,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::UDIV,    MVT::i64,  Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64,  Expand);
  setOperationAction(ISD::SREM,    MVT::i128, Expand);
  setOperationAction(ISD::UREM,    MVT::i128, Expand);
  setOperationAction(ISD::SDIV,    MVT::i128, Expand);
  setOperationAction(ISD::UDIV,    MVT::i128, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i128, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i128, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Expand fsqrt to the appropriate libcall (NOTE: should use h/w fsqrt
  // for f32!)
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left, so legalize it... but customize for i8
  // because instructions don't exist.

  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
  //        .td files.
  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);

  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);

  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);

  // Make these operations legal and handle them during instruction selection:
  setOperationAction(ISD::SHL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRL,  MVT::i64,    Legal);
  setOperationAction(ISD::SRA,  MVT::i64,    Legal);

  // Custom lower i8 multiplications; i32 and i64 multiplies are legal:
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
  setOperationAction(ISD::MUL,  MVT::i64,    Legal);

  // Expand double-width multiplication
  // FIXME: It would probably be reasonable to support some of these operations
  setOperationAction(ISD::UMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i8,  Expand);
  setOperationAction(ISD::MULHU,     MVT::i8,  Expand);
  setOperationAction(ISD::MULHS,     MVT::i8,  Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
  setOperationAction(ISD::MULHU,     MVT::i16, Expand);
  setOperationAction(ISD::MULHS,     MVT::i16, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::MULHU,     MVT::i32, Expand);
  setOperationAction(ISD::MULHS,     MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::MULHU,     MVT::i64, Expand);
  setOperationAction(ISD::MULHS,     MVT::i64, Expand);

  // Custom handle common i8 math ops; the i64 equivalents are legal:
  setOperationAction(ISD::ADD,  MVT::i8,     Custom);
  setOperationAction(ISD::ADD,  MVT::i64,    Legal);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Legal);

  // SPU does not have BSWAP. It does support CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i128,  Expand);

  setOperationAction(ISD::CTTZ , MVT::i8,    Expand);
  setOperationAction(ISD::CTTZ , MVT::i16,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i128,  Expand);

  setOperationAction(ISD::CTLZ , MVT::i8,    Promote);
  setOperationAction(ISD::CTLZ , MVT::i16,   Promote);
  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
  setOperationAction(ISD::CTLZ , MVT::i64,   Expand);
  setOperationAction(ISD::CTLZ , MVT::i128,  Expand);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Legal);

  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
  setOperationAction(ISD::SETCC, MVT::f64,   Custom);

  // Custom lower i128 -> i64 truncates
  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);

  // Custom lower i32/i64 -> i128 sign extend
  setOperationAction(ISD::SIGN_EXTEND, MVT::i128, Custom);

  setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
  setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
  setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
  // SPU has a legal FP -> signed INT instruction for f32, but f64 has to be
  // expanded into a libcall, hence the custom lowering:
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
  setOperationAction(ISD::FP_TO_SINT, MVT::i128, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i128, Expand);

  // f64 FDIV is expanded into a libcall (__fast_divdf3, registered above):
  setOperationAction(ISD::FDIV, MVT::f64, Expand);

  // SPU has [U|S]INT_TO_FP for i32->f32, but not for i32->f64 or i64->f64:
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
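  // (For an i32 value this is e.g. (sra (shl x, 31), 31).)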

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress,  VT, Custom);
    setOperationAction(ISD::ConstantPool,   VT, Custom);
    setOperationAction(ISD::JumpTable,      VT, Custom);
  }

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU has instructions for converting between i64 and fp.
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
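  // (i.e. (build_pair lo, hi) becomes roughly
  //  (or (zext i64 lo), (shl (anyext i64 hi), 32)).)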

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  // "Odd size" vector classes that we're willing to support:
  addRegisterClass(MVT::v2i32, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD,     VT, Legal);
    setOperationAction(ISD::SUB,     VT, Legal);
    // mul is also legal for the supported vector types:
    setOperationAction(ISD::MUL,     VT, Legal);

    setOperationAction(ISD::AND,     VT, Legal);
    setOperationAction(ISD::OR,      VT, Legal);
    setOperationAction(ISD::XOR,     VT, Legal);
    setOperationAction(ISD::LOAD,    VT, Legal);
    setOperationAction(ISD::SELECT,  VT, Legal);
    setOperationAction(ISD::STORE,   VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV,    VT, Expand);
    setOperationAction(ISD::SREM,    VT, Expand);
    setOperationAction(ISD::UDIV,    VT, Expand);
    setOperationAction(ISD::UREM,    VT, Expand);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setOperationAction(ISD::FDIV, MVT::v4f32, Legal);

  setShiftAmountType(MVT::i32);
  setBooleanContents(ZeroOrNegativeOneBooleanContent);
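  // (ZeroOrNegativeOne matches the SPU compare instructions, which produce
  //  an all-ones mask for "true" rather than a single bit.)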

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();

  // Set pre-RA register scheduler default to BURR, which produces slightly
  // better code than the default (could also be TDRR, but TargetLowering.h
  // needs a mod to support that model):
  setSchedulingPreference(Sched::RegPressure);
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
            "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD64_MARKER] = "SPUISD::ADD64_MARKER";
    node_names[(unsigned) SPUISD::SUB64_MARKER] = "SPUISD::SUB64_MARKER";
    node_names[(unsigned) SPUISD::MUL64_MARKER] = "SPUISD::MUL64_MARKER";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned SPUTargetLowering::getFunctionAlignment(const Function *) const {
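  // Log2 alignment of 3, i.e. functions are aligned to 8 bytes: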
  return 3;
}

//===----------------------------------------------------------------------===//
// Return the Cell SPU's SETCC result type
//===----------------------------------------------------------------------===//

MVT::SimpleValueType SPUTargetLowering::getSetCCResultType(EVT VT) const {
  // i8, i16 and i32 are valid SETCC result types
  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ?
    VT.getSimpleVT().SimpleTy :
    MVT::i32);
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.

 For extending loads, we also want to ensure that the following sequence is
 emitted, e.g. for MVT::f32 extending load to MVT::f64:

\verbatim
%1  v16i8,ch = load
%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
%5  f64      = fp_extend %4
\endverbatim
*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  EVT InVT = LN->getMemoryVT();
  EVT OutVT = Op.getValueType();
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);
  DebugLoc dl = Op.getDebugLoc();

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);
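        // (e.g. an i16 (prefslot_byte == 2) at offset 0x13 yields
        //  rotamt = (0x13 & 0xf) - 2 = 1.)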

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, dl, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), LN->isNonTemporal(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 InVT, (128 / InVT.getSizeInBits()));
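    // (e.g. a 32-bit InVT gives vecVT == v4i32.)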
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, dl, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, dl, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, dl, OutVT, result);
    }

    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, dl, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
                         "than UNINDEXED\n" +
                         Twine((unsigned)LN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDValue
LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDValue Value = SN->getValue();
  EVT VT = Value.getValueType();
  EVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  DebugLoc dl = Op.getDebugLoc();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    // The vector type we really want to load from the 16-byte chunk.
    EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
                                 VT, (128 / VT.getSizeInBits()));

    SDValue alignLoadVec;
    SDValue basePtr = SN->getBasePtr();
    SDValue the_chain = SN->getChain();
    SDValue insertEltOffs;

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and insertion byte:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant((offset & 0xf), PtrVT));

        if ((offset & ~0xf) > 0) {
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else {
        // Otherwise, assume it's at byte 0 of basePtr
        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                                    basePtr,
                                    DAG.getConstant(0, PtrVT));
      }
    } else {
      // Unaligned store: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
          the_chain = DAG.getCopyToReg(the_chain, dl, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, dl, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Op0, Op1);
        }
      } else {
        basePtr = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Insertion point is solely determined by basePtr's contents
      insertEltOffs = DAG.getNode(ISD::ADD, dl, PtrVT,
                                  basePtr,
                                  DAG.getConstant(0, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    alignLoadVec = DAG.getLoad(MVT::v16i8, dl, the_chain, basePtr,
                               SN->getSrcValue(), SN->getSrcValueOffset(),
                               SN->isVolatile(), SN->isNonTemporal(), 16);

    // Update the chain
    the_chain = alignLoadVec.getValue(1);

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDValue theValue = SN->getValue();
    SDValue result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "CellSPU LowerSTORE: basePtr = ";
        basePtr.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

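    // SHUFFLE_MASK computes a byte-shuffle mask from the insertion offset;
    // SHUFB then splices the scalar (vectorized via SCALAR_TO_VECTOR) into
    // the aligned quadword loaded above, leaving the other bytes intact.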
    SDValue insertEltOp =
            DAG.getNode(SPUISD::SHUFFLE_MASK, dl, vecVT, insertEltOffs);
    SDValue vectorizeOp =
            DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, vecVT, theValue);

    result = DAG.getNode(SPUISD::SHUFB, dl, vecVT,
                         vectorizeOp, alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, dl,
                                     MVT::v4i32, insertEltOp));

    result = DAG.getStore(the_chain, dl, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->isNonTemporal(),
                          LN->getAlignment());

#if 0 && !defined(NDEBUG)
    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
      const SDValue &currentRoot = DAG.getRoot();

      DAG.setRoot(result);
      errs() << "------- CellSPU:LowerStore result:\n";
      DAG.dump();
      errs() << "-------\n";
      DAG.setRoot(currentRoot);
    }
#endif

    return result;
    /*UNREACHED*/
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    {
882      report_fatal_error("LowerLOAD: Got a LoadSDNode with an addr mode other "
883                         "than UNINDEXED\n" +
884                         Twine((unsigned)SN->getAddressingMode()));
      /*NOTREACHED*/
    }
  }

  return SDValue();
}

//! Generate the address of a constant pool entry.
static SDValue
LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();
  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDValue with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, CPI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, CPI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerConstantPool: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

//! Alternate entry point for generating the address of a constant pool entry
SDValue
SPU::LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUTargetMachine &TM) {
  return ::LowerConstantPool(Op, DAG, TM.getSubtargetImpl());
}

static SDValue
LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDValue Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, JTI, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, JTI, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  }

  llvm_unreachable("LowerJumpTable: Relocation model other than static"
                   " not supported.");
  return SDValue();
}

static SDValue
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *GV = GSDN->getGlobal();
  SDValue GA = DAG.getTargetGlobalAddress(GV, Op.getDebugLoc(),
                                          PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, dl, PtrVT, GA, Zero);
    } else {
      SDValue Hi = DAG.getNode(SPUISD::Hi, dl, PtrVT, GA, Zero);
      SDValue Lo = DAG.getNode(SPUISD::Lo, dl, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, Hi, Lo);
    }
  } else {
972    report_fatal_error("LowerGlobalAddress: Relocation model other than static"
973                      "not supported.");
    /*NOTREACHED*/
  }

  return SDValue();
}

//! Custom lower double precision floating point constants
static SDValue
LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();
  // FIXME there is no actual debug info here
  DebugLoc dl = Op.getDebugLoc();

  if (VT == MVT::f64) {
    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());

    assert((FP != 0) &&
           "LowerConstantFP: Node is not ConstantFPSDNode");

    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
    SDValue T = DAG.getConstant(dbits, MVT::i64);
    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T);
    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
                       DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64, Tvec));
  }

  return SDValue();
}

SDValue
SPUTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        DebugLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SPUFunctionInfo *FuncInfo = MF.getInfo<SPUFunctionInfo>();

  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeFormalArguments(Ins, CCC_SPU);

  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;
    CCValAssign &VA = ArgLocs[ArgNo];

    if (VA.isRegLoc()) {
      const TargetRegisterClass *ArgRegClass;

      switch (ObjectVT.getSimpleVT().SimpleTy) {
      default:
        report_fatal_error("LowerFormalArguments Unhandled argument type: " +
                           Twine(ObjectVT.getEVTString()));
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::i128:
        ArgRegClass = &SPU::GPRCRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(VA.getLocReg(), VReg);
      ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0, false, false, 0);
      ArgOffset += StackSlotSize;
    }

    InVals.push_back(ArgVal);
    // Update the chain
    Chain = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // FIXME: we should be able to query the argument registers from
    //        tablegen generated code.
    static const unsigned ArgRegs[] = {
      SPU::R3,  SPU::R4,  SPU::R5,  SPU::R6,  SPU::R7,  SPU::R8,  SPU::R9,
      SPU::R10, SPU::R11, SPU::R12, SPU::R13, SPU::R14, SPU::R15, SPU::R16,
      SPU::R17, SPU::R18, SPU::R19, SPU::R20, SPU::R21, SPU::R22, SPU::R23,
      SPU::R24, SPU::R25, SPU::R26, SPU::R27, SPU::R28, SPU::R29, SPU::R30,
      SPU::R31, SPU::R32, SPU::R33, SPU::R34, SPU::R35, SPU::R36, SPU::R37,
      SPU::R38, SPU::R39, SPU::R40, SPU::R41, SPU::R42, SPU::R43, SPU::R44,
      SPU::R45, SPU::R46, SPU::R47, SPU::R48, SPU::R49, SPU::R50, SPU::R51,
      SPU::R52, SPU::R53, SPU::R54, SPU::R55, SPU::R56, SPU::R57, SPU::R58,
      SPU::R59, SPU::R60, SPU::R61, SPU::R62, SPU::R63, SPU::R64, SPU::R65,
      SPU::R66, SPU::R67, SPU::R68, SPU::R69, SPU::R70, SPU::R71, SPU::R72,
      SPU::R73, SPU::R74, SPU::R75, SPU::R76, SPU::R77, SPU::R78, SPU::R79
    };
    // Size of the ArgRegs array (R3..R79, i.e. 77 registers)
    const unsigned NumArgRegs = sizeof(ArgRegs) / sizeof(ArgRegs[0]);

    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      FuncInfo->setVarArgsFrameIndex(
        MFI->CreateFixedObject(StackSlotSize, ArgOffset, true));
      SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
      unsigned VReg = MF.addLiveIn(ArgRegs[ArgRegIdx], &SPU::R32CRegClass);
      SDValue ArgVal = DAG.getRegister(VReg, MVT::v16i8);
      SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0,
                                   false, false, 0);
      Chain = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                          &MemOps[0], MemOps.size());
  }

  return Chain;
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

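  // The remaining 18-bit, word-aligned address is encoded as a word offset
  // (e.g. Addr == 0x100 yields the immediate 0x40):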
  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
}

SDValue
SPUTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  // CellSPU target does not yet support tail call optimization.
  isTailCall = false;

  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  unsigned NumOps     = Outs.size();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  // FIXME: allow for other calling conventions
  CCInfo.AnalyzeCallOperands(Outs, CCC_SPU);

  const unsigned NumArgRegs = ArgLocs.size();

  // Handy pointer type
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (; ArgRegIdx != NumOps; ++ArgRegIdx) {
    SDValue Arg = OutVals[ArgRegIdx];
    CCValAssign &VA = ArgLocs[ArgRegIdx];

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
    case MVT::f32:
    case MVT::f64:
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
                                           false, false, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP].
  unsigned NumStackBytes = ArgOffset - SPUFrameInfo::minStackSize();

  // Insert a call sequence start
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, dl, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, dl, CalleeVT, GA, Zero);
      }
    } else {
1282      // "Large memory" mode: Turn all calls into indirect calls with a X-form
1283      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    EVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
        Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, dl, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, dl, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  // If the function returns void, just return the chain.
  if (Ins.empty())
    return Chain;

  // If the call has results, copy the values out of the ret val registers.
  switch (Ins[0].VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Ins.size() > 1 && Ins[1].VT == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R4,
                                 MVT::i32, InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      InVals.push_back(Chain.getValue(0));
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, MVT::i32,
                                 InFlag).getValue(1);
      InVals.push_back(Chain.getValue(0));
    }
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i64:
  case MVT::i128:
  case MVT::f32:
  case MVT::f64:
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, dl, SPU::R3, Ins[0].VT,
                               InFlag).getValue(1);
    InVals.push_back(Chain.getValue(0));
    break;
  }

  return Chain;
}

SDValue
SPUTargetLowering::LowerReturn(SDValue Chain,
                               CallingConv::ID CallConv, bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               DebugLoc dl, SelectionDAG &DAG) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDValue Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
                             OutVals[i], Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.getNode())
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, dl, MVT::Other, Chain);
}

//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDValue OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
1413  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1414    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1415    if (OpVal.getNode() == 0)
1416      OpVal = N->getOperand(i);
1417    else if (OpVal != N->getOperand(i))
1418      return 0;
1419  }
1420
1421  if (OpVal.getNode() != 0) {
1422    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1423      return CN;
1424    }
1425  }
1426
1427  return 0;
1428}
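
// Example (illustrative): (BUILD_VECTOR 7, 7, undef, 7) yields the
// ConstantSDNode for 7, whereas (BUILD_VECTOR 1, 2, 3, 4) yields 0 because
// the defined elements disagree.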
1429
1430/// get_vec_u18imm - Test if this vector is a vector filled with the same value
1431/// and the value fits into an unsigned 18-bit constant, and if so, return the
1432/// constant
1433SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1434                              EVT ValueType) {
1435  if (ConstantSDNode *CN = getVecImm(N)) {
1436    uint64_t Value = CN->getZExtValue();
1437    if (ValueType == MVT::i64) {
1438      uint64_t UValue = CN->getZExtValue();
1439      uint32_t upper = uint32_t(UValue >> 32);
1440      uint32_t lower = uint32_t(UValue);
1441      if (upper != lower)
1442        return SDValue();
1443      Value = Value >> 32;
1444    }
1445    if (Value <= 0x3ffff)
1446      return DAG.getTargetConstant(Value, ValueType);
1447  }
1448
1449  return SDValue();
1450}
1451
1452/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1453/// and the value fits into a signed 16-bit constant, and if so, return the
1454/// constant
1455SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1456                              EVT ValueType) {
1457  if (ConstantSDNode *CN = getVecImm(N)) {
1458    int64_t Value = CN->getSExtValue();
1459    if (ValueType == MVT::i64) {
1460      uint64_t UValue = CN->getZExtValue();
1461      uint32_t upper = uint32_t(UValue >> 32);
1462      uint32_t lower = uint32_t(UValue);
1463      if (upper != lower)
1464        return SDValue();
1465      Value = Value >> 32;
1466    }
1467    if (isInt<16>(Value)) {
1468      return DAG.getTargetConstant(Value, ValueType);
1469    }
1470  }
1471
1472  return SDValue();
1473}
1474
1475/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1476/// and the value fits into a signed 10-bit constant, and if so, return the
1477/// constant
1478SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1479                              EVT ValueType) {
1480  if (ConstantSDNode *CN = getVecImm(N)) {
1481    int64_t Value = CN->getSExtValue();
1482    if (ValueType == MVT::i64) {
1483      uint64_t UValue = CN->getZExtValue();
1484      uint32_t upper = uint32_t(UValue >> 32);
1485      uint32_t lower = uint32_t(UValue);
1486      if (upper != lower)
1487        return SDValue();
1488      Value = Value >> 32;
1489    }
1490    if (isInt<10>(Value))
1491      return DAG.getTargetConstant(Value, ValueType);
1492  }
1493
1494  return SDValue();
1495}
1496
1497/// get_vec_i8imm - Test if this vector is a vector filled with the same value
1498/// and the value fits into a signed 8-bit constant, and if so, return the
1499/// constant.
1500///
1501/// @note: The incoming vector is v16i8 because that's the only way we can load
1502/// constant vectors. Thus, we test to see if the upper and lower bytes are the
1503/// same value.
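///
/// For example (illustrative): a v16i8 splat of 0x23 can reach this routine
/// as the i16 splat element 0x2323; the test below confirms that both bytes
/// match and returns 0x23 as the immediate.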
1504SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
1505                             EVT ValueType) {
1506  if (ConstantSDNode *CN = getVecImm(N)) {
1507    int Value = (int) CN->getZExtValue();
1508    if (ValueType == MVT::i16
1509        && Value <= 0xffff                 /* truncated from uint64_t */
1510        && ((short) Value >> 8) == ((short) Value & 0xff))
1511      return DAG.getTargetConstant(Value & 0xff, ValueType);
1512    else if (ValueType == MVT::i8
1513             && (Value & 0xff) == Value)
1514      return DAG.getTargetConstant(Value, ValueType);
1515  }
1516
1517  return SDValue();
1518}
1519
1520/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1521/// and the value fits into a signed 16-bit constant, and if so, return the
1522/// constant
1523SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1524                               EVT ValueType) {
1525  if (ConstantSDNode *CN = getVecImm(N)) {
1526    uint64_t Value = CN->getZExtValue();
1527    if ((ValueType == MVT::i32
1528          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1529        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1530      return DAG.getTargetConstant(Value >> 16, ValueType);
1531  }
1532
1533  return SDValue();
1534}
1535
1536/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1537SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1538  if (ConstantSDNode *CN = getVecImm(N)) {
1539    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1540  }
1541
1542  return SDValue();
1543}
1544
1545/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1546SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1547  if (ConstantSDNode *CN = getVecImm(N)) {
1548    return DAG.getTargetConstant(CN->getZExtValue(), MVT::i64);
1549  }
1550
1551  return SDValue();
1552}
1553
1554//! Lower a BUILD_VECTOR instruction creatively:
1555static SDValue
1556LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1557  EVT VT = Op.getValueType();
1558  EVT EltVT = VT.getVectorElementType();
1559  DebugLoc dl = Op.getDebugLoc();
1560  BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(Op.getNode());
1561  assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerBUILD_VECTOR");
1562  unsigned minSplatBits = EltVT.getSizeInBits();
1563
1564  if (minSplatBits < 16)
1565    minSplatBits = 16;
1566
1567  APInt APSplatBits, APSplatUndef;
1568  unsigned SplatBitSize;
1569  bool HasAnyUndefs;
1570
1571  if (!BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
1572                            HasAnyUndefs, minSplatBits)
1573      || minSplatBits < SplatBitSize)
1574    return SDValue();   // Wasn't a constant vector or splat exceeded min
1575
1576  uint64_t SplatBits = APSplatBits.getZExtValue();
1577
1578  switch (VT.getSimpleVT().SimpleTy) {
1579  default:
1580    report_fatal_error("CellSPU: Unhandled VT in LowerBUILD_VECTOR, VT = " +
1581                       Twine(VT.getEVTString()));
1582    /*NOTREACHED*/
1583  case MVT::v4f32: {
1584    uint32_t Value32 = uint32_t(SplatBits);
1585    assert(SplatBitSize == 32
1586           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1587    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1588    SDValue T = DAG.getConstant(Value32, MVT::i32);
1589    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4f32,
1590                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, T,T,T,T));
1591    break;
1592  }
1593  case MVT::v2f64: {
1594    uint64_t f64val = uint64_t(SplatBits);
1595    assert(SplatBitSize == 64
1596           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1597    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1598    SDValue T = DAG.getConstant(f64val, MVT::i64);
1599    return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v2f64,
1600                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, T, T));
1601    break;
1602  }
1603  case MVT::v16i8: {
1604    // 8-bit constants have to be expanded to 16-bits
1605    unsigned short Value16 = SplatBits /* | (SplatBits << 8) */;
1606    SmallVector<SDValue, 8> Ops;
1607
1608    Ops.assign(8, DAG.getConstant(Value16, MVT::i16));
1609    return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
1610                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v8i16, &Ops[0], Ops.size()));
1611  }
1612  case MVT::v8i16: {
1613    unsigned short Value16 = SplatBits;
1614    SDValue T = DAG.getConstant(Value16, EltVT);
1615    SmallVector<SDValue, 8> Ops;
1616
1617    Ops.assign(8, T);
1618    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
1619  }
1620  case MVT::v4i32: {
1621    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1622    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T, T, T);
1623  }
1624  case MVT::v2i32: {
1625    SDValue T = DAG.getConstant(unsigned(SplatBits), VT.getVectorElementType());
1626    return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, T, T);
1627  }
1628  case MVT::v2i64: {
1629    return SPU::LowerV2I64Splat(VT, DAG, SplatBits, dl);
1630  }
1631  }
1632
1633  return SDValue();
1634}
1635
1636/*!
1637 Generate a v2i64 splat constant via a single v4i32 splat, a constant-pool
 load, or a SHUFB merge of two v4i32 splats, depending on the splat value.
 */
1638SDValue
1639SPU::LowerV2I64Splat(EVT OpVT, SelectionDAG& DAG, uint64_t SplatVal,
1640                     DebugLoc dl) {
1641  uint32_t upper = uint32_t(SplatVal >> 32);
1642  uint32_t lower = uint32_t(SplatVal);
1643
1644  if (upper == lower) {
1645    // Magic constant that can be matched by IL, ILA, et al.
1646    SDValue Val = DAG.getTargetConstant(upper, MVT::i32);
1647    return DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1648                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1649                                   Val, Val, Val, Val));
1650  } else {
1651    bool upper_special, lower_special;
1652
1653    // NOTE: This code creates common-case shuffle masks that can be easily
1654    // detected as common expressions. It is not attempting to create highly
1655    // specialized masks to replace any and all 0's, 0xff's and 0x80's.
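    //
    // Worked example (illustrative): splatting 0x0000000012345678ULL has
    // upper == 0 (special) and lower == 0x12345678 (not special). LO32
    // becomes a v4i32 splat of 0x12345678, HI32 aliases LO32, and the loop
    // below emits the control words (0x80808080, 0x14151617, 0x80808080,
    // 0x1c1d1e1f): 0x80 bytes produce zeros, while bytes 0x14-0x17 and
    // 0x1c-0x1f select words of LO32, yielding (0, lower, 0, lower).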
1656
1657    // Detect if the upper or lower half is a special shuffle mask pattern:
1658    upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1659    lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1660
1661    // Both upper and lower are special, lower to a constant pool load:
1662    if (lower_special && upper_special) {
1663      SDValue SplatValCN = DAG.getConstant(SplatVal, MVT::i64);
1664      return DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64,
1665                         SplatValCN, SplatValCN);
1666    }
1667
1668    SDValue LO32;
1669    SDValue HI32;
1670    SmallVector<SDValue, 16> ShufBytes;
1671    SDValue Result;
1672
1673    // Create lower vector if not a special pattern
1674    if (!lower_special) {
1675      SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1676      LO32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1677                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1678                                     LO32C, LO32C, LO32C, LO32C));
1679    }
1680
1681    // Create upper vector if not a special pattern
1682    if (!upper_special) {
1683      SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1684      HI32 = DAG.getNode(ISD::BIT_CONVERT, dl, OpVT,
1685                         DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1686                                     HI32C, HI32C, HI32C, HI32C));
1687    }
1688
1689    // If either upper or lower are special, then the two input operands are
1690    // the same (basically, one of them is a "don't care")
1691    if (lower_special)
1692      LO32 = HI32;
1693    if (upper_special)
1694      HI32 = LO32;
1695
1696    for (int i = 0; i < 4; ++i) {
1697      uint64_t val = 0;
1698      for (int j = 0; j < 4; ++j) {
1699        SDValue V;
1700        bool process_upper, process_lower;
1701        val <<= 8;
1702        process_upper = (upper_special && (i & 1) == 0);
1703        process_lower = (lower_special && (i & 1) == 1);
1704
1705        if (process_upper || process_lower) {
1706          if ((process_upper && upper == 0)
1707                  || (process_lower && lower == 0))
1708            val |= 0x80;
1709          else if ((process_upper && upper == 0xffffffff)
1710                  || (process_lower && lower == 0xffffffff))
1711            val |= 0xc0;
1712          else if ((process_upper && upper == 0x80000000)
1713                  || (process_lower && lower == 0x80000000))
1714            val |= (j == 0 ? 0xe0 : 0x80);
1715        } else
1716          val |= i * 4 + j + ((i & 1) * 16);
1717      }
1718
1719      ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1720    }
1721
1722    return DAG.getNode(SPUISD::SHUFB, dl, OpVT, HI32, LO32,
1723                       DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1724                                   &ShufBytes[0], ShufBytes.size()));
1725  }
1726}
1727
1728/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1729/// which the Cell can operate. The code inspects V3 to ascertain whether the
1730/// permutation vector is monotonically increasing with one "exception"
1731/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1732/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1733/// In either case, the net result is going to eventually invoke SHUFB to
1734/// permute/shuffle the bytes from V1 and V2.
1735/// \note
1736/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, which
1737/// generate the control word for byte/halfword/word insertion. This takes
1738/// care of a single element move from V2 into V1.
1739/// \note
1740/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
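///
/// For illustration: on v4i32, the mask (0, 5, 2, 3) is monotonic with a
/// single element taken from V2, so it maps onto the SHUFFLE_MASK/SHUFB
/// insertion path; the mask (1, 2, 3, 0) is a pure rotation by one element
/// and maps onto ROTBYTES_LEFT; anything else falls back to an explicit
/// byte-permutation vector fed to SHUFB.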
1741static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
1742  const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
1743  SDValue V1 = Op.getOperand(0);
1744  SDValue V2 = Op.getOperand(1);
1745  DebugLoc dl = Op.getDebugLoc();
1746
1747  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1748
1749  // If we have a single element being moved from V1 to V2, this can be handled
1750  // using the C*[DX] compute mask instructions, but the vector elements have
1751  // to be monotonically increasing with one exception element.
1752  EVT VecVT = V1.getValueType();
1753  EVT EltVT = VecVT.getVectorElementType();
1754  unsigned EltsFromV2 = 0;
1755  unsigned V2Elt = 0;
1756  unsigned V2EltIdx0 = 0;
1757  unsigned CurrElt = 0;
1758  unsigned MaxElts = VecVT.getVectorNumElements();
1759  unsigned PrevElt = 0;
1760  unsigned V0Elt = 0;
1761  bool monotonic = true;
1762  bool rotate = true;
1763  EVT maskVT;             // which of the c?d instructions to use
1764
1765  if (EltVT == MVT::i8) {
1766    V2EltIdx0 = 16;
1767    maskVT = MVT::v16i8;
1768  } else if (EltVT == MVT::i16) {
1769    V2EltIdx0 = 8;
1770    maskVT = MVT::v8i16;
1771  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
1772    V2EltIdx0 = 4;
1773    maskVT = MVT::v4i32;
1774  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
1775    V2EltIdx0 = 2;
1776    maskVT = MVT::v2i64;
1777  } else
1778    llvm_unreachable("Unhandled vector type in LowerVECTOR_SHUFFLE");
1779
1780  for (unsigned i = 0; i != MaxElts; ++i) {
1781    if (SVN->getMaskElt(i) < 0)
1782      continue;
1783
1784    unsigned SrcElt = SVN->getMaskElt(i);
1785
1786    if (monotonic) {
1787      if (SrcElt >= V2EltIdx0) {
1788        if (++EltsFromV2 == 1) {
1789          V2Elt = (V2EltIdx0 - SrcElt) << 2;
1790        }
1791      } else if (CurrElt != SrcElt) {
1792        monotonic = false;
1793      }
1794
1795      ++CurrElt;
1796    }
1797
1798    if (rotate) {
1799      if (PrevElt > 0 && SrcElt < MaxElts) {
1800        if ((PrevElt == SrcElt - 1)
1801            || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
1802          PrevElt = SrcElt;
1803          if (SrcElt == 0)
1804            V0Elt = i;
1805        } else {
1806          rotate = false;
1807        }
1808      } else if (i == 0) {
1809        // First time through, need to keep track of previous element
1810        PrevElt = SrcElt;
1811      } else {
1812        // This isn't a rotation; it takes elements from vector 2.
1813        rotate = false;
1814      }
1815    }
1816  }
1817
1818  if (EltsFromV2 == 1 && monotonic) {
1819    // Compute mask and shuffle
1820    EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1821
1822    // As SHUFFLE_MASK becomes a c?d instruction, feed it an address
1823    // R1 ($sp) is used here only as it is guaranteed to have last bits zero
1824    SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
1825                                DAG.getRegister(SPU::R1, PtrVT),
1826                                DAG.getConstant(V2Elt, MVT::i32));
1827    SDValue ShufMaskOp = DAG.getNode(SPUISD::SHUFFLE_MASK, dl,
1828                                     maskVT, Pointer);
1829
1830    // Use shuffle mask in SHUFB synthetic instruction:
1831    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V2, V1,
1832                       ShufMaskOp);
1833  } else if (rotate) {
1834    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;
1835
1836    return DAG.getNode(SPUISD::ROTBYTES_LEFT, dl, V1.getValueType(),
1837                       V1, DAG.getConstant(rotamt, MVT::i16));
1838  } else {
1839    // Convert the SHUFFLE_VECTOR mask's input element units to the
1840    // actual bytes.
1841    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1842
1843    SmallVector<SDValue, 16> ResultMask;
1844    for (unsigned i = 0, e = MaxElts; i != e; ++i) {
1845      unsigned SrcElt = SVN->getMaskElt(i) < 0 ? 0 : SVN->getMaskElt(i);
1846
1847      for (unsigned j = 0; j < BytesPerElement; ++j)
1848        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,MVT::i8));
1849    }
1850
1851    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8,
1852                                    &ResultMask[0], ResultMask.size());
1853    return DAG.getNode(SPUISD::SHUFB, dl, V1.getValueType(), V1, V2, VPermMask);
1854  }
1855}
1856
1857static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1858  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1859  DebugLoc dl = Op.getDebugLoc();
1860
1861  if (Op0.getNode()->getOpcode() == ISD::Constant) {
1862    // For a constant, build the appropriate constant vector, which will
1863    // eventually simplify to a vector register load.
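    // For example (illustrative): (scalar_to_vector (i32 7)) of type v4i32
    // becomes (BUILD_VECTOR 7, 7, 7, 7).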
1864
1865    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1866    SmallVector<SDValue, 16> ConstVecValues;
1867    EVT VT;
1868    size_t n_copies;
1869
1870    // Create a constant vector:
1871    switch (Op.getValueType().getSimpleVT().SimpleTy) {
1872    default: llvm_unreachable("Unexpected constant value type in "
1873                              "LowerSCALAR_TO_VECTOR");
1874    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1875    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1876    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1877    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1878    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1879    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1880    }
1881
1882    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1883    for (size_t j = 0; j < n_copies; ++j)
1884      ConstVecValues.push_back(CValue);
1885
1886    return DAG.getNode(ISD::BUILD_VECTOR, dl, Op.getValueType(),
1887                       &ConstVecValues[0], ConstVecValues.size());
1888  } else {
1889    // Otherwise, copy the value from one register to another:
1890    switch (Op0.getValueType().getSimpleVT().SimpleTy) {
1891    default: llvm_unreachable("Unexpected value type in LowerSCALAR_TO_VECTOR");
1892    case MVT::i8:
1893    case MVT::i16:
1894    case MVT::i32:
1895    case MVT::i64:
1896    case MVT::f32:
1897    case MVT::f64:
1898      return DAG.getNode(SPUISD::PREFSLOT2VEC, dl, Op.getValueType(), Op0, Op0);
1899    }
1900  }
1901
1902  return SDValue();
1903}
1904
1905static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
1906  EVT VT = Op.getValueType();
1907  SDValue N = Op.getOperand(0);
1908  SDValue Elt = Op.getOperand(1);
1909  DebugLoc dl = Op.getDebugLoc();
1910  SDValue retval;
1911
1912  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
1913    // Constant argument:
1914    int EltNo = (int) C->getZExtValue();
1915
1916    // sanity checks:
1917    if (VT == MVT::i8 && EltNo >= 16)
1918      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
1919    else if (VT == MVT::i16 && EltNo >= 8)
1920      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
1921    else if (VT == MVT::i32 && EltNo >= 4)
1922      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
1923    else if (VT == MVT::i64 && EltNo >= 2)
1924      llvm_unreachable("SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
1925
1926    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
1927      // i32 and i64: Element 0 is the preferred slot
1928      return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, N);
1929    }
1930
1931    // Need to generate shuffle mask and extract:
1932    int prefslot_begin = -1, prefslot_end = -1;
1933    int elt_byte = EltNo * VT.getSizeInBits() / 8;
1934
1935    switch (VT.getSimpleVT().SimpleTy) {
1936    default:
1937      assert(false && "Invalid value type!");
1938    case MVT::i8: {
1939      prefslot_begin = prefslot_end = 3;
1940      break;
1941    }
1942    case MVT::i16: {
1943      prefslot_begin = 2; prefslot_end = 3;
1944      break;
1945    }
1946    case MVT::i32:
1947    case MVT::f32: {
1948      prefslot_begin = 0; prefslot_end = 3;
1949      break;
1950    }
1951    case MVT::i64:
1952    case MVT::f64: {
1953      prefslot_begin = 0; prefslot_end = 7;
1954      break;
1955    }
1956    }
1957
1958    assert(prefslot_begin != -1 && prefslot_end != -1 &&
1959           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
1960
1961    unsigned int ShufBytes[16] = {
1962      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
1963    };
1964    for (int i = 0; i < 16; ++i) {
1965      // zero-fill the upper part of the preferred slot; don't care about
1966      // the other slots:
1967      unsigned int mask_val;
1968      if (i <= prefslot_end) {
1969        mask_val =
1970          ((i < prefslot_begin)
1971           ? 0x80
1972           : elt_byte + (i - prefslot_begin));
1973
1974        ShufBytes[i] = mask_val;
1975      } else
1976        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
1977    }
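
    // For example (illustrative): extracting i16 element 2 gives
    // elt_byte == 4 and preferred slot bytes 2-3, so ShufBytes becomes
    // (0x80, 0x80, 0x04, 0x05) repeated across the quadword: source bytes
    // 4-5 land in the i16 preferred slot and the two bytes above it are
    // zeroed.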
1978
1979    SDValue ShufMask[4];
1980    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
1981      unsigned bidx = i * 4;
1982      unsigned int bits = ((ShufBytes[bidx] << 24) |
1983                           (ShufBytes[bidx+1] << 16) |
1984                           (ShufBytes[bidx+2] << 8) |
1985                           ShufBytes[bidx+3]);
1986      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
1987    }
1988
1989    SDValue ShufMaskVec =
1990      DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
1991                  &ShufMask[0], sizeof(ShufMask)/sizeof(ShufMask[0]));
1992
1993    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
1994                         DAG.getNode(SPUISD::SHUFB, dl, N.getValueType(),
1995                                     N, N, ShufMaskVec));
1996  } else {
1997    // Variable index: Rotate the requested element into slot 0, then replicate
1998    // slot 0 across the vector
1999    EVT VecVT = N.getValueType();
2000    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2001      report_fatal_error("LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit"
2002                         " vector type!");
2003    }
2004
2005    // Make life easier by making sure the index is zero-extended to i32
2006    if (Elt.getValueType() != MVT::i32)
2007      Elt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Elt);
2008
2009    // Scale the index to a bit/byte shift quantity
2010    APInt scaleFactor =
2011            APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2012    unsigned scaleShift = scaleFactor.logBase2();
2013    SDValue vecShift;
2014
2015    if (scaleShift > 0) {
2016      // Scale the shift factor:
2017      Elt = DAG.getNode(ISD::SHL, dl, MVT::i32, Elt,
2018                        DAG.getConstant(scaleShift, MVT::i32));
2019    }
2020
2021    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, dl, VecVT, N, Elt);
2022
2023    // Replicate the bytes starting at byte 0 across the entire vector (for
2024    // consistency with the notion of a unified register set)
2025    SDValue replicate;
2026
2027    switch (VT.getSimpleVT().SimpleTy) {
2028    default:
2029      report_fatal_error("LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector"
2030                        "type");
2031      /*NOTREACHED*/
2032    case MVT::i8: {
2033      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2034      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2035                              factor, factor, factor, factor);
2036      break;
2037    }
2038    case MVT::i16: {
2039      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2040      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2041                              factor, factor, factor, factor);
2042      break;
2043    }
2044    case MVT::i32:
2045    case MVT::f32: {
2046      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2047      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2048                              factor, factor, factor, factor);
2049      break;
2050    }
2051    case MVT::i64:
2052    case MVT::f64: {
2053      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2054      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2055      replicate = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2056                              loFactor, hiFactor, loFactor, hiFactor);
2057      break;
2058    }
2059    }
2060
2061    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT,
2062                         DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2063                                     vecShift, vecShift, replicate));
2064  }
2065
2066  return retval;
2067}
2068
2069static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2070  SDValue VecOp = Op.getOperand(0);
2071  SDValue ValOp = Op.getOperand(1);
2072  SDValue IdxOp = Op.getOperand(2);
2073  DebugLoc dl = Op.getDebugLoc();
2074  EVT VT = Op.getValueType();
2075
2076  // Use index 0 when the lane to insert into is 'undef'.
2077  int64_t Idx=0;
2078  if (IdxOp.getOpcode() != ISD::UNDEF) {
2079    ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2080    assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2081    Idx = (CN->getSExtValue());
2082  }
2083
2084  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2085  // Use $sp ($1) because it's always 16-byte aligned and it's available:
2086  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, dl, PtrVT,
2087                                DAG.getRegister(SPU::R1, PtrVT),
2088                                DAG.getConstant(Idx, PtrVT));
2089  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, dl, VT, Pointer);
2090
2091  SDValue result =
2092    DAG.getNode(SPUISD::SHUFB, dl, VT,
2093                DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, ValOp),
2094                VecOp,
2095                DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, ShufMask));
2096
2097  return result;
2098}
2099
2100static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
2101                           const TargetLowering &TLI)
2102{
2103  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
2104  DebugLoc dl = Op.getDebugLoc();
2105  EVT ShiftVT = TLI.getShiftAmountTy();
2106
2107  assert(Op.getValueType() == MVT::i8);
2108  switch (Opc) {
2109  default:
2110    llvm_unreachable("Unhandled i8 math operator");
2111    /*NOTREACHED*/
2112    break;
2113  case ISD::ADD: {
2114    // 8-bit addition: Promote the arguments up to 16-bits and truncate
2115    // the result:
2116    SDValue N1 = Op.getOperand(1);
2117    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2118    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2119    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2120                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2121
2122  }
2123
2124  case ISD::SUB: {
2125    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2126    // the result:
2127    SDValue N1 = Op.getOperand(1);
2128    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2129    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2130    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2131                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2132  }
2133  case ISD::ROTR:
2134  case ISD::ROTL: {
2135    SDValue N1 = Op.getOperand(1);
2136    EVT N1VT = N1.getValueType();
2137
2138    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2139    if (!N1VT.bitsEq(ShiftVT)) {
2140      unsigned N1Opc = N1.getValueType().bitsLT(ShiftVT)
2141                       ? ISD::ZERO_EXTEND
2142                       : ISD::TRUNCATE;
2143      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2144    }
2145
2146    // Replicate the lower 8 bits into the upper 8:
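    // For example (illustrative): rotl of 0xAB by 4 first forms 0xABAB; a
    // 16-bit rotate by 4 then gives 0xBABA, and truncation yields 0xBA,
    // matching an 8-bit rotate of 0xAB by 4.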
2147    SDValue ExpandArg =
2148      DAG.getNode(ISD::OR, dl, MVT::i16, N0,
2149                  DAG.getNode(ISD::SHL, dl, MVT::i16,
2150                              N0, DAG.getConstant(8, MVT::i32)));
2151
2152    // Truncate back down to i8
2153    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2154                       DAG.getNode(Opc, dl, MVT::i16, ExpandArg, N1));
2155  }
2156  case ISD::SRL:
2157  case ISD::SHL: {
2158    SDValue N1 = Op.getOperand(1);
2159    EVT N1VT = N1.getValueType();
2160
2161    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i16, N0);
2162    if (!N1VT.bitsEq(ShiftVT)) {
2163      unsigned N1Opc = ISD::ZERO_EXTEND;
2164
2165      if (N1.getValueType().bitsGT(ShiftVT))
2166        N1Opc = ISD::TRUNCATE;
2167
2168      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2169    }
2170
2171    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2172                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2173  }
2174  case ISD::SRA: {
2175    SDValue N1 = Op.getOperand(1);
2176    EVT N1VT = N1.getValueType();
2177
2178    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2179    if (!N1VT.bitsEq(ShiftVT)) {
2180      unsigned N1Opc = ISD::SIGN_EXTEND;
2181
2182      if (N1VT.bitsGT(ShiftVT))
2183        N1Opc = ISD::TRUNCATE;
2184      N1 = DAG.getNode(N1Opc, dl, ShiftVT, N1);
2185    }
2186
2187    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2188                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2189  }
2190  case ISD::MUL: {
2191    SDValue N1 = Op.getOperand(1);
2192
2193    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N0);
2194    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i16, N1);
2195    return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
2196                       DAG.getNode(Opc, dl, MVT::i16, N0, N1));
2197    break;
2198  }
2199  }
2200
2201  return SDValue();
2202}
2203
2204//! Lower byte immediate operations for v16i8 vectors:
2205static SDValue
2206LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2207  SDValue ConstVec;
2208  SDValue Arg;
2209  EVT VT = Op.getValueType();
2210  DebugLoc dl = Op.getDebugLoc();
2211
2212  ConstVec = Op.getOperand(0);
2213  Arg = Op.getOperand(1);
2214  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2215    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2216      ConstVec = ConstVec.getOperand(0);
2217    } else {
2218      ConstVec = Op.getOperand(1);
2219      Arg = Op.getOperand(0);
2220      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2221        ConstVec = ConstVec.getOperand(0);
2222      }
2223    }
2224  }
2225
2226  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2227    BuildVectorSDNode *BCN = dyn_cast<BuildVectorSDNode>(ConstVec.getNode());
2228    assert(BCN != 0 && "Expected BuildVectorSDNode in SPU LowerByteImmed");
2229
2230    APInt APSplatBits, APSplatUndef;
2231    unsigned SplatBitSize;
2232    bool HasAnyUndefs;
2233    unsigned minSplatBits = VT.getVectorElementType().getSizeInBits();
2234
2235    if (BCN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
2236                              HasAnyUndefs, minSplatBits)
2237        && minSplatBits <= SplatBitSize) {
2238      uint64_t SplatBits = APSplatBits.getZExtValue();
2239      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2240
2241      SmallVector<SDValue, 16> tcVec;
2242      tcVec.assign(16, tc);
2243      return DAG.getNode(Op.getNode()->getOpcode(), dl, VT, Arg,
2244                         DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &tcVec[0], tcVec.size()));
2245    }
2246  }
2247
2248  // These operations (AND, OR, XOR) are legal; they just couldn't be custom
2249  // lowered.  Return the operation, rather than a null SDValue.
2250  return Op;
2251}
2252
2253//! Custom lowering for CTPOP (count population)
2254/*!
2255  Custom lowering code that counts the number of ones in the input
2256  operand. SPU has such an instruction, but it counts the number of
2257  ones per byte, which then have to be accumulated.
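
  For example (illustrative): for an i16 input of 0x0f0f, CNTB produces the
  per-byte counts 0x0404 in the preferred slot; the i16 path below folds the
  high count into the low byte ((x >> 8) + x) and masks with 0x0f, giving 8.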
2258*/
2259static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
2260  EVT VT = Op.getValueType();
2261  EVT vecVT = EVT::getVectorVT(*DAG.getContext(),
2262                               VT, (128 / VT.getSizeInBits()));
2263  DebugLoc dl = Op.getDebugLoc();
2264
2265  switch (VT.getSimpleVT().SimpleTy) {
2266  default:
2267    assert(false && "Invalid value type!");
2268  case MVT::i8: {
2269    SDValue N = Op.getOperand(0);
2270    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2271
2272    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2273    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2274
2275    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i8, CNTB, Elt0);
2276  }
2277
2278  case MVT::i16: {
2279    MachineFunction &MF = DAG.getMachineFunction();
2280    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2281
2282    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2283
2284    SDValue N = Op.getOperand(0);
2285    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
2286    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
2287    SDValue Shift1 = DAG.getConstant(8, MVT::i32);
2288
2289    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2290    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2291
2292    // CNTB_result becomes the chain to which the virtual register
2293    // CNTB_reg becomes associated:
2294    SDValue CNTB_result =
2295      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, CNTB, Elt0);
2296
2297    SDValue CNTB_rescopy =
2298      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2299
2300    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i16);
2301
2302    return DAG.getNode(ISD::AND, dl, MVT::i16,
2303                       DAG.getNode(ISD::ADD, dl, MVT::i16,
2304                                   DAG.getNode(ISD::SRL, dl, MVT::i16,
2305                                               Tmp1, Shift1),
2306                                   Tmp1),
2307                       Mask0);
2308  }
2309
2310  case MVT::i32: {
2311    MachineFunction &MF = DAG.getMachineFunction();
2312    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2313
2314    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2315    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2316
2317    SDValue N = Op.getOperand(0);
2318    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
2319    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
2320    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
2321    SDValue Shift2 = DAG.getConstant(8, MVT::i32);
2322
2323    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, dl, vecVT, N, N);
2324    SDValue CNTB = DAG.getNode(SPUISD::CNTB, dl, vecVT, Promote);
2325
2326    // CNTB_result becomes the chain to which all of the virtual registers
2327    // CNTB_reg, SUM1_reg become associated:
2328    SDValue CNTB_result =
2329      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, CNTB, Elt0);
2330
2331    SDValue CNTB_rescopy =
2332      DAG.getCopyToReg(CNTB_result, dl, CNTB_reg, CNTB_result);
2333
2334    SDValue Comp1 =
2335      DAG.getNode(ISD::SRL, dl, MVT::i32,
2336                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32),
2337                  Shift1);
2338
2339    SDValue Sum1 =
2340      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp1,
2341                  DAG.getCopyFromReg(CNTB_rescopy, dl, CNTB_reg, MVT::i32));
2342
2343    SDValue Sum1_rescopy =
2344      DAG.getCopyToReg(CNTB_result, dl, SUM1_reg, Sum1);
2345
2346    SDValue Comp2 =
2347      DAG.getNode(ISD::SRL, dl, MVT::i32,
2348                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32),
2349                  Shift2);
2350    SDValue Sum2 =
2351      DAG.getNode(ISD::ADD, dl, MVT::i32, Comp2,
2352                  DAG.getCopyFromReg(Sum1_rescopy, dl, SUM1_reg, MVT::i32));
2353
2354    return DAG.getNode(ISD::AND, dl, MVT::i32, Sum2, Mask0);
2355  }
2356
2357  case MVT::i64:
2358    break;
2359  }
2360
2361  return SDValue();
2362}
2363
2364//! Lower ISD::FP_TO_SINT, ISD::FP_TO_UINT for i32
2365/*!
2366 f32->i32 passes through unchanged, whereas f64->i32 expands to a libcall.
2367 All conversions to i64 are expanded to a libcall.
2368 */
2369static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
2370                              const SPUTargetLowering &TLI) {
2371  EVT OpVT = Op.getValueType();
2372  SDValue Op0 = Op.getOperand(0);
2373  EVT Op0VT = Op0.getValueType();
2374
2375  if ((OpVT == MVT::i32 && Op0VT == MVT::f64)
2376      || OpVT == MVT::i64) {
2377    // Convert f32 / f64 to i32 / i64 via libcall.
2378    RTLIB::Libcall LC =
2379            (Op.getOpcode() == ISD::FP_TO_SINT)
2380             ? RTLIB::getFPTOSINT(Op0VT, OpVT)
2381             : RTLIB::getFPTOUINT(Op0VT, OpVT);
2382    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-int conversion!");
2383    SDValue Dummy;
2384    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2385  }
2386
2387  return Op;
2388}
2389
2390//! Lower ISD::SINT_TO_FP, ISD::UINT_TO_FP for i32
2391/*!
2392 i32->f32 passes through unchanged, whereas i32->f64 is expanded to a libcall.
2393 All conversions from i64 are expanded to a libcall.
2394 */
2395static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG,
2396                              const SPUTargetLowering &TLI) {
2397  EVT OpVT = Op.getValueType();
2398  SDValue Op0 = Op.getOperand(0);
2399  EVT Op0VT = Op0.getValueType();
2400
2401  if ((OpVT == MVT::f64 && Op0VT == MVT::i32)
2402      || Op0VT == MVT::i64) {
2403    // Convert i32, i64 to f64 via libcall:
2404    RTLIB::Libcall LC =
2405            (Op.getOpcode() == ISD::SINT_TO_FP)
2406             ? RTLIB::getSINTTOFP(Op0VT, OpVT)
2407             : RTLIB::getUINTTOFP(Op0VT, OpVT);
2408    assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected int-to-fp conversion!");
2409    SDValue Dummy;
2410    return ExpandLibCall(LC, Op, DAG, false, Dummy, TLI);
2411  }
2412
2413  return Op;
2414}
2415
2416//! Lower ISD::SETCC
2417/*!
2418 This handles MVT::f64 (double floating point) condition lowering
2419 */
2420static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG,
2421                          const TargetLowering &TLI) {
2422  CondCodeSDNode *CC = dyn_cast<CondCodeSDNode>(Op.getOperand(2));
2423  DebugLoc dl = Op.getDebugLoc();
2424  assert(CC != 0 && "LowerSETCC: CondCodeSDNode should not be null here!\n");
2425
2426  SDValue lhs = Op.getOperand(0);
2427  SDValue rhs = Op.getOperand(1);
2428  EVT lhsVT = lhs.getValueType();
2429  assert(lhsVT == MVT::f64 && "LowerSETCC: type other than MVT::f64\n");
2430
2431  EVT ccResultVT = TLI.getSetCCResultType(lhs.getValueType());
2433  EVT IntVT(MVT::i64);
2434
2435  // Take advantage of the fact that (truncate (srl arg, 32)) is efficiently
2436  // selected to a NOP:
2437  SDValue i64lhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, lhs);
2438  SDValue lhsHi32 =
2439          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2440                      DAG.getNode(ISD::SRL, dl, IntVT,
2441                                  i64lhs, DAG.getConstant(32, MVT::i32)));
2442  SDValue lhsHi32abs =
2443          DAG.getNode(ISD::AND, dl, MVT::i32,
2444                      lhsHi32, DAG.getConstant(0x7fffffff, MVT::i32));
2445  SDValue lhsLo32 =
2446          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, i64lhs);
2447
2448  // SETO and SETUO only use the lhs operand:
2449  if (CC->get() == ISD::SETO) {
2450    // Evaluates to true if Op0 is not [SQ]NaN - lowers to the inverse of
2451    // SETUO
2452    APInt ccResultAllOnes = APInt::getAllOnesValue(ccResultVT.getSizeInBits());
2453    return DAG.getNode(ISD::XOR, dl, ccResultVT,
2454                       DAG.getSetCC(dl, ccResultVT,
2455                                    lhs, DAG.getConstantFP(0.0, lhsVT),
2456                                    ISD::SETUO),
2457                       DAG.getConstant(ccResultAllOnes, ccResultVT));
2458  } else if (CC->get() == ISD::SETUO) {
2459    // Evaluates to true if Op0 is [SQ]NaN
2460    return DAG.getNode(ISD::AND, dl, ccResultVT,
2461                       DAG.getSetCC(dl, ccResultVT,
2462                                    lhsHi32abs,
2463                                    DAG.getConstant(0x7ff00000, MVT::i32),
2464                                    ISD::SETGE),
2465                       DAG.getSetCC(dl, ccResultVT,
2466                                    lhsLo32,
2467                                    DAG.getConstant(0, MVT::i32),
2468                                    ISD::SETGT));
2469  }
2470
2471  SDValue i64rhs = DAG.getNode(ISD::BIT_CONVERT, dl, IntVT, rhs);
2472  SDValue rhsHi32 =
2473          DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
2474                      DAG.getNode(ISD::SRL, dl, IntVT,
2475                                  i64rhs, DAG.getConstant(32, MVT::i32)));
2476
2477  // If a value is negative, subtract from the sign magnitude constant:
2478  SDValue signMag2TC = DAG.getConstant(0x8000000000000000ULL, IntVT);
2479
2480  // Convert the sign-magnitude representation into 2's complement:
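  // For example (illustrative): -1.0 has bit pattern 0xBFF0000000000000; the
  // sign mask selects 0x8000000000000000 - bits = -0x3FF0000000000000, while
  // +1.0 remains 0x3FF0000000000000, so a plain signed integer compare now
  // orders the two values correctly.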
2481  SDValue lhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2482                                      lhsHi32, DAG.getConstant(31, MVT::i32));
2483  SDValue lhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64lhs);
2484  SDValue lhsSelect =
2485          DAG.getNode(ISD::SELECT, dl, IntVT,
2486                      lhsSelectMask, lhsSignMag2TC, i64lhs);
2487
2488  SDValue rhsSelectMask = DAG.getNode(ISD::SRA, dl, ccResultVT,
2489                                      rhsHi32, DAG.getConstant(31, MVT::i32));
2490  SDValue rhsSignMag2TC = DAG.getNode(ISD::SUB, dl, IntVT, signMag2TC, i64rhs);
2491  SDValue rhsSelect =
2492          DAG.getNode(ISD::SELECT, dl, IntVT,
2493                      rhsSelectMask, rhsSignMag2TC, i64rhs);
2494
2495  unsigned compareOp;
2496
2497  switch (CC->get()) {
2498  case ISD::SETOEQ:
2499  case ISD::SETUEQ:
2500    compareOp = ISD::SETEQ; break;
2501  case ISD::SETOGT:
2502  case ISD::SETUGT:
2503    compareOp = ISD::SETGT; break;
2504  case ISD::SETOGE:
2505  case ISD::SETUGE:
2506    compareOp = ISD::SETGE; break;
2507  case ISD::SETOLT:
2508  case ISD::SETULT:
2509    compareOp = ISD::SETLT; break;
2510  case ISD::SETOLE:
2511  case ISD::SETULE:
2512    compareOp = ISD::SETLE; break;
2513  case ISD::SETUNE:
2514  case ISD::SETONE:
2515    compareOp = ISD::SETNE; break;
2516  default:
2517    report_fatal_error("CellSPU ISel Select: unimplemented f64 condition");
2518  }
2519
2520  SDValue result =
2521          DAG.getSetCC(dl, ccResultVT, lhsSelect, rhsSelect,
2522                       (ISD::CondCode) compareOp);
2523
2524  if ((CC->get() & 0x8) == 0) {
2525    // Ordered comparison:
2526    SDValue lhsNaN = DAG.getSetCC(dl, ccResultVT,
2527                                  lhs, DAG.getConstantFP(0.0, MVT::f64),
2528                                  ISD::SETO);
2529    SDValue rhsNaN = DAG.getSetCC(dl, ccResultVT,
2530                                  rhs, DAG.getConstantFP(0.0, MVT::f64),
2531                                  ISD::SETO);
2532    SDValue ordered = DAG.getNode(ISD::AND, dl, ccResultVT, lhsNaN, rhsNaN);
2533
2534    result = DAG.getNode(ISD::AND, dl, ccResultVT, ordered, result);
2535  }
2536
2537  return result;
2538}
2539
2540//! Lower ISD::SELECT_CC
2541/*!
2542  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2543  SELB instruction.
2544
2545  \note Need to revisit this in the future: if the code path through the true
2546  and false value computations is longer than the latency of a branch (6
2547  cycles), then it would be more advantageous to branch and insert a new basic
2548  block and branch on the condition. However, this code does not make that
2549  assumption, given the simplistic uses so far.
2550 */
2551
2552static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2553                              const TargetLowering &TLI) {
2554  EVT VT = Op.getValueType();
2555  SDValue lhs = Op.getOperand(0);
2556  SDValue rhs = Op.getOperand(1);
2557  SDValue trueval = Op.getOperand(2);
2558  SDValue falseval = Op.getOperand(3);
2559  SDValue condition = Op.getOperand(4);
2560  DebugLoc dl = Op.getDebugLoc();
2561
2562  // NOTE: SELB's arguments: $rA, $rB, $mask
2563  //
2564  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2565  // where bits in $mask are 1. The SETCC result has 1s where the condition
2566  // was true and 0s where it was false; hence, the true/false arguments to
2567  // SELB get reversed below.
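  //
  // In bitwise terms (illustrative), selb $rT, $rA, $rB, $rM computes
  //   $rT = ($rA & ~$rM) | ($rB & $rM).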
2568
2569  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2570  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2571  // with another "cannot select select_cc" assert:
2572
2573  SDValue compare = DAG.getNode(ISD::SETCC, dl,
2574                                TLI.getSetCCResultType(Op.getValueType()),
2575                                lhs, rhs, condition);
2576  return DAG.getNode(SPUISD::SELB, dl, VT, falseval, trueval, compare);
2577}
2578
2579//! Custom lower ISD::TRUNCATE
2580static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2581{
2582  // Type to truncate to
2583  EVT VT = Op.getValueType();
2584  MVT simpleVT = VT.getSimpleVT();
2585  EVT VecVT = EVT::getVectorVT(*DAG.getContext(),
2586                               VT, (128 / VT.getSizeInBits()));
2587  DebugLoc dl = Op.getDebugLoc();
2588
2589  // Type to truncate from
2590  SDValue Op0 = Op.getOperand(0);
2591  EVT Op0VT = Op0.getValueType();
2592
2593  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2594    // Create shuffle mask, least significant doubleword of quadword
2595    unsigned maskHigh = 0x08090a0b;
2596    unsigned maskLow = 0x0c0d0e0f;
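    // For example (illustrative): for a big-endian quadword with bytes
    // b0..b15, the control words (0x08090a0b, 0x0c0d0e0f) select bytes
    // b8..b15, so the least significant doubleword is replicated into both
    // halves of the result and ends up in the i64 preferred slot.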
2597    // Use a shuffle to perform the truncation
2598    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2599                                   DAG.getConstant(maskHigh, MVT::i32),
2600                                   DAG.getConstant(maskLow, MVT::i32),
2601                                   DAG.getConstant(maskHigh, MVT::i32),
2602                                   DAG.getConstant(maskLow, MVT::i32));
2603
2604    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, dl, VecVT,
2605                                       Op0, Op0, shufMask);
2606
2607    return DAG.getNode(SPUISD::VEC2PREFSLOT, dl, VT, truncShuffle);
2608  }
2609
2610  return SDValue();             // Leave the truncate unmolested
2611}
2612
2613/*!
2614 * Emit the instruction sequence for i64/i32 -> i128 sign extend. The basic
2615 * algorithm is to duplicate the sign bit using rotmai to generate at
2616 * least one byte full of sign bits. Then propagate the "sign-byte" into
2617 * the leftmost words and the i64/i32 into the rightmost words using shufb.
2618 *
2619 * @param Op The sext operand
2620 * @param DAG The current DAG
2621 * @return The SDValue with the entire instruction sequence
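 *
 * For example (illustrative): when extending an i64, the mask built below
 * assembles (s,s,s,s, s,s,s,s, b0..b3, b4..b7), where each s byte is taken
 * from the sign-filled shift result and b0..b7 are the original input bytes.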
2622 */
2623static SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG)
2624{
2625  DebugLoc dl = Op.getDebugLoc();
2626
2627  // Type to extend to
2628  MVT OpVT = Op.getValueType().getSimpleVT();
2629
2630  // Type to extend from
2631  SDValue Op0 = Op.getOperand(0);
2632  MVT Op0VT = Op0.getValueType().getSimpleVT();
2633
2634  // The type to extend to needs to be an i128 and
2635  // the type to extend from needs to be i64 or i32.
2636  assert((OpVT == MVT::i128 && (Op0VT == MVT::i64 || Op0VT == MVT::i32)) &&
2637          "LowerSIGN_EXTEND: input and/or output operand have wrong size");
2638
2639  // Create shuffle mask
2640  unsigned mask1 = 0x10101010; // byte 0 - 3 and 4 - 7
2641  unsigned mask2 = Op0VT == MVT::i64 ? 0x00010203 : 0x10101010; // byte  8 - 11
2642  unsigned mask3 = Op0VT == MVT::i64 ? 0x04050607 : 0x00010203; // byte 12 - 15
2643  SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32,
2644                                 DAG.getConstant(mask1, MVT::i32),
2645                                 DAG.getConstant(mask1, MVT::i32),
2646                                 DAG.getConstant(mask2, MVT::i32),
2647                                 DAG.getConstant(mask3, MVT::i32));
2648
2649  // Word wise arithmetic right shift to generate at least one byte
2650  // that contains sign bits.
2651  MVT mvt = Op0VT == MVT::i64 ? MVT::v2i64 : MVT::v4i32;
2652  SDValue sraVal = DAG.getNode(ISD::SRA,
2653                 dl,
2654                 mvt,
2655                 DAG.getNode(SPUISD::PREFSLOT2VEC, dl, mvt, Op0, Op0),
2656                 DAG.getConstant(31, MVT::i32));
2657
2658  // Shuffle bytes - Copy the sign bits into the upper 64 bits
2659  // and the input value into the lower 64 bits.
2660  SDValue extShuffle = DAG.getNode(SPUISD::SHUFB, dl, mvt,
2661      DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i128, Op0), sraVal, shufMask);
2662
2663  return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i128, extShuffle);
2664}
2665
2666//! Custom (target-specific) lowering entry point
2667/*!
2668  This is where LLVM's DAG selection process calls to do target-specific
2669  lowering of nodes.
2670 */
2671SDValue
2672SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2673{
2674  unsigned Opc = (unsigned) Op.getOpcode();
2675  EVT VT = Op.getValueType();
2676
2677  switch (Opc) {
2678  default: {
2679#ifndef NDEBUG
2680    errs() << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2681    errs() << "Op.getOpcode() = " << Opc << "\n";
2682    errs() << "*Op.getNode():\n";
2683    Op.getNode()->dump();
2684#endif
2685    llvm_unreachable(0);
2686  }
2687  case ISD::LOAD:
2688  case ISD::EXTLOAD:
2689  case ISD::SEXTLOAD:
2690  case ISD::ZEXTLOAD:
2691    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2692  case ISD::STORE:
2693    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2694  case ISD::ConstantPool:
2695    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2696  case ISD::GlobalAddress:
2697    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2698  case ISD::JumpTable:
2699    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2700  case ISD::ConstantFP:
2701    return LowerConstantFP(Op, DAG);
2702
2703  // i8, i64 math ops:
2704  case ISD::ADD:
2705  case ISD::SUB:
2706  case ISD::ROTR:
2707  case ISD::ROTL:
2708  case ISD::SRL:
2709  case ISD::SHL:
2710  case ISD::SRA: {
2711    if (VT == MVT::i8)
2712      return LowerI8Math(Op, DAG, Opc, *this);
2713    break;
2714  }
2715
2716  case ISD::FP_TO_SINT:
2717  case ISD::FP_TO_UINT:
2718    return LowerFP_TO_INT(Op, DAG, *this);
2719
2720  case ISD::SINT_TO_FP:
2721  case ISD::UINT_TO_FP:
2722    return LowerINT_TO_FP(Op, DAG, *this);
2723
2724  // Vector-related lowering.
2725  case ISD::BUILD_VECTOR:
2726    return LowerBUILD_VECTOR(Op, DAG);
2727  case ISD::SCALAR_TO_VECTOR:
2728    return LowerSCALAR_TO_VECTOR(Op, DAG);
2729  case ISD::VECTOR_SHUFFLE:
2730    return LowerVECTOR_SHUFFLE(Op, DAG);
2731  case ISD::EXTRACT_VECTOR_ELT:
2732    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2733  case ISD::INSERT_VECTOR_ELT:
2734    return LowerINSERT_VECTOR_ELT(Op, DAG);
2735
2736  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2737  case ISD::AND:
2738  case ISD::OR:
2739  case ISD::XOR:
2740    return LowerByteImmed(Op, DAG);
2741
2742  // Vector and i8 multiply:
2743  case ISD::MUL:
2744    if (VT == MVT::i8)
2745      return LowerI8Math(Op, DAG, Opc, *this);
2746    break;

2747  case ISD::CTPOP:
2748    return LowerCTPOP(Op, DAG);
2749
2750  case ISD::SELECT_CC:
2751    return LowerSELECT_CC(Op, DAG, *this);
2752
2753  case ISD::SETCC:
2754    return LowerSETCC(Op, DAG, *this);
2755
2756  case ISD::TRUNCATE:
2757    return LowerTRUNCATE(Op, DAG);
2758
2759  case ISD::SIGN_EXTEND:
2760    return LowerSIGN_EXTEND(Op, DAG);
2761  }
2762
2763  return SDValue();
2764}
2765
2766void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
2767                                           SmallVectorImpl<SDValue> &Results,
2768                                           SelectionDAG &DAG) const
2769{
2770#if 0
2771  unsigned Opc = (unsigned) N->getOpcode();
2772  EVT OpVT = N->getValueType(0);
2773
2774  switch (Opc) {
2775  default: {
2776    errs() << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
2777    errs() << "Op.getOpcode() = " << Opc << "\n";
2778    errs() << "*Op.getNode():\n";
2779    N->dump();
2780    abort();
2781    /*NOTREACHED*/
2782  }
2783  }
2784#endif
2785
2786  /* Otherwise, return unchanged */
2787}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  EVT NodeVT = N->getValueType(0);      // The node's value type
  EVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result
  DebugLoc dl = N->getDebugLoc();

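  // The combines below fold redundant SPU address arithmetic and cancel
  // prefslot<->vector round trips that lowering tends to introduce.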
  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode>(AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
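          // e.g. (add (SPUindirect %reg, 16), 32) becomes
          // (SPUindirect %reg, 48).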
          ConstantSDNode *CN1 = cast<ConstantSDNode>(IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUvec2prefslot <arg>)) ->
      // (SPUvec2prefslot <arg>)
      // The types must match, however.
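      // With matching types the extension cannot change any bits, so the
      // node can simply be replaced by its operand.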
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        errs() << "\nReplace: ";
        N->dump(&DAG);
        errs() << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        errs() << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->isNullValue()) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)
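        // The A-form operand is already an absolute local-store address,
        // so indirecting through it with a zero offset adds nothing.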

        DEBUG(errs() << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(errs() << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(errs() << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
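        // Folding the add into the indirect address lets selection use a
        // reg+reg (x-form) access for what was reg+reg+0.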
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            errs() << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, dl, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts: a shift or rotate by zero leaves the
    // operand unchanged.
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
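      // This is a round trip through the preferred slot: extract, extend,
      // reinsert. When the vector types agree, the original vector already
      // holds the desired value.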
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }

  // An empty Result leaves the original node unchanged; otherwise report
  // the replacement (in debug builds) and hand it back to the combiner.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(errs() << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(errs() << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(errs() << "\n");
  }
#endif

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

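//! Map a single-letter register constraint and value type to a register class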
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const
{
  if (Constraint.size() == 1) {
    // Single-letter constraints borrowed from the GCC RS6000/PowerPC port,
    // mapped onto the SPU's own register classes:
    switch (Constraint[0]) {
    case 'b':   // base register
    case 'r':   // general-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * CHAR_BIT;

  switch (Op.getOpcode()) {
  default:
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;
  case SPUISD::CALL:
  case SPUISD::SHUFB:
  case SPUISD::SHUFFLE_MASK:
  case SPUISD::CNTB:
  case SPUISD::PREFSLOT2VEC:
  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  }
#endif
}

unsigned
SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
                                                   unsigned Depth) const {
  switch (Op.getOpcode()) {
  default:
    return 1;

  case ISD::SETCC: {
    EVT VT = Op.getValueType();

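    // SPU compare instructions produce an all-ones or all-zeros result, so
    // every bit of the (possibly widened) setcc result is a copy of the
    // sign bit.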
    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
      VT = MVT::i32;
    }
    return VT.getSizeInBits();
  }
  }
}

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // The SPU's local store is 256K, so a legal address immediate must stay
  // within that range:
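  // 1 << 18 == 262144 (256K), so values strictly between -262144 and
  // 262143 are accepted.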
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}
