SPUISelLowering.cpp revision 09d3fdc254c0b922c38f7c2bcad27c02fa0904f3
//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the SPUTargetLowering class.
//
//===----------------------------------------------------------------------===//

#include "SPURegisterNames.h"
#include "SPUISelLowering.h"
#include "SPUTargetMachine.h"
#include "SPUFrameInfo.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"

#include <map>

using namespace llvm;

// Used in getTargetNodeName() below
namespace {
  std::map<unsigned, const char *> node_names;

  //! MVT mapping to useful data for Cell SPU
  struct valtype_map_s {
    const MVT valtype;
    const int prefslot_byte;
  };

  const valtype_map_s valtype_map[] = {
    { MVT::i1,   3 },
    { MVT::i8,   3 },
    { MVT::i16,  2 },
    { MVT::i32,  0 },
    { MVT::f32,  0 },
    { MVT::i64,  0 },
    { MVT::f64,  0 },
    { MVT::i128, 0 }
  };
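  // The preferred slot is the left-most word (bytes 0-3) of the 128-bit
  // register; sub-word scalars sit right-justified within it. For example,
  // an i16 occupies bytes 2-3 of the slot, hence a prefslot_byte of 2.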

  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);

  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
    const valtype_map_s *retval = 0;

    for (size_t i = 0; i < n_valtype_map; ++i) {
      if (valtype_map[i].valtype == VT) {
        retval = valtype_map + i;
        break;
      }
    }

#ifndef NDEBUG
    if (retval == 0) {
      cerr << "getValueTypeMapEntry returns NULL for "
           << VT.getMVTString()
           << "\n";
      abort();
    }
#endif

    return retval;
  }

  //! Predicate that returns true if operand is a memory target
  /*!
    \arg Op Operand to test
    \return true if the operand is a memory target (i.e., global
    address, external symbol, constant pool) or an A-form
    address.
   */
  bool isMemoryOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::GlobalAddress
            || Opc == ISD::GlobalTLSAddress
            || Opc == ISD::JumpTable
            || Opc == ISD::ConstantPool
            || Opc == ISD::ExternalSymbol
            || Opc == ISD::TargetGlobalAddress
            || Opc == ISD::TargetGlobalTLSAddress
            || Opc == ISD::TargetJumpTable
            || Opc == ISD::TargetConstantPool
            || Opc == ISD::TargetExternalSymbol
            || Opc == SPUISD::AFormAddr);
  }

  //! Predicate that returns true if the operand is an indirect target
  bool isIndirectOperand(const SDOperand &Op)
  {
    const unsigned Opc = Op.getOpcode();
    return (Opc == ISD::Register
            || Opc == SPUISD::LDRESULT);
  }
}

SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
  : TargetLowering(TM),
    SPUTM(TM)
{
  // Fold away setcc operations if possible.
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the SPU's register classes:
  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);

  // SPU has no sign or zero extended loads for i1, i8, i16:
  setLoadXAction(ISD::EXTLOAD,  MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i1, Promote);
  setTruncStoreAction(MVT::i8, MVT::i1, Custom);
  setTruncStoreAction(MVT::i16, MVT::i1, Custom);
  setTruncStoreAction(MVT::i32, MVT::i1, Custom);
  setTruncStoreAction(MVT::i64, MVT::i1, Custom);
  setTruncStoreAction(MVT::i128, MVT::i1, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i8, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i8, Custom);
  setTruncStoreAction(MVT::i8  , MVT::i8, Custom);
  setTruncStoreAction(MVT::i16 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i32 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i64 , MVT::i8, Custom);
  setTruncStoreAction(MVT::i128, MVT::i8, Custom);

  setLoadXAction(ISD::EXTLOAD,  MVT::i16, Custom);
  setLoadXAction(ISD::SEXTLOAD, MVT::i16, Custom);
  setLoadXAction(ISD::ZEXTLOAD, MVT::i16, Custom);

  // SPU constant load actions are custom lowered:
  setOperationAction(ISD::Constant,   MVT::i64, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);

  // SPU's loads and stores have to be custom lowered:
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::LOAD, VT, Custom);
    setOperationAction(ISD::STORE, VT, Custom);
  }

  // Custom lower BRCOND so that i1 and i8 conditions are "promoted" to
  // i32 and i16, respectively.
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  // Expand the jumptable branches
  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);

  // SPU has no intrinsics for these particular operations:
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // SPU has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // SPU has no hardware square root instruction; expand it
  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
  setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // SPU can do rotate right and left for i16/i32, so those are legal; i8 has
  // to be custom lowered because no rotate instructions exist for it.
  setOperationAction(ISD::ROTR, MVT::i32,    Legal);
  setOperationAction(ISD::ROTR, MVT::i16,    Legal);
  setOperationAction(ISD::ROTR, MVT::i8,     Custom);
  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
  setOperationAction(ISD::ROTL, MVT::i8,     Custom);
  // SPU has no native version of shift left/right for i8
  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
  setOperationAction(ISD::SRA,  MVT::i8,     Custom);
  // And SPU needs custom lowering for shift left/right for i64
  setOperationAction(ISD::SHL,  MVT::i64,    Custom);
  setOperationAction(ISD::SRL,  MVT::i64,    Custom);
  setOperationAction(ISD::SRA,  MVT::i64,    Custom);

  // Custom lower i32 multiplications
  setOperationAction(ISD::MUL,  MVT::i32,    Custom);

  // Need to custom handle (some) common i8, i64 math ops
  setOperationAction(ISD::ADD,  MVT::i64,    Custom);
  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
  setOperationAction(ISD::SUB,  MVT::i64,    Custom);
  setOperationAction(ISD::MUL,  MVT::i8,     Custom);

  // SPU does not have BSWAP. It does have CTLZ for i32;
  // CTPOP has to be custom lowered.
  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);

  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);

  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);

  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);

  // SPU has a version of select that implements (a&~c)|(b&c), just like
  // select ought to work:
  setOperationAction(ISD::SELECT, MVT::i1,   Promote);
  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
  setOperationAction(ISD::SELECT, MVT::i64,  Expand);

  setOperationAction(ISD::SETCC, MVT::i1,    Promote);
  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
  setOperationAction(ISD::SETCC, MVT::i64,   Expand);

  // Zero extension and sign extension for i64 have to be
  // custom legalized
  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);

  // SPU has a legal FP -> signed INT instruction
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  // FDIV on SPU requires custom lowering
  setOperationAction(ISD::FDIV, MVT::f32, Custom);
  //setOperationAction(ISD::FDIV, MVT::f64, Custom);

  // SPU has [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);

  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  for (unsigned sctype = (unsigned) MVT::i1; sctype < (unsigned) MVT::f128;
       ++sctype) {
    MVT VT = (MVT::SimpleValueType)sctype;

    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::ConstantPool,  VT, Custom);
    setOperationAction(ISD::JumpTable,     VT, Custom);
  }

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET,           MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);

  // Cell SPU's i64 <-> FP conversions were custom lowered above; to take
  // advantage of the i64 FP_TO_SINT lowering, promote i32 FP_TO_UINT:
  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);

  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);

  // First set operation action for all vector types to expand. Then we
  // will selectively turn on ones that can be effectively codegen'd.
  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);

  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
    MVT VT = (MVT::SimpleValueType)i;

    // add/sub are legal for all supported vector VT's.
    setOperationAction(ISD::ADD , VT, Legal);
    setOperationAction(ISD::SUB , VT, Legal);
    // mul has to be custom lowered.
    setOperationAction(ISD::MUL , VT, Custom);

    setOperationAction(ISD::AND   , VT, Legal);
    setOperationAction(ISD::OR    , VT, Legal);
    setOperationAction(ISD::XOR   , VT, Legal);
    setOperationAction(ISD::LOAD  , VT, Legal);
    setOperationAction(ISD::SELECT, VT, Legal);
    setOperationAction(ISD::STORE,  VT, Legal);

    // These operations need to be expanded:
    setOperationAction(ISD::SDIV, VT, Expand);
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::UDIV, VT, Expand);
    setOperationAction(ISD::UREM, VT, Expand);
    setOperationAction(ISD::FDIV, VT, Custom);

    // Custom lower build_vector, constant pool spills, insert and
    // extract vector elements:
    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  }

  setOperationAction(ISD::MUL, MVT::v16i8, Custom);
  setOperationAction(ISD::AND, MVT::v16i8, Custom);
  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  setStackPointerRegisterToSaveRestore(SPU::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::ZERO_EXTEND);
  setTargetDAGCombine(ISD::SIGN_EXTEND);
  setTargetDAGCombine(ISD::ANY_EXTEND);

  computeRegisterProperties();
}

const char *
SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  if (node_names.empty()) {
    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
    node_names[(unsigned) SPUISD::INSERT_MASK] = "SPUISD::INSERT_MASK";
    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
    node_names[(unsigned) SPUISD::PROMOTE_SCALAR] = "SPUISD::PROMOTE_SCALAR";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0] = "SPUISD::EXTRACT_ELT0";
    node_names[(unsigned) SPUISD::EXTRACT_ELT0_CHAINED] =
      "SPUISD::EXTRACT_ELT0_CHAINED";
    node_names[(unsigned) SPUISD::EXTRACT_I1_ZEXT] = "SPUISD::EXTRACT_I1_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I1_SEXT] = "SPUISD::EXTRACT_I1_SEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_ZEXT] = "SPUISD::EXTRACT_I8_ZEXT";
    node_names[(unsigned) SPUISD::EXTRACT_I8_SEXT] = "SPUISD::EXTRACT_I8_SEXT";
    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
      "SPUISD::ROTQUAD_RZ_BYTES";
    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
      "SPUISD::ROTQUAD_RZ_BITS";
    node_names[(unsigned) SPUISD::ROTBYTES_RIGHT_S] =
      "SPUISD::ROTBYTES_RIGHT_S";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_CHAINED] =
      "SPUISD::ROTBYTES_LEFT_CHAINED";
    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
      "SPUISD::ROTBYTES_LEFT_BITS";
    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
  }

  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);

  return ((i != node_names.end()) ? i->second : 0);
}

MVT SPUTargetLowering::getSetCCResultType(const SDOperand &Op) const {
  MVT VT = Op.getValueType();
  if (VT.isInteger())
    return VT;
  else
    return MVT::i32;
}

//===----------------------------------------------------------------------===//
// Calling convention code:
//===----------------------------------------------------------------------===//

#include "SPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// Aligned load common code for CellSPU
/*!
  \param[in] Op The SelectionDAG load or store operand
  \param[in] DAG The selection DAG
  \param[in] ST CellSPU subtarget information structure
  \param[in] LSN The load or store node being lowered
  \param[in,out] alignment Caller initializes this to the load or store node's
  value from getAlignment(); may be updated while generating the aligned load
  \param[in,out] alignOffs Aligned offset; set by AlignedLoad to the aligned
  offset (a multiple of 16)
  \param[in,out] prefSlotOffs Preferred slot offset; set by AlignedLoad to the
  offset of the preferred slot (modulo 16 != 0)
  \param[in,out] VT Caller initializes this to the load or store node's loaded
  or stored value type; may be updated for an i1-extended load or store
  \param[out] was16aligned true if the base pointer had 16-byte alignment,
  otherwise false. Can help to determine if the chunk needs to be rotated.

 Both load and store lowering load a block of data aligned on a 16-byte
 boundary. This is the common aligned load code shared between both.
 */
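// Worked example of the offset math (illustrative only): an i32 load at byte
// offset 6 within its 16-byte chunk yields alignOffs = 6 and
// prefSlotOffs = 6 - 0 = 6; LowerLOAD then rotates the loaded quadword left
// by 6 bytes so the value lands in the preferred slot (bytes 0-3).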
static SDOperand
AlignedLoad(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST,
            LSBaseSDNode *LSN,
            unsigned &alignment, int &alignOffs, int &prefSlotOffs,
            MVT &VT, bool &was16aligned)
{
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  const valtype_map_s *vtm = getValueTypeMapEntry(VT);
  SDOperand basePtr = LSN->getBasePtr();
  SDOperand chain = LSN->getChain();

  if (basePtr.getOpcode() == ISD::ADD) {
    SDOperand Op1 = basePtr.Val->getOperand(1);

    if (Op1.getOpcode() == ISD::Constant
        || Op1.getOpcode() == ISD::TargetConstant) {
      const ConstantSDNode *CN = cast<ConstantSDNode>(basePtr.getOperand(1));

      alignOffs = (int) CN->getValue();
      prefSlotOffs = (int) (alignOffs & 0xf);

      // Adjust the rotation amount to ensure that the final result ends up in
      // the preferred slot:
      prefSlotOffs -= vtm->prefslot_byte;
      basePtr = basePtr.getOperand(0);

      // Loading from memory, can we adjust alignment?
      if (basePtr.getOpcode() == SPUISD::AFormAddr) {
        SDOperand APtr = basePtr.getOperand(0);
        if (APtr.getOpcode() == ISD::TargetGlobalAddress) {
          GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(APtr);
          alignment = GSDN->getGlobal()->getAlignment();
        }
      }
    } else {
      alignOffs = 0;
      prefSlotOffs = -vtm->prefslot_byte;
    }
  } else if (basePtr.getOpcode() == ISD::FrameIndex) {
    FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(basePtr);
    alignOffs = int(FIN->getIndex() * SPUFrameInfo::stackSlotSize());
    prefSlotOffs = (int) (alignOffs & 0xf);
    prefSlotOffs -= vtm->prefslot_byte;
    basePtr = DAG.getRegister(SPU::R1, PtrVT);
  } else {
    alignOffs = 0;
    prefSlotOffs = -vtm->prefslot_byte;
  }

  if (alignment == 16) {
    // Realign the base pointer as a D-Form address:
    if (!isMemoryOperand(basePtr) || (alignOffs & ~0xf) != 0) {
      basePtr = DAG.getNode(ISD::ADD, PtrVT,
                            basePtr,
                            DAG.getConstant((alignOffs & ~0xf), PtrVT));
    }

    // Emit the vector load:
    was16aligned = true;
    return DAG.getLoad(MVT::v16i8, chain, basePtr,
                       LSN->getSrcValue(), LSN->getSrcValueOffset(),
                       LSN->isVolatile(), 16);
  }

  // Unaligned load or we're using the "large memory" model, which means that
  // we have to be very pessimistic:
  if (isMemoryOperand(basePtr) || isIndirectOperand(basePtr)) {
    basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, basePtr,
                          DAG.getConstant(0, PtrVT));
  }

  // Add the offset
  basePtr = DAG.getNode(ISD::ADD, PtrVT, basePtr,
                        DAG.getConstant((alignOffs & ~0xf), PtrVT));
  was16aligned = false;
  return DAG.getLoad(MVT::v16i8, chain, basePtr,
                     LSN->getSrcValue(), LSN->getSrcValueOffset(),
                     LSN->isVolatile(), 16);
}

/// Custom lower loads for CellSPU
/*!
 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to rotate to extract the requested element.
 */
static SDOperand
LowerLOAD(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDOperand the_chain = LN->getChain();
  MVT VT = LN->getMemoryVT();
  MVT OpVT = Op.Val->getValueType(0);
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  SDOperand Ops[8];

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int offset, rotamt;
    bool was16aligned;
    SDOperand result =
      AlignedLoad(Op, DAG, ST, LN, alignment, offset, rotamt, VT,
                  was16aligned);

    if (result.Val == 0)
      return result;

    the_chain = result.getValue(1);
    // Rotate the chunk if necessary
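    // (A negative amount from AlignedLoad means the element sits to the left
    // of the preferred slot, so wrap the rotation around modulo 16 bytes.)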
    if (rotamt < 0)
      rotamt += 16;
    if (rotamt != 0 || !was16aligned) {
      SDVTList vecvts = DAG.getVTList(MVT::v16i8, MVT::Other);

      Ops[0] = the_chain;
      Ops[1] = result;
      if (was16aligned) {
        Ops[2] = DAG.getConstant(rotamt, MVT::i16);
      } else {
        MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        LoadSDNode *LN1 = cast<LoadSDNode>(result);
        Ops[2] = DAG.getNode(ISD::ADD, PtrVT, LN1->getBasePtr(),
                             DAG.getConstant(rotamt, PtrVT));
      }

      result = DAG.getNode(SPUISD::ROTBYTES_LEFT_CHAINED, vecvts, Ops, 3);
      the_chain = result.getValue(1);
    }

    if (VT == OpVT || ExtType == ISD::EXTLOAD) {
      SDVTList scalarvts;
      MVT vecVT = MVT::v16i8;

      // Convert the loaded v16i8 vector to the appropriate vector type
      // specified by the operand:
      if (OpVT == VT) {
        if (VT != MVT::i1)
          vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
      } else
        vecVT = MVT::getVectorVT(OpVT, (128 / OpVT.getSizeInBits()));

      Ops[0] = the_chain;
      Ops[1] = DAG.getNode(ISD::BIT_CONVERT, vecVT, result);
      scalarvts = DAG.getVTList((OpVT == VT ? VT : OpVT), MVT::Other);
      result = DAG.getNode(SPUISD::EXTRACT_ELT0_CHAINED, scalarvts, Ops, 2);
      the_chain = result.getValue(1);
    } else {
      // Handle the sign and zero-extending loads for i1 and i8:
      unsigned NewOpC;

      if (ExtType == ISD::SEXTLOAD) {
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_SEXT
                  : SPUISD::EXTRACT_I8_SEXT);
      } else {
        assert(ExtType == ISD::ZEXTLOAD);
        NewOpC = (OpVT == MVT::i1
                  ? SPUISD::EXTRACT_I1_ZEXT
                  : SPUISD::EXTRACT_I8_ZEXT);
      }

      result = DAG.getNode(NewOpC, OpVT, result);
    }

    SDVTList retvts = DAG.getVTList(OpVT, MVT::Other);
    SDOperand retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Custom lower stores for CellSPU
/*!
 All CellSPU stores are aligned to 16-byte boundaries, so for elements
 within a 16-byte block, we have to generate a shuffle to insert the
 requested element into its place, then store the resulting block.
 */
static SDOperand
LowerSTORE(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  StoreSDNode *SN = cast<StoreSDNode>(Op);
  SDOperand Value = SN->getValue();
  MVT VT = Value.getValueType();
  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned alignment = SN->getAlignment();

  switch (SN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    int chunk_offset, slot_offset;
    bool was16aligned;

    // The vector type we really want to load from the 16-byte chunk, except
    // in the case of MVT::i1, which has to be v16i8.
    MVT vecVT, stVecVT = MVT::v16i8;

    if (StVT != MVT::i1)
      stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
    vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

    SDOperand alignLoadVec =
      AlignedLoad(Op, DAG, ST, SN, alignment,
                  chunk_offset, slot_offset, VT, was16aligned);

    if (alignLoadVec.Val == 0)
      return alignLoadVec;

    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
    SDOperand basePtr = LN->getBasePtr();
    SDOperand the_chain = alignLoadVec.getValue(1);
    SDOperand theValue = SN->getValue();
    SDOperand result;

    if (StVT != VT
        && (theValue.getOpcode() == ISD::AssertZext
            || theValue.getOpcode() == ISD::AssertSext)) {
      // Drill down and get the value for zero- and sign-extended
      // quantities
      theValue = theValue.getOperand(0);
    }

    chunk_offset &= 0xf;

    SDOperand insertEltOffs = DAG.getConstant(chunk_offset, PtrVT);
    SDOperand insertEltPtr;
    SDOperand insertEltOp;

    // If the base pointer is already a D-form address, then just create
    // a new D-form address with a slot offset and the original base pointer.
    // Otherwise generate a D-form address with the slot offset relative
    // to the stack pointer, which is always aligned.
    DEBUG(cerr << "CellSPU LowerSTORE: basePtr = ");
    DEBUG(basePtr.Val->dump(&DAG));
    DEBUG(cerr << "\n");

    if (basePtr.getOpcode() == SPUISD::IndirectAddr ||
        (basePtr.getOpcode() == ISD::ADD
         && basePtr.getOperand(0).getOpcode() == SPUISD::IndirectAddr)) {
      insertEltPtr = basePtr;
    } else {
      insertEltPtr = DAG.getNode(ISD::ADD, PtrVT, basePtr, insertEltOffs);
    }

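    // INSERT_MASK materializes the shuffle-control constant (the SPU
    // CWD/CHD/CBD instruction family) for the byte position addressed by
    // insertEltPtr; the SHUFB below then uses it to splice the scalar into
    // the quadword that was just loaded.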
    insertEltOp = DAG.getNode(SPUISD::INSERT_MASK, stVecVT, insertEltPtr);
    result = DAG.getNode(SPUISD::SHUFB, vecVT,
                         DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue),
                         alignLoadVec,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, insertEltOp));

    result = DAG.getStore(the_chain, result, basePtr,
                          LN->getSrcValue(), LN->getSrcValueOffset(),
                          LN->isVolatile(), LN->getAlignment());

    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
767    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
768            "UNINDEXED\n";
769    cerr << (unsigned) SN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

/// Generate the address of a constant pool entry.
static SDOperand
LowerConstantPool(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      // Just return the SDOperand with the constant pool address in it.
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerConstantPool: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerJumpTable(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);
  const TargetMachine &TM = DAG.getTarget();

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  }

  assert(0 &&
         "LowerJumpTable: Relocation model other than static not supported.");
  return SDOperand();
}

static SDOperand
LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  const TargetMachine &TM = DAG.getTarget();
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  if (TM.getRelocationModel() == Reloc::Static) {
    if (!ST->usingLargeMem()) {
      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
    } else {
      SDOperand Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
      SDOperand Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
    }
  } else {
    cerr << "LowerGlobalAddress: Relocation model other than static not "
         << "supported.\n";
    abort();
    /*NOTREACHED*/
  }

  return SDOperand();
}

//! Custom lower i64 integer constants
/*!
 This code inserts all of the necessary juggling that needs to occur to load
 a 64-bit constant into a register.
 */
static SDOperand
LowerConstant(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  ConstantSDNode *CN = cast<ConstantSDNode>(Op.Val);

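  // There is no direct way to materialize a 64-bit immediate, so splat it
  // into a v2i64 build_vector and extract element 0 back out as the scalar.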
864    SDOperand T = DAG.getConstant(CN->getValue(), MVT::i64);
865    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
866                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
867  } else {
868    cerr << "LowerConstant: unhandled constant type "
869         << VT.getMVTString()
870         << "\n";
871    abort();
872    /*NOTREACHED*/
873  }
874
875  return SDOperand();
876}
877
878//! Custom lower double precision floating point constants
879static SDOperand
880LowerConstantFP(SDOperand Op, SelectionDAG &DAG) {
881  MVT VT = Op.getValueType();
882  ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.Val);
883
884  assert((FP != 0) &&
885         "LowerConstantFP: Node is not ConstantFPSDNode");
886
887  if (VT == MVT::f64) {
888    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
889    return DAG.getNode(ISD::BIT_CONVERT, VT,
890                       LowerConstant(DAG.getConstant(dbits, MVT::i64), DAG));
891  }
892
893  return SDOperand();
894}
895
896//! Lower MVT::i1, MVT::i8 brcond to a promoted type (MVT::i32, MVT::i16)
897static SDOperand
898LowerBRCOND(SDOperand Op, SelectionDAG &DAG)
899{
900  SDOperand Cond = Op.getOperand(1);
901  MVT CondVT = Cond.getValueType();
902  MVT CondNVT;
903
904  if (CondVT == MVT::i1 || CondVT == MVT::i8) {
905    CondNVT = (CondVT == MVT::i1 ? MVT::i32 : MVT::i16);
906    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
907                      Op.getOperand(0),
908                      DAG.getNode(ISD::ZERO_EXTEND, CondNVT, Op.getOperand(1)),
909                      Op.getOperand(2));
910  } else
911    return SDOperand();                // Unchanged
912}
913
914static SDOperand
915LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
916{
917  MachineFunction &MF = DAG.getMachineFunction();
918  MachineFrameInfo *MFI = MF.getFrameInfo();
919  MachineRegisterInfo &RegInfo = MF.getRegInfo();
920  SmallVector<SDOperand, 8> ArgValues;
921  SDOperand Root = Op.getOperand(0);
922  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
923
924  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
925  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();
926
927  unsigned ArgOffset = SPUFrameInfo::minStackSize();
928  unsigned ArgRegIdx = 0;
929  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
930
931  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
932
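  // As lowered here, the first NumArgRegs arguments arrive in registers, one
  // argument per 16-byte preferred-slot register; the remainder spill to
  // 16-byte stack slots starting just above the minimal linkage area.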
  // Add DAG nodes to load the arguments or copy them out of registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;

    switch (ObjectVT.getSimpleVT()) {
    default: {
      cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
           << ObjectVT.getMVTString()
           << "\n";
      abort();
    }
    case MVT::i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R8CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i8);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i16:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i16);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64CRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f32);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f64:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R64FPRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v2i64:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (!isVarArg && ArgRegIdx < NumArgRegs) {
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
        RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++ArgRegIdx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // Load the argument from the stack if we determined above that we ran
    // out of physical registers of the appropriate type
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                               ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    // If this function is vararg, store any remaining integer argument regs to
    // their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    SmallVector<SDOperand, 8> MemOps;
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      unsigned VReg = RegInfo.createVirtualRegister(&SPU::GPRCRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                         &MemOps[0], MemOps.size());
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT> RetVT(Op.Val->value_begin(),
                         Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0],
                     ArgValues.size());
}

/// isLSAAddress - Return the immediate to use if the specified
/// value is representable as a LSA address.
static SDNode *isLSAAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 14 >> 14) != Addr)
    return 0;  // Top 14 bits have to be sext of immediate.

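  // Word-aligned and representable as a signed 18-bit address, so it can be
  // encoded as the 16-bit immediate Addr >> 2.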
  return DAG.getConstant((int)C->getValue() >> 2, MVT::i32).Val;
}

static
SDOperand
LowerCALL(SDOperand Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  SDOperand Chain = Op.getOperand(0);
#if 0
  bool isVarArg       = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  bool isTailCall     = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0;
#endif
  SDOperand Callee    = Op.getOperand(4);
  unsigned NumOps     = (Op.getNumOperands() - 5) / 2;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDOperand, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumStackBytes, PtrVT));

  if (!MemOpChains.empty()) {
    // Chain together the stores of the outgoing stack arguments.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDOperand Zero = DAG.getConstant(0, PtrVT);
    SDOperand GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
1212      // "Large memory" mode: Turn all calls into indirect calls with a X-form
1213      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDOperand(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumStackBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  if (Op.Val->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v2f64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

static SDOperand
LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_SPU);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
}


//===----------------------------------------------------------------------===//
// Vector related lowering:
//===----------------------------------------------------------------------===//

static ConstantSDNode *
getVecImm(SDNode *N) {
  SDOperand OpVal(0, 0);

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return 0;
  }

  if (OpVal.Val != 0) {
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      return CN;
    }
  }

  return 0; // All UNDEF: use implicit def.; not Constant node
}

/// get_vec_u18imm - Test if this vector is a vector filled with the same value
/// and the value fits into an unsigned 18-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
                              MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    uint64_t Value = CN->getValue();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (Value <= 0x3ffff)
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i16imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 16-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
                              MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
      return DAG.getConstant(Value, ValueType);
    }
  }

  return SDOperand();
}

/// get_vec_i10imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 10-bit constant, and if so, return the
/// constant
SDOperand SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
                              MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int64_t Value = CN->getSignExtended();
    if (ValueType == MVT::i64) {
      uint64_t UValue = CN->getValue();
      uint32_t upper = uint32_t(UValue >> 32);
      uint32_t lower = uint32_t(UValue);
      if (upper != lower)
        return SDOperand();
      Value = Value >> 32;
    }
    if (isS10Constant(Value))
      return DAG.getConstant(Value, ValueType);
  }

  return SDOperand();
}

/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDOperand SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getValue();
1446    if (ValueType == MVT::i16
1447        && Value <= 0xffff                 /* truncated from uint64_t */
1448        && ((short) Value >> 8) == ((short) Value & 0xff))
1449      return DAG.getConstant(Value & 0xff, ValueType);
1450    else if (ValueType == MVT::i8
1451             && (Value & 0xff) == Value)
1452      return DAG.getConstant(Value, ValueType);
1453  }
1454
1455  return SDOperand();
1456}
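    // Example: a v16i8 splat of 0x03 arrives here as the i16 value 0x0303 (see
    // the @note above); the upper/lower byte comparison succeeds and the byte
    // value 0x03 is returned as a constant of ValueType.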
1457
1458/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1459/// and the value fits into a signed 16-bit constant, and if so, return the
1460/// constant
1461SDOperand SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1462                               MVT ValueType) {
1463  if (ConstantSDNode *CN = getVecImm(N)) {
1464    uint64_t Value = CN->getValue();
1465    if ((ValueType == MVT::i32
1466          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1467        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1468      return DAG.getConstant(Value >> 16, ValueType);
1469  }
1470
1471  return SDOperand();
1472}
1473
1474/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1475SDOperand SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1476  if (ConstantSDNode *CN = getVecImm(N)) {
1477    return DAG.getConstant((unsigned) CN->getValue(), MVT::i32);
1478  }
1479
1480  return SDOperand();
1481}
1482
1483/// get_v2i64_imm - Catch-all for general 64-bit constant vectors
1484SDOperand SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1485  if (ConstantSDNode *CN = getVecImm(N)) {
1486    return DAG.getConstant(CN->getValue(), MVT::i64);
1487  }
1488
1489  return SDOperand();
1490}
1491
1492// If this is a vector of constants or undefs, get the bits.  A bit in
1493// UndefBits is set if the corresponding element of the vector is an
1494// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1495// zero.   Return true if this is not an array of constants, false if it is.
1496//
1497static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
1498                                       uint64_t UndefBits[2]) {
1499  // Start with zero'd results.
1500  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
1501
1502  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
1503  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
1504    SDOperand OpVal = BV->getOperand(i);
1505
1506    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
1507    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
1508
1509    uint64_t EltBits = 0;
1510    if (OpVal.getOpcode() == ISD::UNDEF) {
1511      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
1512      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
1513      continue;
1514    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1515      EltBits = CN->getValue() & (~0ULL >> (64-EltBitSize));
1516    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
1517      const APFloat &apf = CN->getValueAPF();
1518      EltBits = (CN->getValueType(0) == MVT::f32
1519                 ? FloatToBits(apf.convertToFloat())
1520                 : DoubleToBits(apf.convertToDouble()));
1521    } else {
1522      // Nonconstant element.
1523      return true;
1524    }
1525
1526    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
1527  }
1528
1529  //printf("%llx %llx  %llx %llx\n",
1530  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
1531  return false;
1532}
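    // Worked example of the packing above: for the v8i16 vector
    //   <0x1111, 0x2222, 0x3333, 0x4444, 0x5555, 0x6666, 0x7777, 0x8888>
    // e = 8 and EltBitSize = 16, so element 0 lands in the most significant
    // subpiece of the first uint64_t:
    //   VectorBits[0] = 0x1111222233334444, VectorBits[1] = 0x5555666677778888
    // with UndefBits[0] = UndefBits[1] = 0. An undef element would instead set
    // its 16-bit slot in UndefBits and leave the VectorBits slot zero.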
1533
1534/// If this is a splat (repetition) of a value across the whole vector, return
1535/// the smallest size that splats it.  For example, "0x01010101010101..." is a
1536/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
1537/// SplatSize = 1 byte.
1538static bool isConstantSplat(const uint64_t Bits128[2],
1539                            const uint64_t Undef128[2],
1540                            int MinSplatBits,
1541                            uint64_t &SplatBits, uint64_t &SplatUndef,
1542                            int &SplatSize) {
1543  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
1544  // the same as the lower 64-bits, ignoring undefs.
1545  uint64_t Bits64  = Bits128[0] | Bits128[1];
1546  uint64_t Undef64 = Undef128[0] & Undef128[1];
1547  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
1548  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
1549  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
1550  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
1551
1552  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
1553    if (MinSplatBits < 64) {
1554
1555      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
1556      // undefs.
1557      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
1558        if (MinSplatBits < 32) {
1559
1560          // Check that the top 16 bits match the lower 16 bits, ignoring
1561          // undefs; if they differ, no splat narrower than 32 bits exists.
1562          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
1563            if (MinSplatBits < 16) {
1564              // Check that the top 8 bits match the lower 8 bits, ignoring
1565              // undefs.
1566              if ((Bits16 & (uint16_t(~Undef16) >> 8)) == ((Bits16 >> 8) & ~Undef16)) {
1567                // We have an 8-bit splat.
1568                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
1569                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
1570                SplatSize = 1;
1571                return true;
1572              }
1573            } else {
1574              SplatBits = Bits16;
1575              SplatUndef = Undef16;
1576              SplatSize = 2;
1577              return true;
1578            }
1579          }
1580        } else {
1581          SplatBits = Bits32;
1582          SplatUndef = Undef32;
1583          SplatSize = 4;
1584          return true;
1585        }
1586      }
1587    } else {
1588      SplatBits = Bits128[0];
1589      SplatUndef = Undef128[0];
1590      SplatSize = 8;
1591      return true;
1592    }
1593  }
1594
1595  return false;  // Can't be a splat if two pieces don't match.
1596}
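    // Example: Bits128 = { 0x0101010101010101, 0x0101010101010101 } folds to
    // Bits64 = 0x0101010101010101, Bits32 = 0x01010101 and Bits16 = 0x0101;
    // every comparison above succeeds, so SplatBits = 0x01 and SplatSize = 1.
    // Undef elements do not spoil the match, since their bits are zero in
    // Bits128 and are masked out of each comparison via the folded undef values.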
1597
1598// If this is a case we can't handle, return null and let the default
1599// expansion code take care of it.  If we CAN select this case, and if it
1600// selects to a single instruction, return Op.  Otherwise, if we can codegen
1601// this case more efficiently than a constant pool load, lower it to the
1602// sequence of ops that should be used.
1603static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1604  MVT VT = Op.getValueType();
1605  // If this is a vector of constants or undefs, get the bits.  A bit in
1606  // UndefBits is set if the corresponding element of the vector is an
1607  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1608  // zero.
1609  uint64_t VectorBits[2];
1610  uint64_t UndefBits[2];
1611  uint64_t SplatBits, SplatUndef;
1612  int SplatSize;
1613  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)
1614      || !isConstantSplat(VectorBits, UndefBits,
1615                          VT.getVectorElementType().getSizeInBits(),
1616                          SplatBits, SplatUndef, SplatSize))
1617    return SDOperand();   // Not a constant vector, not a splat.
1618
1619  switch (VT.getSimpleVT()) {
1620  default:
1621  case MVT::v4f32: {
1622    uint32_t Value32 = SplatBits;
1623    assert(SplatSize == 4
1624           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1625    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1626    SDOperand T = DAG.getConstant(Value32, MVT::i32);
1627    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1628                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1629    break;
1630  }
1631  case MVT::v2f64: {
1632    uint64_t f64val = SplatBits;
1633    assert(SplatSize == 8
1634           && "LowerBUILD_VECTOR: 64-bit float vector element: unexpected size.");
1635    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1636    SDOperand T = DAG.getConstant(f64val, MVT::i64);
1637    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1638                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1639    break;
1640  }
1641  case MVT::v16i8: {
1642    // 8-bit constants have to be expanded to 16-bits
1643    unsigned short Value16 = SplatBits | (SplatBits << 8);
1644    SDOperand Ops[8];
1645    for (int i = 0; i < 8; ++i)
1646      Ops[i] = DAG.getConstant(Value16, MVT::i16);
1647    return DAG.getNode(ISD::BIT_CONVERT, VT,
1648                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1649  }
1650  case MVT::v8i16: {
1651    unsigned short Value16;
1652    if (SplatSize == 2)
1653      Value16 = (unsigned short) (SplatBits & 0xffff);
1654    else
1655      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1656    SDOperand T = DAG.getConstant(Value16, VT.getVectorElementType());
1657    SDOperand Ops[8];
1658    for (int i = 0; i < 8; ++i) Ops[i] = T;
1659    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1660  }
1661  case MVT::v4i32: {
1662    unsigned int Value = SplatBits;
1663    SDOperand T = DAG.getConstant(Value, VT.getVectorElementType());
1664    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1665  }
1666  case MVT::v2i64: {
1667    uint64_t val = SplatBits;
1668    uint32_t upper = uint32_t(val >> 32);
1669    uint32_t lower = uint32_t(val);
1670
1671    if (upper == lower) {
1672      // Magic constant that can be matched by IL, ILA, et al.
1673      SDOperand Val = DAG.getTargetConstant(val, MVT::i64);
1674      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1675    } else {
1676      SDOperand LO32;
1677      SDOperand HI32;
1678      SmallVector<SDOperand, 16> ShufBytes;
1679      SDOperand Result;
1680      bool upper_special, lower_special;
1681
1682      // NOTE: This code creates common-case shuffle masks that can be easily
1683      // detected as common expressions. It is not attempting to create highly
1684      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1685
1686      // Detect if the upper or lower half is a special shuffle mask pattern:
1687      upper_special = (upper == 0 || upper == 0xffffffff || upper == 0x80000000);
1688      lower_special = (lower == 0 || lower == 0xffffffff || lower == 0x80000000);
1689
1690      // Create lower vector if not a special pattern
1691      if (!lower_special) {
1692        SDOperand LO32C = DAG.getConstant(lower, MVT::i32);
1693        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1694                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1695                                       LO32C, LO32C, LO32C, LO32C));
1696      }
1697
1698      // Create upper vector if not a special pattern
1699      if (!upper_special) {
1700        SDOperand HI32C = DAG.getConstant(upper, MVT::i32);
1701        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1702                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1703                                       HI32C, HI32C, HI32C, HI32C));
1704      }
1705
1706      // If either upper or lower are special, then the two input operands are
1707      // the same (basically, one of them is a "don't care")
1708      if (lower_special)
1709        LO32 = HI32;
1710      if (upper_special)
1711        HI32 = LO32;
1712      if (lower_special && upper_special) {
1713        // Unhappy situation... both upper and lower are special, so punt and
1714        // build an all-zero vector (the shuffle bytes do the real work):
1715        SDOperand Zero = DAG.getConstant(0, MVT::i32);
1716        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1717                                  Zero, Zero);
1718      }
1719
1720      for (int i = 0; i < 4; ++i) {
1721        uint64_t val = 0;
1722        for (int j = 0; j < 4; ++j) {
1723          SDOperand V;
1724          bool process_upper, process_lower;
1725          val <<= 8;
1726          process_upper = (upper_special && (i & 1) == 0);
1727          process_lower = (lower_special && (i & 1) == 1);
1728
1729          if (process_upper || process_lower) {
1730            if ((process_upper && upper == 0)
1731                || (process_lower && lower == 0))
1732              val |= 0x80;
1733            else if ((process_upper && upper == 0xffffffff)
1734                     || (process_lower && lower == 0xffffffff))
1735              val |= 0xc0;
1736            else if ((process_upper && upper == 0x80000000)
1737                     || (process_lower && lower == 0x80000000))
1738              val |= (j == 0 ? 0xe0 : 0x80);
1739          } else
1740            val |= i * 4 + j + ((i & 1) * 16);
1741        }
1742
1743        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1744      }
1745
1746      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1747                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1748                                     &ShufBytes[0], ShufBytes.size()));
1749    }
1750  }
1751  }
1752
1753  return SDOperand();
1754}
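    // Sketch of the v2i64 special-pattern path above for val =
    // 0x0000000012345678 (upper half special, lower half not): LO32 becomes a
    // v4i32 splat of 0x12345678 and HI32 aliases it. The generated shuffle
    // words are <0x80808080, 0x14151617, 0x80808080, 0x1c1d1e1f>. Assuming the
    // usual shufb byte codes (0x80 yields 0x00, 0x10-0x1f selects from the
    // second operand), each doubleword comes out as 0x0000000012345678, the
    // desired splat.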
1755
1756/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1757/// which the Cell can operate. The code inspects V3 to ascertain whether the
1758/// permutation vector, V3, is monotonically increasing with one "exception"
1759/// element, e.g., (0, 1, _, 3). If this is the case, then generate an
1760/// INSERT_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1761/// In either case, the net result eventually invokes SHUFB to permute or
1762/// shuffle the bytes from V1 and V2.
1763/// \note
1764/// INSERT_MASK is eventually selected as one of the C*D instructions, which
1765/// generate a control word for byte/halfword/word insertion. This takes care
1766/// of a single element move from V2 into V1.
1767/// \note
1768/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instruction.
1769static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
1770  SDOperand V1 = Op.getOperand(0);
1771  SDOperand V2 = Op.getOperand(1);
1772  SDOperand PermMask = Op.getOperand(2);
1773
1774  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1775
1776  // If we have a single element being moved from V1 to V2, this can be handled
1777  // using the C*[DX] compute mask instructions, but the vector elements have
1778  // to be monotonically increasing with one exception element.
1779  MVT EltVT = V1.getValueType().getVectorElementType();
1780  unsigned EltsFromV2 = 0;
1781  unsigned V2Elt = 0;
1782  unsigned V2EltIdx0 = 0;
1783  unsigned CurrElt = 0;
1784  bool monotonic = true;
1785  if (EltVT == MVT::i8)
1786    V2EltIdx0 = 16;
1787  else if (EltVT == MVT::i16)
1788    V2EltIdx0 = 8;
1789  else if (EltVT == MVT::i32)
1790    V2EltIdx0 = 4;
1791  else
1792    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");
1793
1794  for (unsigned i = 0, e = PermMask.getNumOperands();
1795       EltsFromV2 <= 1 && monotonic && i != e;
1796       ++i) {
1797    unsigned SrcElt;
1798    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1799      SrcElt = 0;
1800    else
1801      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1802
1803    if (SrcElt >= V2EltIdx0) {
1804      ++EltsFromV2;
1805      V2Elt = (V2EltIdx0 - SrcElt) << 2;
1806    } else if (CurrElt != SrcElt) {
1807      monotonic = false;
1808    }
1809
1810    ++CurrElt;
1811  }
1812
1813  if (EltsFromV2 == 1 && monotonic) {
1814    // Compute mask and shuffle
1815    MachineFunction &MF = DAG.getMachineFunction();
1816    MachineRegisterInfo &RegInfo = MF.getRegInfo();
1817    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
1818    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
1819    // Initialize temporary register to 0
1820    SDOperand InitTempReg =
1821      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
1822    // Copy register's contents as index in INSERT_MASK:
1823    SDOperand ShufMaskOp =
1824      DAG.getNode(SPUISD::INSERT_MASK, V1.getValueType(),
1825                  DAG.getTargetConstant(V2Elt, MVT::i32),
1826                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
1827    // Use shuffle mask in SHUFB synthetic instruction:
1828    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
1829  } else {
1830    // Convert the VECTOR_SHUFFLE mask's input element units to the actual bytes.
1831    unsigned BytesPerElement = EltVT.getSizeInBits()/8;
1832
1833    SmallVector<SDOperand, 16> ResultMask;
1834    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1835      unsigned SrcElt;
1836      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1837        SrcElt = 0;
1838      else
1839        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1840
1841      for (unsigned j = 0; j < BytesPerElement; ++j) {
1842        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1843                                             MVT::i8));
1844      }
1845    }
1846
1847    SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
1848                                      &ResultMask[0], ResultMask.size());
1849    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
1850  }
1851}
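    // Example of the general path above: a v4i32 shuffle with PermMask
    // <0, 5, 2, 7> has two elements from V2, so the byte-level mask
    // <0,1,2,3, 20,21,22,23, 8,9,10,11, 28,29,30,31> is built and shufb pulls
    // bytes 0-15 from V1 and bytes 16-31 from V2. A mask such as <0, 1, 6, 3>,
    // by contrast, is monotonic with a single V2 element and takes the C*[DX]
    // compute-mask path above.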
1852
1853static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) {
1854  SDOperand Op0 = Op.getOperand(0);                     // Op0 = the scalar
1855
1856  if (Op0.Val->getOpcode() == ISD::Constant) {
1857    // For a constant, build the appropriate constant vector, which will
1858    // eventually simplify to a vector register load.
1859
1860    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.Val);
1861    SmallVector<SDOperand, 16> ConstVecValues;
1862    MVT VT;
1863    size_t n_copies;
1864
1865    // Create a constant vector:
1866    switch (Op.getValueType().getSimpleVT()) {
1867    default: assert(0 && "Unexpected constant value type in "
1868                         "LowerSCALAR_TO_VECTOR");
1869    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1870    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1871    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1872    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1873    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1874    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1875    }
1876
1877    SDOperand CValue = DAG.getConstant(CN->getValue(), VT);
1878    for (size_t j = 0; j < n_copies; ++j)
1879      ConstVecValues.push_back(CValue);
1880
1881    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1882                       &ConstVecValues[0], ConstVecValues.size());
1883  } else {
1884    // Otherwise, copy the value from one register to another:
1885    switch (Op0.getValueType().getSimpleVT()) {
1886    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1887    case MVT::i8:
1888    case MVT::i16:
1889    case MVT::i32:
1890    case MVT::i64:
1891    case MVT::f32:
1892    case MVT::f64:
1893      return DAG.getNode(SPUISD::PROMOTE_SCALAR, Op.getValueType(), Op0, Op0);
1894    }
1895  }
1896
1897  return SDOperand();
1898}
1899
1900static SDOperand LowerVectorMUL(SDOperand Op, SelectionDAG &DAG) {
1901  switch (Op.getValueType().getSimpleVT()) {
1902  default:
1903    cerr << "CellSPU: Unknown vector multiplication, got "
1904         << Op.getValueType().getMVTString()
1905         << "\n";
1906    abort();
1907    /*NOTREACHED*/
1908
1909  case MVT::v4i32: {
1910    SDOperand rA = Op.getOperand(0);
1911    SDOperand rB = Op.getOperand(1);
1912    SDOperand HiProd1 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rA, rB);
1913    SDOperand HiProd2 = DAG.getNode(SPUISD::MPYH, MVT::v4i32, rB, rA);
1914    SDOperand LoProd = DAG.getNode(SPUISD::MPYU, MVT::v4i32, rA, rB);
1915    SDOperand Residual1 = DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd1);
1916
1917    return DAG.getNode(ISD::ADD, MVT::v4i32, Residual1, HiProd2);
1918    break;
1919  }
1920
1921  // Multiply two v8i16 vectors (pipeline friendly version):
1922  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
1923  // b) multiply upper halves, shift left by 16 bits (inserts 16 lower zeroes)
1924  // c) Use SELB to select upper and lower halves from the intermediate results
1925  //
1926  // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
1927  // dual-issue. This code does manage to do this, even if it's a little on
1928  // the wacky side
1929  case MVT::v8i16: {
1930    MachineFunction &MF = DAG.getMachineFunction();
1931    MachineRegisterInfo &RegInfo = MF.getRegInfo();
1932    SDOperand Chain = Op.getOperand(0);
1933    SDOperand rA = Op.getOperand(0);
1934    SDOperand rB = Op.getOperand(1);
1935    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1936    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
1937
1938    SDOperand FSMBOp =
1939      DAG.getCopyToReg(Chain, FSMBIreg,
1940                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1941                                   DAG.getConstant(0xcccc, MVT::i16)));
1942
1943    SDOperand HHProd =
1944      DAG.getCopyToReg(FSMBOp, HiProdReg,
1945                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));
1946
1947    SDOperand HHProd_v4i32 =
1948      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1949                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));
1950
1951    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
1952                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
1953                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
1954                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
1955                                               HHProd_v4i32,
1956                                               DAG.getConstant(16, MVT::i16))),
1957                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
1958  }
1959
1960  // This M00sE is N@stI! (apologies to Monty Python)
1961  //
1962  // SPU doesn't know how to do any 8-bit multiplication, so the solution
1963  // is to break it all apart, sign extend, and reassemble the various
1964  // intermediate products.
1965  case MVT::v16i8: {
1966    SDOperand rA = Op.getOperand(0);
1967    SDOperand rB = Op.getOperand(1);
1968    SDOperand c8 = DAG.getConstant(8, MVT::i32);
1969    SDOperand c16 = DAG.getConstant(16, MVT::i32);
1970
1971    SDOperand LLProd =
1972      DAG.getNode(SPUISD::MPY, MVT::v8i16,
1973                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
1974                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));
1975
1976    SDOperand rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);
1977
1978    SDOperand rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);
1979
1980    SDOperand LHProd =
1981      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
1982                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);
1983
1984    SDOperand FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
1985                                     DAG.getConstant(0x2222, MVT::i16));
1986
1987    SDOperand LoProdParts =
1988      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
1989                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
1990                              LLProd, LHProd, FSMBmask));
1991
1992    SDOperand LoProdMask = DAG.getConstant(0xffff, MVT::i32);
1993
1994    SDOperand LoProd =
1995      DAG.getNode(ISD::AND, MVT::v4i32,
1996                  LoProdParts,
1997                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1998                              LoProdMask, LoProdMask,
1999                              LoProdMask, LoProdMask));
2000
2001    SDOperand rAH =
2002      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2003                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);
2004
2005    SDOperand rBH =
2006      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
2007                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);
2008
2009    SDOperand HLProd =
2010      DAG.getNode(SPUISD::MPY, MVT::v8i16,
2011                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
2012                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));
2013
2014    SDOperand HHProd_1 =
2015      DAG.getNode(SPUISD::MPY, MVT::v8i16,
2016                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2017                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rAH, c8)),
2018                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
2019                              DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32, rBH, c8)));
2020
2021    SDOperand HHProd =
2022      DAG.getNode(SPUISD::SELB, MVT::v8i16,
2023                  HLProd,
2024                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
2025                  FSMBmask);
2026
2027    SDOperand HiProd =
2028      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);
2029
2030    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
2031                       DAG.getNode(ISD::OR, MVT::v4i32,
2032                                   LoProd, HiProd));
2033  }
2034  }
2035
2036  return SDOperand();
2037}
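    // The v4i32 decomposition above follows from splitting each 32-bit lane
    // into 16-bit halves, a = (aH << 16) + aL and b = (bH << 16) + bL:
    //   a * b = aL*bL + ((aH*bL) << 16) + ((aL*bH) << 16)   (mod 2^32)
    // which maps onto mpyu(a,b) + mpyh(a,b) + mpyh(b,a), assuming mpyh
    // multiplies the high halfword of its first operand by the low halfword
    // of its second and shifts the product left 16 bits.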
2038
2039static SDOperand LowerFDIVf32(SDOperand Op, SelectionDAG &DAG) {
2040  MachineFunction &MF = DAG.getMachineFunction();
2041  MachineRegisterInfo &RegInfo = MF.getRegInfo();
2042
2043  SDOperand A = Op.getOperand(0);
2044  SDOperand B = Op.getOperand(1);
2045  MVT VT = Op.getValueType();
2046
2047  unsigned VRegBR, VRegC;
2048
2049  if (VT == MVT::f32) {
2050    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2051    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
2052  } else {
2053    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2054    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
2055  }
2056  // TODO: make sure we're feeding FPInterp the right arguments
2057  // Right now: fi B, frest(B)
2058
2059  // Computes BRcpl =
2060  // (Floating Interpolate (FP Reciprocal Estimate B))
2061  SDOperand BRcpl =
2062      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
2063                       DAG.getNode(SPUISD::FPInterp, VT, B,
2064                                DAG.getNode(SPUISD::FPRecipEst, VT, B)));
2065
2066  // Computes A * BRcpl and stores in a temporary register
2067  SDOperand AxBRcpl =
2068      DAG.getCopyToReg(BRcpl, VRegC,
2069                 DAG.getNode(ISD::FMUL, VT, A,
2070                        DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
2071  // What does the Chain variable do? It's magic!
2072  // TODO: set Chain = Op(0).getEntryNode()
2073
2074  return DAG.getNode(ISD::FADD, VT,
2075                DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
2076                DAG.getNode(ISD::FMUL, VT,
2077                        DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
2078                        DAG.getNode(ISD::FSUB, VT, A,
2079                            DAG.getNode(ISD::FMUL, VT, B,
2080                            DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
2081}
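    // The sequence above amounts to one Newton-Raphson refinement of the
    // reciprocal estimate: with r = fi(B, frest(B)), roughly 1/B, and C = A*r,
    // the result is
    //   C + r*(A - B*C) = A*r*(2 - B*r)
    // which approximately squares the relative error of the initial estimate.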
2082
2083static SDOperand LowerEXTRACT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2084  MVT VT = Op.getValueType();
2085  SDOperand N = Op.getOperand(0);
2086  SDOperand Elt = Op.getOperand(1);
2087  SDOperand ShufMask[16];
2088  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt);
2089
2090  assert(C != 0 && "LowerEXTRACT_VECTOR_ELT expecting constant SDNode");
2091
2092  int EltNo = (int) C->getValue();
2093
2094  // sanity checks:
2095  if (VT == MVT::i8 && EltNo >= 16)
2096    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2097  else if (VT == MVT::i16 && EltNo >= 8)
2098    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2099  else if (VT == MVT::i32 && EltNo >= 4)
2100    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 3");
2101  else if (VT == MVT::i64 && EltNo >= 2)
2102    assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 1");
2103
2104  if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2105    // i32 and i64: Element 0 is the preferred slot
2106    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT, N);
2107  }
2108
2109  // Need to generate shuffle mask and extract:
2110  int prefslot_begin = -1, prefslot_end = -1;
2111  int elt_byte = EltNo * VT.getSizeInBits() / 8;
2112
2113  switch (VT.getSimpleVT()) {
2114  default:
2115    assert(false && "Invalid value type!");
2116  case MVT::i8: {
2117    prefslot_begin = prefslot_end = 3;
2118    break;
2119  }
2120  case MVT::i16: {
2121    prefslot_begin = 2; prefslot_end = 3;
2122    break;
2123  }
2124  case MVT::i32: {
2125    prefslot_begin = 0; prefslot_end = 3;
2126    break;
2127  }
2128  case MVT::i64: {
2129    prefslot_begin = 0; prefslot_end = 7;
2130    break;
2131  }
2132  }
2133
2134  assert(prefslot_begin != -1 && prefslot_end != -1 &&
2135         "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2136
2137  for (int i = 0; i < 16; ++i) {
2138    // zero fill upper part of preferred slot, don't care about the
2139    // other slots:
2140    unsigned int mask_val;
2141
2142    if (i <= prefslot_end) {
2143      mask_val =
2144        ((i < prefslot_begin)
2145         ? 0x80
2146         : elt_byte + (i - prefslot_begin));
2147
2148      ShufMask[i] = DAG.getConstant(mask_val, MVT::i8);
2149    } else
2150      ShufMask[i] = ShufMask[i % (prefslot_end + 1)];
2151  }
2152
2153  SDOperand ShufMaskVec =
2154    DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
2155                &ShufMask[0],
2156                sizeof(ShufMask) / sizeof(ShufMask[0]));
2157
2158  return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2159                     DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2160                                 N, N, ShufMaskVec));
2161
2162}
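    // Example: extracting i16 element 5 gives elt_byte = 10 with preferred
    // slot bytes [2,3], so the mask built above is <0x80, 0x80, 0x0a, 0x0b>
    // repeated four times; the shuffle moves bytes 10-11 into the preferred
    // slot (zero-filling the byte above them) and EXTRACT_ELT0 reads it out.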
2163
2164static SDOperand LowerINSERT_VECTOR_ELT(SDOperand Op, SelectionDAG &DAG) {
2165  SDOperand VecOp = Op.getOperand(0);
2166  SDOperand ValOp = Op.getOperand(1);
2167  SDOperand IdxOp = Op.getOperand(2);
2168  MVT VT = Op.getValueType();
2169
2170  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2171  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2172
2173  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2174  // Use $2 because it's always 16-byte aligned and it's available:
2175  SDOperand PtrBase = DAG.getRegister(SPU::R2, PtrVT);
2176
2177  SDOperand result =
2178    DAG.getNode(SPUISD::SHUFB, VT,
2179                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2180                VecOp,
2181                DAG.getNode(SPUISD::INSERT_MASK, VT,
2182                            DAG.getNode(ISD::ADD, PtrVT,
2183                                        PtrBase,
2184                                        DAG.getConstant(CN->getValue(),
2185                                                        PtrVT))));
2186
2187  return result;
2188}
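    // Sketch: because $2 is 16-byte aligned, the low bits of ($2 + index) are
    // just the index, and INSERT_MASK (ultimately a C*D-family instruction)
    // derives its insertion control word from those low address bits. The
    // SHUFB then merges the promoted scalar into VecOp under that mask.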
2189
2190static SDOperand LowerI8Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2191{
2192  SDOperand N0 = Op.getOperand(0);      // Everything has at least one operand
2193
2194  assert(Op.getValueType() == MVT::i8);
2195  switch (Opc) {
2196  default:
2197    assert(0 && "Unhandled i8 math operator");
2198    /*NOTREACHED*/
2199    break;
2200  case ISD::SUB: {
2201    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
2202    // the result:
2203    SDOperand N1 = Op.getOperand(1);
2204    N0 = (N0.getOpcode() != ISD::Constant
2205          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2206          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2207    N1 = (N1.getOpcode() != ISD::Constant
2208          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
2209          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2210    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2211                       DAG.getNode(Opc, MVT::i16, N0, N1));
2212  }
2213  case ISD::ROTR:
2214  case ISD::ROTL: {
2215    SDOperand N1 = Op.getOperand(1);
2216    unsigned N1Opc;
2217    N0 = (N0.getOpcode() != ISD::Constant
2218          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2219          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2220    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2221    N1 = (N1.getOpcode() != ISD::Constant
2222          ? DAG.getNode(N1Opc, MVT::i16, N1)
2223          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2224    SDOperand ExpandArg =
2225      DAG.getNode(ISD::OR, MVT::i16, N0,
2226                  DAG.getNode(ISD::SHL, MVT::i16,
2227                              N0, DAG.getConstant(8, MVT::i16)));
2228    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2229                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
2230  }
2231  case ISD::SRL:
2232  case ISD::SHL: {
2233    SDOperand N1 = Op.getOperand(1);
2234    unsigned N1Opc;
2235    N0 = (N0.getOpcode() != ISD::Constant
2236          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
2237          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2238    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::ZERO_EXTEND : ISD::TRUNCATE;
2239    N1 = (N1.getOpcode() != ISD::Constant
2240          ? DAG.getNode(N1Opc, MVT::i16, N1)
2241          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2242    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2243                       DAG.getNode(Opc, MVT::i16, N0, N1));
2244  }
2245  case ISD::SRA: {
2246    SDOperand N1 = Op.getOperand(1);
2247    unsigned N1Opc;
2248    N0 = (N0.getOpcode() != ISD::Constant
2249          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2250          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2251    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2252    N1 = (N1.getOpcode() != ISD::Constant
2253          ? DAG.getNode(N1Opc, MVT::i16, N1)
2254          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2255    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2256                       DAG.getNode(Opc, MVT::i16, N0, N1));
2257  }
2258  case ISD::MUL: {
2259    SDOperand N1 = Op.getOperand(1);
2260    unsigned N1Opc;
2261    N0 = (N0.getOpcode() != ISD::Constant
2262          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
2263          : DAG.getConstant(cast<ConstantSDNode>(N0)->getValue(), MVT::i16));
2264    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
2265    N1 = (N1.getOpcode() != ISD::Constant
2266          ? DAG.getNode(N1Opc, MVT::i16, N1)
2267          : DAG.getConstant(cast<ConstantSDNode>(N1)->getValue(), MVT::i16));
2268    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
2269                       DAG.getNode(Opc, MVT::i16, N0, N1));
2270    break;
2271  }
2272  }
2273
2274  return SDOperand();
2275}
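    // Example for the rotate cases above: rotating i8 0xAB left by 3 first
    // forms ExpandArg = 0xAB | (0xAB << 8) = 0xABAB; rotating that 16-bit
    // value left by 3 gives 0x5D5D, and truncation yields 0x5D, the same as
    // an 8-bit rotate, because the doubled pattern repeats with period 8.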
2276
2277static SDOperand LowerI64Math(SDOperand Op, SelectionDAG &DAG, unsigned Opc)
2278{
2279  MVT VT = Op.getValueType();
2280  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2281
2282  SDOperand Op0 = Op.getOperand(0);
2283
2284  switch (Opc) {
2285  case ISD::ZERO_EXTEND:
2286  case ISD::SIGN_EXTEND:
2287  case ISD::ANY_EXTEND: {
2288    MVT Op0VT = Op0.getValueType();
2289    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2290
2291    assert(Op0VT == MVT::i32
2292           && "CellSPU: Zero/sign extending something other than i32");
2293    DEBUG(cerr << "CellSPU: LowerI64Math custom lowering zero/sign/any extend\n");
2294
2295    unsigned NewOpc = (Opc == ISD::SIGN_EXTEND
2296                      ? SPUISD::ROTBYTES_RIGHT_S
2297                      : SPUISD::ROTQUAD_RZ_BYTES);
2298    SDOperand PromoteScalar =
2299      DAG.getNode(SPUISD::PROMOTE_SCALAR, Op0VecVT, Op0);
2300
2301    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2302                       DAG.getNode(ISD::BIT_CONVERT, VecVT,
2303                                   DAG.getNode(NewOpc, Op0VecVT,
2304                                               PromoteScalar,
2305                                               DAG.getConstant(4, MVT::i32))));
2306  }
2307
2308  case ISD::ADD: {
2309    // Turn operands into vectors to satisfy type checking (shufb works on
2310    // vectors)
2311    SDOperand Op0 =
2312      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2313    SDOperand Op1 =
2314      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2315    SmallVector<SDOperand, 16> ShufBytes;
2316
2317    // Create the shuffle mask for "rotating" the borrow up one register slot
2318    // once the borrow is generated.
2319    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2320    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2321    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2322    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
2323
2324    SDOperand CarryGen =
2325      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
2326    SDOperand ShiftedCarry =
2327      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2328                  CarryGen, CarryGen,
2329                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2330                              &ShufBytes[0], ShufBytes.size()));
2331
2332    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2333                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
2334                                   Op0, Op1, ShiftedCarry));
2335  }
2336
2337  case ISD::SUB: {
2338    // Turn operands into vectors to satisfy type checking (shufb works on
2339    // vectors)
2340    SDOperand Op0 =
2341      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2342    SDOperand Op1 =
2343      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(1));
2344    SmallVector<SDOperand, 16> ShufBytes;
2345
2346    // Create the shuffle mask for "rotating" the borrow up one register slot
2347    // once the borrow is generated.
2348    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
2349    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2350    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
2351    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
2352
2353    SDOperand BorrowGen =
2354      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
2355    SDOperand ShiftedBorrow =
2356      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
2357                  BorrowGen, BorrowGen,
2358                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2359                              &ShufBytes[0], ShufBytes.size()));
2360
2361    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2362                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
2363                                   Op0, Op1, ShiftedBorrow));
2364  }
2365
2366  case ISD::SHL: {
2367    SDOperand ShiftAmt = Op.getOperand(1);
2368    MVT ShiftAmtVT = ShiftAmt.getValueType();
2369    SDOperand Op0Vec = DAG.getNode(SPUISD::PROMOTE_SCALAR, VecVT, Op0);
2370    SDOperand MaskLower =
2371      DAG.getNode(SPUISD::SELB, VecVT,
2372                  Op0Vec,
2373                  DAG.getConstant(0, VecVT),
2374                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
2375                              DAG.getConstant(0xff00ULL, MVT::i16)));
2376    SDOperand ShiftAmtBytes =
2377      DAG.getNode(ISD::SRL, ShiftAmtVT,
2378                  ShiftAmt,
2379                  DAG.getConstant(3, ShiftAmtVT));
2380    SDOperand ShiftAmtBits =
2381      DAG.getNode(ISD::AND, ShiftAmtVT,
2382                  ShiftAmt,
2383                  DAG.getConstant(7, ShiftAmtVT));
2384
2385    return DAG.getNode(SPUISD::EXTRACT_ELT0, VT,
2386                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
2387                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
2388                                               MaskLower, ShiftAmtBytes),
2389                                   ShiftAmtBits));
2390  }
2391
2392  case ISD::SRL: {
2393    MVT VT = Op.getValueType();
2394    SDOperand ShiftAmt = Op.getOperand(1);
2395    MVT ShiftAmtVT = ShiftAmt.getValueType();
2396    SDOperand ShiftAmtBytes =
2397      DAG.getNode(ISD::SRL, ShiftAmtVT,
2398                  ShiftAmt,
2399                  DAG.getConstant(3, ShiftAmtVT));
2400    SDOperand ShiftAmtBits =
2401      DAG.getNode(ISD::AND, ShiftAmtVT,
2402                  ShiftAmt,
2403                  DAG.getConstant(7, ShiftAmtVT));
2404
2405    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
2406                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
2407                                   Op0, ShiftAmtBytes),
2408                       ShiftAmtBits);
2409  }
2410
2411  case ISD::SRA: {
2412    // Promote Op0 to vector
2413    SDOperand Op0 =
2414      DAG.getNode(SPUISD::PROMOTE_SCALAR, MVT::v2i64, Op.getOperand(0));
2415    SDOperand ShiftAmt = Op.getOperand(1);
2416    MVT ShiftVT = ShiftAmt.getValueType();
2417
2418    // Negate variable shift amounts
2419    if (!isa<ConstantSDNode>(ShiftAmt)) {
2420      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
2421                             DAG.getConstant(0, ShiftVT), ShiftAmt);
2422    }
2423
2424    SDOperand UpperHalfSign =
2425      DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i32,
2426                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
2427                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
2428                                          Op0, DAG.getConstant(31, MVT::i32))));
2429    SDOperand UpperHalfSignMask =
2430      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
2431    SDOperand UpperLowerMask =
2432      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
2433                  DAG.getConstant(0xff00, MVT::i16));
2434    SDOperand UpperLowerSelect =
2435      DAG.getNode(SPUISD::SELB, MVT::v2i64,
2436                  UpperHalfSignMask, Op0, UpperLowerMask);
2437    SDOperand RotateLeftBytes =
2438      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
2439                  UpperLowerSelect, ShiftAmt);
2440    SDOperand RotateLeftBits =
2441      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
2442                  RotateLeftBytes, ShiftAmt);
2443
2444    return DAG.getNode(SPUISD::EXTRACT_ELT0, MVT::i64,
2445                       RotateLeftBits);
2446  }
2447  }
2448
2449  return SDOperand();
2450}
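    // Note on the ADD/SUB cases above: CARRY_GENERATE/BORROW_GENERATE produce
    // per-word carries/borrows, but the bit a 64-bit operation needs comes
    // from the low word of each doubleword. The shuffle masks rotate that
    // word up one slot (bytes 0x04-0x07 select it), filling the vacated slot
    // with 0x00 for the add (0x80 bytes) or 0xff for the subtract (0xc0
    // bytes), so ADD_EXTENDED/SUB_EXTENDED sees the carry aligned with the
    // high word it must feed.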
2451
2452//! Lower byte immediate operations for v16i8 vectors:
2453static SDOperand
2454LowerByteImmed(SDOperand Op, SelectionDAG &DAG) {
2455  SDOperand ConstVec;
2456  SDOperand Arg;
2457  MVT VT = Op.getValueType();
2458
2459  ConstVec = Op.getOperand(0);
2460  Arg = Op.getOperand(1);
2461  if (ConstVec.Val->getOpcode() != ISD::BUILD_VECTOR) {
2462    if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2463      ConstVec = ConstVec.getOperand(0);
2464    } else {
2465      ConstVec = Op.getOperand(1);
2466      Arg = Op.getOperand(0);
2467      if (ConstVec.Val->getOpcode() == ISD::BIT_CONVERT) {
2468        ConstVec = ConstVec.getOperand(0);
2469      }
2470    }
2471  }
2472
2473  if (ConstVec.Val->getOpcode() == ISD::BUILD_VECTOR) {
2474    uint64_t VectorBits[2];
2475    uint64_t UndefBits[2];
2476    uint64_t SplatBits, SplatUndef;
2477    int SplatSize;
2478
2479    if (!GetConstantBuildVectorBits(ConstVec.Val, VectorBits, UndefBits)
2480        && isConstantSplat(VectorBits, UndefBits,
2481                           VT.getVectorElementType().getSizeInBits(),
2482                           SplatBits, SplatUndef, SplatSize)) {
2483      SDOperand tcVec[16];
2484      SDOperand tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2485      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2486
2487      // Turn the BUILD_VECTOR into a set of target constants:
2488      for (size_t i = 0; i < tcVecSize; ++i)
2489        tcVec[i] = tc;
2490
2491      return DAG.getNode(Op.Val->getOpcode(), VT, Arg,
2492                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2493    }
2494  }
2495
2496  return SDOperand();
2497}
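    // Example: (and v16i8:x, (splat 0x0f)) is rewritten so the splat becomes
    // a BUILD_VECTOR of sixteen i8 target constants 0x0f, which instruction
    // selection can then fold into the byte-immediate form (here, ANDBI).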
2498
2499//! Lower i32 multiplication
2500static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG, MVT VT,
2501                          unsigned Opc) {
2502  switch (VT.getSimpleVT()) {
2503  default:
2504    cerr << "CellSPU: Unknown LowerMUL value type, got "
2505         << Op.getValueType().getMVTString()
2506         << "\n";
2507    abort();
2508    /*NOTREACHED*/
2509
2510  case MVT::i32: {
2511    SDOperand rA = Op.getOperand(0);
2512    SDOperand rB = Op.getOperand(1);
2513
2514    return DAG.getNode(ISD::ADD, MVT::i32,
2515                       DAG.getNode(ISD::ADD, MVT::i32,
2516                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rA, rB),
2517                                   DAG.getNode(SPUISD::MPYH, MVT::i32, rB, rA)),
2518                       DAG.getNode(SPUISD::MPYU, MVT::i32, rA, rB));
2519  }
2520  }
2521
2522  return SDOperand();
2523}
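    // As in LowerVectorMUL's v4i32 case, this relies on the 16-bit-halves
    // identity a*b = mpyu(a,b) + mpyh(a,b) + mpyh(b,a) (mod 2^32), since the
    // SPU only provides 16 x 16 -> 32 bit multiplies.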
2524
2525//! Custom lowering for CTPOP (count population)
2526/*!
2527  Custom lowering code that counts the number of ones in the input
2528  operand. SPU has such an instruction, but it counts the number of
2529  ones per byte, which then have to be accumulated.
2530*/
2531static SDOperand LowerCTPOP(SDOperand Op, SelectionDAG &DAG) {
2532  MVT VT = Op.getValueType();
2533  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2534
2535  switch (VT.getSimpleVT()) {
2536  default:
2537    assert(false && "Invalid value type!");
2538  case MVT::i8: {
2539    SDOperand N = Op.getOperand(0);
2540    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2541
2542    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2543    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2544
2545    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
2546  }
2547
2548  case MVT::i16: {
2549    MachineFunction &MF = DAG.getMachineFunction();
2550    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2551
2552    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);
2553
2554    SDOperand N = Op.getOperand(0);
2555    SDOperand Elt0 = DAG.getConstant(0, MVT::i16);
2556    SDOperand Mask0 = DAG.getConstant(0x0f, MVT::i16);
2557    SDOperand Shift1 = DAG.getConstant(8, MVT::i16);
2558
2559    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2560    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2561
2562    // CNTB_result becomes the chain to which all of the virtual registers
2563    // CNTB_reg, SUM1_reg become associated:
2564    SDOperand CNTB_result =
2565      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);
2566
2567    SDOperand CNTB_rescopy =
2568      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2569
2570    SDOperand Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);
2571
2572    return DAG.getNode(ISD::AND, MVT::i16,
2573                       DAG.getNode(ISD::ADD, MVT::i16,
2574                                   DAG.getNode(ISD::SRL, MVT::i16,
2575                                               Tmp1, Shift1),
2576                                   Tmp1),
2577                       Mask0);
2578  }
2579
2580  case MVT::i32: {
2581    MachineFunction &MF = DAG.getMachineFunction();
2582    MachineRegisterInfo &RegInfo = MF.getRegInfo();
2583
2584    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2585    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
2586
2587    SDOperand N = Op.getOperand(0);
2588    SDOperand Elt0 = DAG.getConstant(0, MVT::i32);
2589    SDOperand Mask0 = DAG.getConstant(0xff, MVT::i32);
2590    SDOperand Shift1 = DAG.getConstant(16, MVT::i32);
2591    SDOperand Shift2 = DAG.getConstant(8, MVT::i32);
2592
2593    SDOperand Promote = DAG.getNode(SPUISD::PROMOTE_SCALAR, vecVT, N, N);
2594    SDOperand CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);
2595
2596    // CNTB_result becomes the chain to which all of the virtual registers
2597    // CNTB_reg, SUM1_reg become associated:
2598    SDOperand CNTB_result =
2599      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);
2600
2601    SDOperand CNTB_rescopy =
2602      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);
2603
2604    SDOperand Comp1 =
2605      DAG.getNode(ISD::SRL, MVT::i32,
2606                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);
2607
2608    SDOperand Sum1 =
2609      DAG.getNode(ISD::ADD, MVT::i32,
2610                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));
2611
2612    SDOperand Sum1_rescopy =
2613      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);
2614
2615    SDOperand Comp2 =
2616      DAG.getNode(ISD::SRL, MVT::i32,
2617                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
2618                  Shift2);
2619    SDOperand Sum2 =
2620      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
2621                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));
2622
2623    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
2624  }
2625
2626  case MVT::i64:
2627    break;
2628  }
2629
2630  return SDOperand();
2631}
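    // Worked example for the i32 case above: CNTB leaves one population count
    // per byte, say x = [b3|b2|b1|b0]. Each count is at most 8, so byte-wise
    // sums never carry: Sum1 = x + (x >> 16) holds b1+b3 and b0+b2 in its low
    // two bytes, Sum2 = Sum1 + (Sum1 >> 8) holds b0+b1+b2+b3 in its low byte,
    // and the final AND with 0xff extracts the total.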
2632
2633/// LowerOperation - Provide custom lowering hooks for some operations.
2634///
2635SDOperand
2636SPUTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG)
2637{
2638  unsigned Opc = (unsigned) Op.getOpcode();
2639  MVT VT = Op.getValueType();
2640
2641  switch (Opc) {
2642  default: {
2643    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2644    cerr << "Op.getOpcode() = " << Opc << "\n";
2645    cerr << "*Op.Val:\n";
2646    Op.Val->dump();
2647    abort();
2648  }
2649  case ISD::LOAD:
2650  case ISD::SEXTLOAD:
2651  case ISD::ZEXTLOAD:
2652    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2653  case ISD::STORE:
2654    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2655  case ISD::ConstantPool:
2656    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2657  case ISD::GlobalAddress:
2658    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2659  case ISD::JumpTable:
2660    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2661  case ISD::Constant:
2662    return LowerConstant(Op, DAG);
2663  case ISD::ConstantFP:
2664    return LowerConstantFP(Op, DAG);
2665  case ISD::BRCOND:
2666    return LowerBRCOND(Op, DAG);
2667  case ISD::FORMAL_ARGUMENTS:
2668    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2669  case ISD::CALL:
2670    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2671  case ISD::RET:
2672    return LowerRET(Op, DAG, getTargetMachine());
2673
2674
2675  // i8, i64 math ops:
2676  case ISD::ZERO_EXTEND:
2677  case ISD::SIGN_EXTEND:
2678  case ISD::ANY_EXTEND:
2679  case ISD::ADD:
2680  case ISD::SUB:
2681  case ISD::ROTR:
2682  case ISD::ROTL:
2683  case ISD::SRL:
2684  case ISD::SHL:
2685  case ISD::SRA: {
2686    if (VT == MVT::i8)
2687      return LowerI8Math(Op, DAG, Opc);
2688    else if (VT == MVT::i64)
2689      return LowerI64Math(Op, DAG, Opc);
2690    break;
2691  }
2692
2693  // Vector-related lowering.
2694  case ISD::BUILD_VECTOR:
2695    return LowerBUILD_VECTOR(Op, DAG);
2696  case ISD::SCALAR_TO_VECTOR:
2697    return LowerSCALAR_TO_VECTOR(Op, DAG);
2698  case ISD::VECTOR_SHUFFLE:
2699    return LowerVECTOR_SHUFFLE(Op, DAG);
2700  case ISD::EXTRACT_VECTOR_ELT:
2701    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2702  case ISD::INSERT_VECTOR_ELT:
2703    return LowerINSERT_VECTOR_ELT(Op, DAG);
2704
2705  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2706  case ISD::AND:
2707  case ISD::OR:
2708  case ISD::XOR:
2709    return LowerByteImmed(Op, DAG);
2710
2711  // Vector and i8 multiply:
2712  case ISD::MUL:
2713    if (VT.isVector())
2714      return LowerVectorMUL(Op, DAG);
2715    else if (VT == MVT::i8)
2716      return LowerI8Math(Op, DAG, Opc);
2717    else
2718      return LowerMUL(Op, DAG, VT, Opc);
2719
2720  case ISD::FDIV:
2721    if (VT == MVT::f32 || VT == MVT::v4f32)
2722      return LowerFDIVf32(Op, DAG);
2723//    else if (Op.getValueType() == MVT::f64)
2724//      return LowerFDIVf64(Op, DAG);
2725    else
2726      assert(0 && "Calling FDIV on unsupported MVT"); break; // no fall-through into CTPOP
2727
2728  case ISD::CTPOP:
2729    return LowerCTPOP(Op, DAG);
2730  }
2731
2732  return SDOperand();
2733}
2734
2735//===----------------------------------------------------------------------===//
2736// Target Optimization Hooks
2737//===----------------------------------------------------------------------===//
2738
2739SDOperand
2740SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
2741{
2742#if 0
2743  TargetMachine &TM = getTargetMachine();
2744#endif
2745  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
2746  SelectionDAG &DAG = DCI.DAG;
2747  SDOperand Op0 = N->getOperand(0);      // everything has at least one operand
2748  SDOperand Result;                     // Initially, NULL result
2749
2750  switch (N->getOpcode()) {
2751  default: break;
2752  case ISD::ADD: {
2753    SDOperand Op1 = N->getOperand(1);
2754
2755    if (isa<ConstantSDNode>(Op1) && Op0.getOpcode() == SPUISD::IndirectAddr) {
2756      SDOperand Op01 = Op0.getOperand(1);
2757      if (Op01.getOpcode() == ISD::Constant
2758          || Op01.getOpcode() == ISD::TargetConstant) {
2759        // (add (SPUindirect <arg>, <const>), <const>) ->
2760        // (SPUindirect <arg>, <const + const>)
2761        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op1);
2762        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op01);
2763        SDOperand combinedConst =
2764          DAG.getConstant(CN0->getValue() + CN1->getValue(),
2765                          Op0.getValueType());
2766
2767        DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2768                   << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2769        DEBUG(cerr << "With:    (SPUindirect <arg>, "
2770                   << CN0->getValue() + CN1->getValue() << ")\n");
2771        return DAG.getNode(SPUISD::IndirectAddr, Op0.getValueType(),
2772                           Op0.getOperand(0), combinedConst);
2773      }
2774    } else if (isa<ConstantSDNode>(Op0)
2775               && Op1.getOpcode() == SPUISD::IndirectAddr) {
2776      SDOperand Op11 = Op1.getOperand(1);
2777      if (Op11.getOpcode() == ISD::Constant
2778          || Op11.getOpcode() == ISD::TargetConstant) {
2779        // (add <const>, (SPUindirect <arg>, <const>)) ->
2780        // (SPUindirect <arg>, <const + const>)
2781        ConstantSDNode *CN0 = cast<ConstantSDNode>(Op0);
2782        ConstantSDNode *CN1 = cast<ConstantSDNode>(Op11);
2783        SDOperand combinedConst =
2784          DAG.getConstant(CN0->getValue() + CN1->getValue(),
2785                          Op0.getValueType());
2786
2787        DEBUG(cerr << "Replace: (add " << CN0->getValue() << ", "
2788                   << "(SPUindirect <arg>, " << CN1->getValue() << "))\n");
2789        DEBUG(cerr << "With:    (SPUindirect <arg>, "
2790                   << CN0->getValue() + CN1->getValue() << ")\n");
2791
2792        return DAG.getNode(SPUISD::IndirectAddr, Op1.getValueType(),
2793                           Op1.getOperand(0), combinedConst);
2794      }
2795    }
2796    break;
2797  }
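  // For illustration, the ADD combine above folds an address computation
  // such as
  //   (add (SPUindirect %reg, 16), 4)
  // into
  //   (SPUindirect %reg, 20)
  // so the whole displacement can later be encoded in a single d-form
  // load/store rather than requiring a separate add.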
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::EXTRACT_ELT0 &&
        N->getValueType(0) == Op0.getValueType()) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
      DEBUG(cerr << "Replace: ");
      DEBUG(N->dump(&DAG));
      DEBUG(cerr << "\nWith:    ");
      DEBUG(Op0.Val->dump(&DAG));
      DEBUG(cerr << "\n");

      return Op0;
    }
    break;
  }
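  // (The extend combine above is a pure no-op elimination: e.g. a
  // zero_extend:i32 of an i32 SPUextract_elt0 adds no information, so the
  // extract node is used directly.)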
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      // Operand 1 is not guaranteed to be a constant, so use dyn_cast
      // rather than cast:
      ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1));
      if (CN != 0 && CN->getValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.Val->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    }
    break;
  }
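  // (Background: an A-form address is an 18-bit absolute local-store
  // address encoded directly in the instruction, which is only valid under
  // the small, 256K memory model -- hence the usingLargeMem() guard above.)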
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS: {
    SDOperand Op1 = N->getOperand(1);

    if (isa<ConstantSDNode>(Op1)) {
      // Eliminate degenerate vector shifts (shift or rotate by zero):
      ConstantSDNode *CN = cast<ConstantSDNode>(Op1);

      if (CN->getValue() == 0) {
        Result = Op0;
      }
    }
    break;
  }
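  // (e.g. (SPUvec_srl v, 0) is the identity, so the combine simply yields v.)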
  case SPUISD::PROMOTE_SCALAR: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUpromote_scalar (any|sign|zero_extend (SPUextract_elt0 <arg>))) ->
      // <arg>
      // but only if the SPUpromote_scalar and <arg> types match.
      SDOperand Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::EXTRACT_ELT0) {
        SDOperand Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == N->getValueType(0)) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::EXTRACT_ELT0: {
      // (SPUpromote_scalar (SPUextract_elt0 <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
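  // (Both PROMOTE_SCALAR patterns above eliminate scalar<->vector round
  // trips: extracting the preferred slot of a vector and promoting it right
  // back to the same vector type is the identity.)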
  }

  // A null Result means "no change"; otherwise, report the replacement and
  // hand the new node back to the DAG combiner.
  if (Result.Val) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.Val->dump(&DAG));
    DEBUG(cerr << "\n");
  }

  return Result;
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
SPUTargetLowering::ConstraintType
SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
  if (ConstraintLetter.size() == 1) {
    switch (ConstraintLetter[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}

std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // Single-letter constraints, following the GCC RS6000/PowerPC letters
    // (the R0-R31 ranges those letters denote on PPC do not apply to the
    // SPU's 128-register file):
    switch (Constraint[0]) {
    case 'b':   // base register
    case 'r':   // general-purpose register
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':   // floating-point register
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;
    case 'v':   // vector register
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
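// For example, in a hypothetical inline-asm use such as
//   asm("ai %0, %1, 4" : "=r"(out) : "r"(in));
// the single-letter 'r' constraint on an i32 operand resolves to
// SPU::R32CRegisterClass via the switch above.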

//! Compute used/known bits for a SPU operand
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth) const {

  switch (Op.getOpcode()) {
  default:
    // Nothing known in the default case; KnownZero and KnownOne are left
    // as the caller initialized them.
    break;

  // Note: CALL, SHUFB, INSERT_MASK and CNTB are not handled here.

  case SPUISD::PROMOTE_SCALAR: {
    // Only the bits covered by the promoted scalar's type are defined; any
    // higher bits queried via Mask are known to be zero. Nothing can be
    // asserted about KnownOne, so it is deliberately left untouched.
    SDOperand Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    KnownZero |= APInt(Mask.getBitWidth(), ~InMask, false);
    break;
  }

  case SPUISD::LDRESULT:
  case SPUISD::EXTRACT_ELT0:
  case SPUISD::EXTRACT_ELT0_CHAINED: {
    // Likewise: bits outside the result type's mask are known zero, and no
    // bits are known to be one.
    MVT OpVT = Op.getValueType();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    KnownZero |= APInt(Mask.getBitWidth(), ~InMask, false);
    break;
  }

  // The following SPU nodes are not handled here:
  //   EXTRACT_I1_ZEXT, EXTRACT_I1_SEXT, EXTRACT_I8_ZEXT, EXTRACT_I8_SEXT,
  //   MPY, MPYU, MPYH, MPYHH,
  //   SHLQUAD_L_BITS, SHLQUAD_L_BYTES,
  //   VEC_SHL, VEC_SRL, VEC_SRA, VEC_ROTL, VEC_ROTR,
  //   ROTQUAD_RZ_BYTES, ROTQUAD_RZ_BITS,
  //   ROTBYTES_RIGHT_S, ROTBYTES_LEFT, ROTBYTES_LEFT_CHAINED,
  //   SELECT_MASK, SELB, FPInterp, FPRecipEst, SEXT32TO64
  }
}
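// Usage note: the DAG combiner reaches this hook through
// SelectionDAG::ComputeMaskedBits, so any bits reported in KnownZero above
// allow SimplifyDemandedBits to remove redundant masking around these
// target nodes.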

// LowerAsmOperandForConstraint
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                char ConstraintLetter,
                                                std::vector<SDOperand> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, Ops, DAG);
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode.
bool SPUTargetLowering::isLegalAddressImmediate(int64_t V, const Type *Ty) const {
  // The SPU's local store is 256K (2^18 bytes), so a legal offset must lie
  // strictly within that range in either direction:
  return (V > -(1 << 18) && V < (1 << 18) - 1);
}
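// For example, offsets of 1024 or -1024 are accepted, while 300000 is
// rejected because it lies outside the 2^18-byte local store.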

bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}