SPUISelLowering.cpp revision 1df30c4061a6b6faaf9641f72b1741a0af4aa532
1//===-- SPUISelLowering.cpp - Cell SPU DAG Lowering Implementation --------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the SPUTargetLowering class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "SPURegisterNames.h"
15#include "SPUISelLowering.h"
16#include "SPUTargetMachine.h"
17#include "SPUFrameInfo.h"
18#include "llvm/ADT/APInt.h"
19#include "llvm/ADT/VectorExtras.h"
20#include "llvm/CodeGen/CallingConvLower.h"
21#include "llvm/CodeGen/MachineFrameInfo.h"
22#include "llvm/CodeGen/MachineFunction.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/Constants.h"
27#include "llvm/Function.h"
28#include "llvm/Intrinsics.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Target/TargetOptions.h"
32
33#include <map>
34
35using namespace llvm;
36
37// Used in getTargetNodeName() below
38namespace {
39  std::map<unsigned, const char *> node_names;
40
41  //! MVT mapping to useful data for Cell SPU
42  struct valtype_map_s {
43    const MVT   valtype;
44    const int   prefslot_byte;
45  };
46
47  const valtype_map_s valtype_map[] = {
48    { MVT::i1,   3 },
49    { MVT::i8,   3 },
50    { MVT::i16,  2 },
51    { MVT::i32,  0 },
52    { MVT::f32,  0 },
53    { MVT::i64,  0 },
54    { MVT::f64,  0 },
55    { MVT::i128, 0 }
56  };
57
58  const size_t n_valtype_map = sizeof(valtype_map) / sizeof(valtype_map[0]);
59
60  const valtype_map_s *getValueTypeMapEntry(MVT VT) {
61    const valtype_map_s *retval = 0;
62
63    for (size_t i = 0; i < n_valtype_map; ++i) {
64      if (valtype_map[i].valtype == VT) {
65        retval = valtype_map + i;
66        break;
67      }
68    }
69
70#ifndef NDEBUG
71    if (retval == 0) {
72      cerr << "getValueTypeMapEntry returns NULL for "
73           << VT.getMVTString()
74           << "\n";
75      abort();
76    }
77#endif
78
79    return retval;
80  }
81}
82
83SPUTargetLowering::SPUTargetLowering(SPUTargetMachine &TM)
84  : TargetLowering(TM),
85    SPUTM(TM)
86{
87  // Fold away setcc operations if possible.
88  setPow2DivIsCheap();
89
90  // Use _setjmp/_longjmp instead of setjmp/longjmp.
91  setUseUnderscoreSetJmp(true);
92  setUseUnderscoreLongJmp(true);
93
94  // Set up the SPU's register classes:
95  addRegisterClass(MVT::i8,   SPU::R8CRegisterClass);
96  addRegisterClass(MVT::i16,  SPU::R16CRegisterClass);
97  addRegisterClass(MVT::i32,  SPU::R32CRegisterClass);
98  addRegisterClass(MVT::i64,  SPU::R64CRegisterClass);
99  addRegisterClass(MVT::f32,  SPU::R32FPRegisterClass);
100  addRegisterClass(MVT::f64,  SPU::R64FPRegisterClass);
101  addRegisterClass(MVT::i128, SPU::GPRCRegisterClass);
102
103  // SPU has no sign or zero extended loads for i1, i8, i16:
104  setLoadExtAction(ISD::EXTLOAD,  MVT::i1, Promote);
105  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
106  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
107
108  setLoadExtAction(ISD::EXTLOAD,  MVT::f32, Expand);
109  setLoadExtAction(ISD::EXTLOAD,  MVT::f64, Expand);
110
111  // SPU constant load actions are custom lowered:
112  setOperationAction(ISD::Constant,   MVT::i64, Custom);
113  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
114  setOperationAction(ISD::ConstantFP, MVT::f64, Custom);
115
116  // SPU's loads and stores have to be custom lowered:
117  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
118       ++sctype) {
119    MVT VT = (MVT::SimpleValueType)sctype;
120
121    setOperationAction(ISD::LOAD,   VT, Custom);
122    setOperationAction(ISD::STORE,  VT, Custom);
123    setLoadExtAction(ISD::EXTLOAD,  VT, Custom);
124    setLoadExtAction(ISD::ZEXTLOAD, VT, Custom);
125    setLoadExtAction(ISD::SEXTLOAD, VT, Custom);
126
127    // SMUL_LOHI, UMUL_LOHI are not legal for Cell:
128    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
129    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
130
131    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::i8; --stype) {
132      MVT StoreVT = (MVT::SimpleValueType) stype;
133      setTruncStoreAction(VT, StoreVT, Expand);
134    }
135  }
136
137  for (unsigned sctype = (unsigned) MVT::f32; sctype < (unsigned) MVT::f64;
138       ++sctype) {
139    MVT VT = (MVT::SimpleValueType) sctype;
140
141    setOperationAction(ISD::LOAD,   VT, Custom);
142    setOperationAction(ISD::STORE,  VT, Custom);
143
144    for (unsigned stype = sctype - 1; stype >= (unsigned) MVT::f32; --stype) {
145      MVT StoreVT = (MVT::SimpleValueType) stype;
146      setTruncStoreAction(VT, StoreVT, Expand);
147    }
148  }
149
150  // Custom lower BRCOND for i8 to "promote" the result to whatever the result
151  // operand happens to be:
152  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
153
154  // Expand the jumptable branches
155  setOperationAction(ISD::BR_JT,        MVT::Other, Expand);
156  setOperationAction(ISD::BR_CC,        MVT::Other, Expand);
157
158  // Custom lower SELECT_CC for most cases, but expand by default
159  setOperationAction(ISD::SELECT_CC,    MVT::Other, Expand);
160  setOperationAction(ISD::SELECT_CC,    MVT::i8,    Custom);
161  setOperationAction(ISD::SELECT_CC,    MVT::i16,   Custom);
162  setOperationAction(ISD::SELECT_CC,    MVT::i32,   Custom);
163  setOperationAction(ISD::SELECT_CC,    MVT::i64,   Custom);
164
165  // SPU has no intrinsics for these particular operations:
166  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
167
168  // SPU has no SREM/UREM instructions
169  setOperationAction(ISD::SREM, MVT::i32, Expand);
170  setOperationAction(ISD::UREM, MVT::i32, Expand);
171  setOperationAction(ISD::SREM, MVT::i64, Expand);
172  setOperationAction(ISD::UREM, MVT::i64, Expand);
173
174  // We don't support sin/cos/sqrt/fmod
175  setOperationAction(ISD::FSIN , MVT::f64, Expand);
176  setOperationAction(ISD::FCOS , MVT::f64, Expand);
177  setOperationAction(ISD::FREM , MVT::f64, Expand);
178  setOperationAction(ISD::FSIN , MVT::f32, Expand);
179  setOperationAction(ISD::FCOS , MVT::f32, Expand);
180  setOperationAction(ISD::FREM , MVT::f32, Expand);
181
182  // If we're enabling GP optimizations, use hardware square root
183  setOperationAction(ISD::FSQRT, MVT::f64, Expand);
184  setOperationAction(ISD::FSQRT, MVT::f32, Expand);
185
186  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
187  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
188
189  // SPU can do rotate right and left, so legalize it... but customize for i8
190  // because instructions don't exist.
191
192  // FIXME: Change from "expand" to appropriate type once ROTR is supported in
193  //        .td files.
194  setOperationAction(ISD::ROTR, MVT::i32,    Expand /*Legal*/);
195  setOperationAction(ISD::ROTR, MVT::i16,    Expand /*Legal*/);
196  setOperationAction(ISD::ROTR, MVT::i8,     Expand /*Custom*/);
197
198  setOperationAction(ISD::ROTL, MVT::i32,    Legal);
199  setOperationAction(ISD::ROTL, MVT::i16,    Legal);
200  setOperationAction(ISD::ROTL, MVT::i8,     Custom);
201
202  // SPU has no native version of shift left/right for i8
203  setOperationAction(ISD::SHL,  MVT::i8,     Custom);
204  setOperationAction(ISD::SRL,  MVT::i8,     Custom);
205  setOperationAction(ISD::SRA,  MVT::i8,     Custom);
206
207  // SPU needs custom lowering for shift left/right for i64
208  setOperationAction(ISD::SHL,  MVT::i64,    Custom);
209  setOperationAction(ISD::SRL,  MVT::i64,    Custom);
210  setOperationAction(ISD::SRA,  MVT::i64,    Custom);
211
212  // Custom lower i8, i32 and i64 multiplications
213  setOperationAction(ISD::MUL,  MVT::i8,     Custom);
214  setOperationAction(ISD::MUL,  MVT::i32,    Legal);
215  setOperationAction(ISD::MUL,  MVT::i64,    Expand);   // libcall
216
217  // Need to custom handle (some) common i8, i64 math ops
218  setOperationAction(ISD::ADD,  MVT::i64,    Custom);
219  setOperationAction(ISD::SUB,  MVT::i8,     Custom);
220  setOperationAction(ISD::SUB,  MVT::i64,    Custom);
221
222  // SPU does not have BSWAP. It does have i32 support CTLZ.
223  // CTPOP has to be custom lowered.
224  setOperationAction(ISD::BSWAP, MVT::i32,   Expand);
225  setOperationAction(ISD::BSWAP, MVT::i64,   Expand);
226
227  setOperationAction(ISD::CTPOP, MVT::i8,    Custom);
228  setOperationAction(ISD::CTPOP, MVT::i16,   Custom);
229  setOperationAction(ISD::CTPOP, MVT::i32,   Custom);
230  setOperationAction(ISD::CTPOP, MVT::i64,   Custom);
231
232  setOperationAction(ISD::CTTZ , MVT::i32,   Expand);
233  setOperationAction(ISD::CTTZ , MVT::i64,   Expand);
234
235  setOperationAction(ISD::CTLZ , MVT::i32,   Legal);
236
237  // SPU has a version of select that implements (a&~c)|(b&c), just like
238  // select ought to work:
239  setOperationAction(ISD::SELECT, MVT::i8,   Legal);
240  setOperationAction(ISD::SELECT, MVT::i16,  Legal);
241  setOperationAction(ISD::SELECT, MVT::i32,  Legal);
242  setOperationAction(ISD::SELECT, MVT::i64,  Legal);
243
244  setOperationAction(ISD::SETCC, MVT::i8,    Legal);
245  setOperationAction(ISD::SETCC, MVT::i16,   Legal);
246  setOperationAction(ISD::SETCC, MVT::i32,   Legal);
247  setOperationAction(ISD::SETCC, MVT::i64,   Legal);
248
249  // Zero extension and sign extension for i64 have to be
250  // custom legalized
251  setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom);
252  setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom);
253  setOperationAction(ISD::ANY_EXTEND,  MVT::i64, Custom);
254
255  // Custom lower i128 -> i64 truncates
256  setOperationAction(ISD::TRUNCATE, MVT::i64, Custom);
257
258  // SPU has a legal FP -> signed INT instruction
259  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
260  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
261  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
262  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
263
264  // FDIV on SPU requires custom lowering
265  setOperationAction(ISD::FDIV, MVT::f32, Custom);
266  setOperationAction(ISD::FDIV, MVT::f64, Expand);      // libcall
267
268  // SPU has [U|S]INT_TO_FP
269  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
270  setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
271  setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
272  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
273  setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
274  setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
275  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
276  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
277
278  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Legal);
279  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Legal);
280  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Legal);
281  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Legal);
282
283  // We cannot sextinreg(i1).  Expand to shifts.
284  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
285
286  // Support label based line numbers.
287  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
288  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
289
290  // We want to legalize GlobalAddress and ConstantPool nodes into the
291  // appropriate instructions to materialize the address.
292  for (unsigned sctype = (unsigned) MVT::i8; sctype < (unsigned) MVT::f128;
293       ++sctype) {
294    MVT VT = (MVT::SimpleValueType)sctype;
295
296    setOperationAction(ISD::GlobalAddress,  VT, Custom);
297    setOperationAction(ISD::ConstantPool,   VT, Custom);
298    setOperationAction(ISD::JumpTable,      VT, Custom);
299  }
300
301  // RET must be custom lowered, to meet ABI requirements
302  setOperationAction(ISD::RET,           MVT::Other, Custom);
303
304  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
305  setOperationAction(ISD::VASTART           , MVT::Other, Custom);
306
307  // Use the default implementation.
308  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
309  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
310  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
311  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
312  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
313  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);
314  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Expand);
315
316  // Cell SPU has instructions for converting between i64 and fp.
317  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
318  setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
319
320  // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
321  setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
322
323  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
324  setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
325
326  // First set operation action for all vector types to expand. Then we
327  // will selectively turn on ones that can be effectively codegen'd.
328  addRegisterClass(MVT::v16i8, SPU::VECREGRegisterClass);
329  addRegisterClass(MVT::v8i16, SPU::VECREGRegisterClass);
330  addRegisterClass(MVT::v4i32, SPU::VECREGRegisterClass);
331  addRegisterClass(MVT::v2i64, SPU::VECREGRegisterClass);
332  addRegisterClass(MVT::v4f32, SPU::VECREGRegisterClass);
333  addRegisterClass(MVT::v2f64, SPU::VECREGRegisterClass);
334
335  for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
336       i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
337    MVT VT = (MVT::SimpleValueType)i;
338
339    // add/sub are legal for all supported vector VT's.
340    setOperationAction(ISD::ADD , VT, Legal);
341    setOperationAction(ISD::SUB , VT, Legal);
342    // mul has to be custom lowered.
343    setOperationAction(ISD::MUL , VT, Custom);
344
345    setOperationAction(ISD::AND   , VT, Legal);
346    setOperationAction(ISD::OR    , VT, Legal);
347    setOperationAction(ISD::XOR   , VT, Legal);
348    setOperationAction(ISD::LOAD  , VT, Legal);
349    setOperationAction(ISD::SELECT, VT, Legal);
350    setOperationAction(ISD::STORE,  VT, Legal);
351
352    // These operations need to be expanded:
353    setOperationAction(ISD::SDIV, VT, Expand);
354    setOperationAction(ISD::SREM, VT, Expand);
355    setOperationAction(ISD::UDIV, VT, Expand);
356    setOperationAction(ISD::UREM, VT, Expand);
357    setOperationAction(ISD::FDIV, VT, Custom);
358
359    // Custom lower build_vector, constant pool spills, insert and
360    // extract vector elements:
361    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
362    setOperationAction(ISD::ConstantPool, VT, Custom);
363    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
364    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
365    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
366    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
367  }
368
369  setOperationAction(ISD::AND, MVT::v16i8, Custom);
370  setOperationAction(ISD::OR,  MVT::v16i8, Custom);
371  setOperationAction(ISD::XOR, MVT::v16i8, Custom);
372  setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
373
374  // FIXME: This is only temporary until I put all vector multiplications in
375  // SPUInstrInfo.td:
376  setOperationAction(ISD::MUL, MVT::v4i32, Legal);
377
378  setShiftAmountType(MVT::i32);
379  setBooleanContents(ZeroOrNegativeOneBooleanContent);
380
381  setStackPointerRegisterToSaveRestore(SPU::R1);
382
383  // We have target-specific dag combine patterns for the following nodes:
384  setTargetDAGCombine(ISD::ADD);
385  setTargetDAGCombine(ISD::ZERO_EXTEND);
386  setTargetDAGCombine(ISD::SIGN_EXTEND);
387  setTargetDAGCombine(ISD::ANY_EXTEND);
388
389  computeRegisterProperties();
390
391  // Set pre-RA register scheduler default to BURR, which produces slightly
392  // better code than the default (could also be TDRR, but TargetLowering.h
393  // needs a mod to support that model):
394  setSchedulingPreference(SchedulingForRegPressure);
395}
396
397const char *
398SPUTargetLowering::getTargetNodeName(unsigned Opcode) const
399{
400  if (node_names.empty()) {
401    node_names[(unsigned) SPUISD::RET_FLAG] = "SPUISD::RET_FLAG";
402    node_names[(unsigned) SPUISD::Hi] = "SPUISD::Hi";
403    node_names[(unsigned) SPUISD::Lo] = "SPUISD::Lo";
404    node_names[(unsigned) SPUISD::PCRelAddr] = "SPUISD::PCRelAddr";
405    node_names[(unsigned) SPUISD::AFormAddr] = "SPUISD::AFormAddr";
406    node_names[(unsigned) SPUISD::IndirectAddr] = "SPUISD::IndirectAddr";
407    node_names[(unsigned) SPUISD::LDRESULT] = "SPUISD::LDRESULT";
408    node_names[(unsigned) SPUISD::CALL] = "SPUISD::CALL";
409    node_names[(unsigned) SPUISD::SHUFB] = "SPUISD::SHUFB";
410    node_names[(unsigned) SPUISD::SHUFFLE_MASK] = "SPUISD::SHUFFLE_MASK";
411    node_names[(unsigned) SPUISD::CNTB] = "SPUISD::CNTB";
412    node_names[(unsigned) SPUISD::PREFSLOT2VEC] = "SPUISD::PREFSLOT2VEC";
413    node_names[(unsigned) SPUISD::VEC2PREFSLOT] = "SPUISD::VEC2PREFSLOT";
414    node_names[(unsigned) SPUISD::MPY] = "SPUISD::MPY";
415    node_names[(unsigned) SPUISD::MPYU] = "SPUISD::MPYU";
416    node_names[(unsigned) SPUISD::MPYH] = "SPUISD::MPYH";
417    node_names[(unsigned) SPUISD::MPYHH] = "SPUISD::MPYHH";
418    node_names[(unsigned) SPUISD::SHLQUAD_L_BITS] = "SPUISD::SHLQUAD_L_BITS";
419    node_names[(unsigned) SPUISD::SHLQUAD_L_BYTES] = "SPUISD::SHLQUAD_L_BYTES";
420    node_names[(unsigned) SPUISD::VEC_SHL] = "SPUISD::VEC_SHL";
421    node_names[(unsigned) SPUISD::VEC_SRL] = "SPUISD::VEC_SRL";
422    node_names[(unsigned) SPUISD::VEC_SRA] = "SPUISD::VEC_SRA";
423    node_names[(unsigned) SPUISD::VEC_ROTL] = "SPUISD::VEC_ROTL";
424    node_names[(unsigned) SPUISD::VEC_ROTR] = "SPUISD::VEC_ROTR";
425    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BYTES] =
426      "SPUISD::ROTQUAD_RZ_BYTES";
427    node_names[(unsigned) SPUISD::ROTQUAD_RZ_BITS] =
428      "SPUISD::ROTQUAD_RZ_BITS";
429    node_names[(unsigned) SPUISD::ROTBYTES_LEFT] = "SPUISD::ROTBYTES_LEFT";
430    node_names[(unsigned) SPUISD::ROTBYTES_LEFT_BITS] =
431      "SPUISD::ROTBYTES_LEFT_BITS";
432    node_names[(unsigned) SPUISD::SELECT_MASK] = "SPUISD::SELECT_MASK";
433    node_names[(unsigned) SPUISD::SELB] = "SPUISD::SELB";
434    node_names[(unsigned) SPUISD::ADD_EXTENDED] = "SPUISD::ADD_EXTENDED";
435    node_names[(unsigned) SPUISD::CARRY_GENERATE] = "SPUISD::CARRY_GENERATE";
436    node_names[(unsigned) SPUISD::SUB_EXTENDED] = "SPUISD::SUB_EXTENDED";
437    node_names[(unsigned) SPUISD::BORROW_GENERATE] = "SPUISD::BORROW_GENERATE";
438    node_names[(unsigned) SPUISD::FPInterp] = "SPUISD::FPInterp";
439    node_names[(unsigned) SPUISD::FPRecipEst] = "SPUISD::FPRecipEst";
440    node_names[(unsigned) SPUISD::SEXT32TO64] = "SPUISD::SEXT32TO64";
441  }
442
443  std::map<unsigned, const char *>::iterator i = node_names.find(Opcode);
444
445  return ((i != node_names.end()) ? i->second : 0);
446}
447
448//===----------------------------------------------------------------------===//
449// Return the Cell SPU's SETCC result type
450//===----------------------------------------------------------------------===//
451
452MVT SPUTargetLowering::getSetCCResultType(const SDValue &Op) const {
453  MVT VT = Op.getValueType();
454  // i16 and i32 are valid SETCC result types
455  return ((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) ? VT : MVT::i32);
456}
457
458//===----------------------------------------------------------------------===//
459// Calling convention code:
460//===----------------------------------------------------------------------===//
461
462#include "SPUGenCallingConv.inc"
463
464//===----------------------------------------------------------------------===//
465//  LowerOperation implementation
466//===----------------------------------------------------------------------===//
467
468/// Custom lower loads for CellSPU
469/*!
470 All CellSPU loads and stores are aligned to 16-byte boundaries, so for elements
471 within a 16-byte block, we have to rotate to extract the requested element.
472
473 For extending loads, we also want to ensure that the following sequence is
474 emitted, e.g. for MVT::f32 extending load to MVT::f64:
475
476\verbatim
477%1  v16i8,ch = load
478%2  v16i8,ch = rotate %1
%3  v4f32,ch = bitconvert %2
%4  f32      = vec2prefslot %3
481%5  f64      = fp_extend %4
482\endverbatim
483*/
static SDValue
LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  LoadSDNode *LN = cast<LoadSDNode>(Op);
  SDValue the_chain = LN->getChain();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  MVT InVT = LN->getMemoryVT();       // type actually resident in memory
  MVT OutVT = Op.getValueType();      // type the load produces (wider if extending)
  ISD::LoadExtType ExtType = LN->getExtensionType();
  unsigned alignment = LN->getAlignment();
  // NOTE(review): in release (NDEBUG) builds getValueTypeMapEntry() returns
  // null for an unmapped InVT and vtm->prefslot_byte below would dereference
  // it; this assumes InVT is always one of the table's scalar types — confirm.
  const valtype_map_s *vtm = getValueTypeMapEntry(InVT);

  switch (LN->getAddressingMode()) {
  case ISD::UNINDEXED: {
    SDValue result;
    SDValue basePtr = LN->getBasePtr();
    SDValue rotate;                   // byte rotation bringing the element into
                                      // the preferred slot

    if (alignment == 16) {
      ConstantSDNode *CN;

      // Special cases for a known aligned load to simplify the base pointer
      // and the rotation amount:
      if (basePtr.getOpcode() == ISD::ADD
          && (CN = dyn_cast<ConstantSDNode > (basePtr.getOperand(1))) != 0) {
        // Known offset into basePtr
        int64_t offset = CN->getSExtValue();
        // Rotation = byte position within the quadword, adjusted for the
        // preferred slot; normalized into [0, 16).
        int64_t rotamt = int64_t((offset & 0xf) - vtm->prefslot_byte);

        if (rotamt < 0)
          rotamt += 16;

        rotate = DAG.getConstant(rotamt, MVT::i16);

        // Simplify the base pointer for this case:
        basePtr = basePtr.getOperand(0);
        if ((offset & ~0xf) > 0) {
          // Keep only the 16-byte-aligned part of the offset in the address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                                basePtr,
                                DAG.getConstant((offset & ~0xf), PtrVT));
        }
      } else if ((basePtr.getOpcode() == SPUISD::AFormAddr)
                 || (basePtr.getOpcode() == SPUISD::IndirectAddr
                     && basePtr.getOperand(0).getOpcode() == SPUISD::Hi
                     && basePtr.getOperand(1).getOpcode() == SPUISD::Lo)) {
        // Plain aligned a-form address: rotate into preferred slot
        // Same for (SPUindirect (SPUhi ...), (SPUlo ...))
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        rotate = DAG.getConstant(rotamt, MVT::i16);
      } else {
        // Offset the rotate amount by the basePtr and the preferred slot
        // byte offset
        int64_t rotamt = -vtm->prefslot_byte;
        if (rotamt < 0)
          rotamt += 16;
        // Rotation amount depends on the runtime pointer value, so it is
        // computed in the DAG rather than as a constant.
        rotate = DAG.getNode(ISD::ADD, PtrVT,
                             basePtr,
                             DAG.getConstant(rotamt, PtrVT));
      }
    } else {
      // Unaligned load: must be more pessimistic about addressing modes:
      if (basePtr.getOpcode() == ISD::ADD) {
        MachineFunction &MF = DAG.getMachineFunction();
        MachineRegisterInfo &RegInfo = MF.getRegInfo();
        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
        SDValue Flag;

        SDValue Op0 = basePtr.getOperand(0);
        SDValue Op1 = basePtr.getOperand(1);

        if (isa<ConstantSDNode>(Op1)) {
          // Convert the (add <ptr>, <const>) to an indirect address contained
          // in a register. Note that this is done because we need to avoid
          // creating a 0(reg) d-form address due to the SPU's block loads.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
          // Force the address through a virtual register so the final
          // address is a plain register operand.
          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
        } else {
          // Convert the (add <arg1>, <arg2>) to an indirect address, which
          // will likely be lowered as a reg(reg) x-form address.
          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
        }
      } else {
        // Wrap a bare pointer as an indirect address with a zero offset.
        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
                              basePtr,
                              DAG.getConstant(0, PtrVT));
      }

      // Offset the rotate amount by the basePtr and the preferred slot
      // byte offset
      rotate = DAG.getNode(ISD::ADD, PtrVT,
                           basePtr,
                           DAG.getConstant(-vtm->prefslot_byte, PtrVT));
    }

    // Re-emit as a v16i8 vector load
    result = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
                         LN->getSrcValue(), LN->getSrcValueOffset(),
                         LN->isVolatile(), 16);

    // Update the chain
    the_chain = result.getValue(1);

    // Rotate into the preferred slot:
    result = DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v16i8,
                         result.getValue(0), rotate);

    // Convert the loaded v16i8 vector to the appropriate vector type
    // specified by the operand:
    MVT vecVT = MVT::getVectorVT(InVT, (128 / InVT.getSizeInBits()));
    result = DAG.getNode(SPUISD::VEC2PREFSLOT, InVT,
                         DAG.getNode(ISD::BIT_CONVERT, vecVT, result));

    // Handle extending loads by extending the scalar result:
    if (ExtType == ISD::SEXTLOAD) {
      result = DAG.getNode(ISD::SIGN_EXTEND, OutVT, result);
    } else if (ExtType == ISD::ZEXTLOAD) {
      result = DAG.getNode(ISD::ZERO_EXTEND, OutVT, result);
    } else if (ExtType == ISD::EXTLOAD) {
      unsigned NewOpc = ISD::ANY_EXTEND;

      // FP extending loads become FP_EXTEND rather than ANY_EXTEND.
      if (OutVT.isFloatingPoint())
        NewOpc = ISD::FP_EXTEND;

      result = DAG.getNode(NewOpc, OutVT, result);
    }

    // Package the scalar result and the updated chain into a LDRESULT node
    // so both values are returned to the caller.
    SDVTList retvts = DAG.getVTList(OutVT, MVT::Other);
    SDValue retops[2] = {
      result,
      the_chain
    };

    result = DAG.getNode(SPUISD::LDRESULT, retvts,
                         retops, sizeof(retops) / sizeof(retops[0]));
    return result;
  }
  case ISD::PRE_INC:
  case ISD::PRE_DEC:
  case ISD::POST_INC:
  case ISD::POST_DEC:
  case ISD::LAST_INDEXED_MODE:
    // Indexed addressing modes are never produced for SPU loads.
    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
            "UNINDEXED\n";
    cerr << (unsigned) LN->getAddressingMode() << "\n";
    abort();
    /*NOTREACHED*/
  }

  return SDValue();
}
636
637/// Custom lower stores for CellSPU
638/*!
639 All CellSPU stores are aligned to 16-byte boundaries, so for elements
640 within a 16-byte block, we have to generate a shuffle to insert the
641 requested element into its place, then store the resulting block.
642 */
643static SDValue
644LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
645  StoreSDNode *SN = cast<StoreSDNode>(Op);
646  SDValue Value = SN->getValue();
647  MVT VT = Value.getValueType();
648  MVT StVT = (!SN->isTruncatingStore() ? VT : SN->getMemoryVT());
649  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
650  unsigned alignment = SN->getAlignment();
651
652  switch (SN->getAddressingMode()) {
653  case ISD::UNINDEXED: {
654    // The vector type we really want to load from the 16-byte chunk.
655    MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits())),
656        stVecVT = MVT::getVectorVT(StVT, (128 / StVT.getSizeInBits()));
657
658    SDValue alignLoadVec;
659    SDValue basePtr = SN->getBasePtr();
660    SDValue the_chain = SN->getChain();
661    SDValue insertEltOffs;
662
663    if (alignment == 16) {
664      ConstantSDNode *CN;
665
666      // Special cases for a known aligned load to simplify the base pointer
667      // and insertion byte:
668      if (basePtr.getOpcode() == ISD::ADD
669          && (CN = dyn_cast<ConstantSDNode>(basePtr.getOperand(1))) != 0) {
670        // Known offset into basePtr
671        int64_t offset = CN->getSExtValue();
672
673        // Simplify the base pointer for this case:
674        basePtr = basePtr.getOperand(0);
675        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
676                                    basePtr,
677                                    DAG.getConstant((offset & 0xf), PtrVT));
678
679        if ((offset & ~0xf) > 0) {
680          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
681                                basePtr,
682                                DAG.getConstant((offset & ~0xf), PtrVT));
683        }
684      } else {
685        // Otherwise, assume it's at byte 0 of basePtr
686        insertEltOffs = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
687                                    basePtr,
688                                    DAG.getConstant(0, PtrVT));
689      }
690    } else {
691      // Unaligned load: must be more pessimistic about addressing modes:
692      if (basePtr.getOpcode() == ISD::ADD) {
693        MachineFunction &MF = DAG.getMachineFunction();
694        MachineRegisterInfo &RegInfo = MF.getRegInfo();
695        unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
696        SDValue Flag;
697
698        SDValue Op0 = basePtr.getOperand(0);
699        SDValue Op1 = basePtr.getOperand(1);
700
701        if (isa<ConstantSDNode>(Op1)) {
702          // Convert the (add <ptr>, <const>) to an indirect address contained
703          // in a register. Note that this is done because we need to avoid
704          // creating a 0(reg) d-form address due to the SPU's block loads.
705          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
706          the_chain = DAG.getCopyToReg(the_chain, VReg, basePtr, Flag);
707          basePtr = DAG.getCopyFromReg(the_chain, VReg, PtrVT);
708        } else {
709          // Convert the (add <arg1>, <arg2>) to an indirect address, which
710          // will likely be lowered as a reg(reg) x-form address.
711          basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT, Op0, Op1);
712        }
713      } else {
714        basePtr = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
715                              basePtr,
716                              DAG.getConstant(0, PtrVT));
717      }
718
719      // Insertion point is solely determined by basePtr's contents
720      insertEltOffs = DAG.getNode(ISD::ADD, PtrVT,
721                                  basePtr,
722                                  DAG.getConstant(0, PtrVT));
723    }
724
725    // Re-emit as a v16i8 vector load
726    alignLoadVec = DAG.getLoad(MVT::v16i8, the_chain, basePtr,
727                               SN->getSrcValue(), SN->getSrcValueOffset(),
728                               SN->isVolatile(), 16);
729
730    // Update the chain
731    the_chain = alignLoadVec.getValue(1);
732
733    LoadSDNode *LN = cast<LoadSDNode>(alignLoadVec);
734    SDValue theValue = SN->getValue();
735    SDValue result;
736
737    if (StVT != VT
738        && (theValue.getOpcode() == ISD::AssertZext
739            || theValue.getOpcode() == ISD::AssertSext)) {
740      // Drill down and get the value for zero- and sign-extended
741      // quantities
742      theValue = theValue.getOperand(0);
743    }
744
745    // If the base pointer is already a D-form address, then just create
746    // a new D-form address with a slot offset and the orignal base pointer.
747    // Otherwise generate a D-form address with the slot offset relative
748    // to the stack pointer, which is always aligned.
749#if !defined(NDEBUG)
750      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
751        cerr << "CellSPU LowerSTORE: basePtr = ";
752        basePtr.getNode()->dump(&DAG);
753        cerr << "\n";
754      }
755#endif
756
757    SDValue insertEltOp =
758            DAG.getNode(SPUISD::SHUFFLE_MASK, vecVT, insertEltOffs);
759    SDValue vectorizeOp =
760            DAG.getNode(ISD::SCALAR_TO_VECTOR, vecVT, theValue);
761
762    result = DAG.getNode(SPUISD::SHUFB, vecVT,
763			 vectorizeOp, alignLoadVec,
764			 DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, insertEltOp));
765
766    result = DAG.getStore(the_chain, result, basePtr,
767                          LN->getSrcValue(), LN->getSrcValueOffset(),
768                          LN->isVolatile(), LN->getAlignment());
769
770#if 0 && !defined(NDEBUG)
771    if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
772      const SDValue &currentRoot = DAG.getRoot();
773
774      DAG.setRoot(result);
775      cerr << "------- CellSPU:LowerStore result:\n";
776      DAG.dump();
777      cerr << "-------\n";
778      DAG.setRoot(currentRoot);
779    }
780#endif
781
782    return result;
783    /*UNREACHED*/
784  }
785  case ISD::PRE_INC:
786  case ISD::PRE_DEC:
787  case ISD::POST_INC:
788  case ISD::POST_DEC:
789  case ISD::LAST_INDEXED_MODE:
790    cerr << "LowerLOAD: Got a LoadSDNode with an addr mode other than "
791            "UNINDEXED\n";
792    cerr << (unsigned) SN->getAddressingMode() << "\n";
793    abort();
794    /*NOTREACHED*/
795  }
796
797  return SDValue();
798}
799
800/// Generate the address of a constant pool entry.
801static SDValue
802LowerConstantPool(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
803  MVT PtrVT = Op.getValueType();
804  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
805  Constant *C = CP->getConstVal();
806  SDValue CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
807  SDValue Zero = DAG.getConstant(0, PtrVT);
808  const TargetMachine &TM = DAG.getTarget();
809
810  if (TM.getRelocationModel() == Reloc::Static) {
811    if (!ST->usingLargeMem()) {
812      // Just return the SDValue with the constant pool address in it.
813      return DAG.getNode(SPUISD::AFormAddr, PtrVT, CPI, Zero);
814    } else {
815      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, CPI, Zero);
816      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, CPI, Zero);
817      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
818    }
819  }
820
821  assert(0 &&
822         "LowerConstantPool: Relocation model other than static"
823         " not supported.");
824  return SDValue();
825}
826
827static SDValue
828LowerJumpTable(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
829  MVT PtrVT = Op.getValueType();
830  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
831  SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
832  SDValue Zero = DAG.getConstant(0, PtrVT);
833  const TargetMachine &TM = DAG.getTarget();
834
835  if (TM.getRelocationModel() == Reloc::Static) {
836    if (!ST->usingLargeMem()) {
837      return DAG.getNode(SPUISD::AFormAddr, PtrVT, JTI, Zero);
838    } else {
839      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, JTI, Zero);
840      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, JTI, Zero);
841      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
842    }
843  }
844
845  assert(0 &&
846         "LowerJumpTable: Relocation model other than static not supported.");
847  return SDValue();
848}
849
850static SDValue
851LowerGlobalAddress(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
852  MVT PtrVT = Op.getValueType();
853  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
854  GlobalValue *GV = GSDN->getGlobal();
855  SDValue GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
856  const TargetMachine &TM = DAG.getTarget();
857  SDValue Zero = DAG.getConstant(0, PtrVT);
858
859  if (TM.getRelocationModel() == Reloc::Static) {
860    if (!ST->usingLargeMem()) {
861      return DAG.getNode(SPUISD::AFormAddr, PtrVT, GA, Zero);
862    } else {
863      SDValue Hi = DAG.getNode(SPUISD::Hi, PtrVT, GA, Zero);
864      SDValue Lo = DAG.getNode(SPUISD::Lo, PtrVT, GA, Zero);
865      return DAG.getNode(SPUISD::IndirectAddr, PtrVT, Hi, Lo);
866    }
867  } else {
868    cerr << "LowerGlobalAddress: Relocation model other than static not "
869         << "supported.\n";
870    abort();
871    /*NOTREACHED*/
872  }
873
874  return SDValue();
875}
876
877//! Custom lower i64 integer constants
878/*!
879 This code inserts all of the necessary juggling that needs to occur to load
880 a 64-bit constant into a register.
881 */
882static SDValue
883LowerConstant(SDValue Op, SelectionDAG &DAG) {
884  MVT VT = Op.getValueType();
885
886  if (VT == MVT::i64) {
887    ConstantSDNode *CN = cast<ConstantSDNode>(Op.getNode());
888    SDValue T = DAG.getConstant(CN->getZExtValue(), VT);
889    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
890                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
891  } else {
892    cerr << "LowerConstant: unhandled constant type "
893         << VT.getMVTString()
894         << "\n";
895    abort();
896    /*NOTREACHED*/
897  }
898
899  return SDValue();
900}
901
902//! Custom lower double precision floating point constants
903static SDValue
904LowerConstantFP(SDValue Op, SelectionDAG &DAG) {
905  MVT VT = Op.getValueType();
906
907  if (VT == MVT::f64) {
908    ConstantFPSDNode *FP = cast<ConstantFPSDNode>(Op.getNode());
909
910    assert((FP != 0) &&
911           "LowerConstantFP: Node is not ConstantFPSDNode");
912
913    uint64_t dbits = DoubleToBits(FP->getValueAPF().convertToDouble());
914    SDValue T = DAG.getConstant(dbits, MVT::i64);
915    SDValue Tvec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T);
916    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
917                       DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64, Tvec));
918  }
919
920  return SDValue();
921}
922
923static SDValue
924LowerBRCOND(SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) {
925  SDValue Cond = Op.getOperand(1);
926  MVT CondVT = Cond.getValueType();
927  unsigned CondOpc;
928
929  if (CondVT == MVT::i8) {
930    SDValue CondOp0 = Cond.getOperand(0);
931    if (Cond.getOpcode() == ISD::TRUNCATE) {
932      // Use the truncate's value type and ANY_EXTEND the condition (DAGcombine
933      // will then remove the truncate)
934      CondVT = CondOp0.getValueType();
935      CondOpc = ISD::ANY_EXTEND;
936    } else {
937      CondVT = MVT::i32;                // default to something reasonable
938      CondOpc = ISD::ZERO_EXTEND;
939    }
940
941    Cond = DAG.getNode(CondOpc, CondVT, Op.getOperand(1));
942
943    return DAG.getNode(ISD::BRCOND, Op.getValueType(),
944                       Op.getOperand(0), Cond, Op.getOperand(2));
945  }
946
947  return SDValue(); // Unchanged
948}
949
//! Lower ISD::FORMAL_ARGUMENTS: materialize a function's incoming arguments.
/*!
 Arguments are copied out of the SPU argument registers while registers last;
 once exhausted, each remaining argument is loaded from a fixed stack slot
 just above the minimal frame.  For varargs functions, every unused argument
 register is additionally spilled (as v16i8) to the stack.
 \param VarArgsFrameIndex [out] receives the frame index used for varargs.
 */
static SDValue
LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG, int &VarArgsFrameIndex)
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDValue, 48> ArgValues;
  SDValue Root = Op.getOperand(0);
  // Operand 2 of FORMAL_ARGUMENTS is the vararg flag.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() != 0;

  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Stack arguments start just above the minimal frame area.
  unsigned ArgOffset = SPUFrameInfo::minStackSize();
  unsigned ArgRegIdx = 0;
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Add DAG nodes to load the arguments or copy them out of registers.
  // The last value of the FORMAL_ARGUMENTS node is the output chain, hence
  // the "- 1" on the element count.
  for (unsigned ArgNo = 0, e = Op.getNode()->getNumValues() - 1;
       ArgNo != e; ++ArgNo) {
    MVT ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    SDValue ArgVal;

    if (ArgRegIdx < NumArgRegs) {
      const TargetRegisterClass *ArgRegClass;

      // Pick the register class matching the argument's value type.
      switch (ObjectVT.getSimpleVT()) {
      default: {
        cerr << "LowerFORMAL_ARGUMENTS Unhandled argument type: "
             << ObjectVT.getMVTString()
             << "\n";
        abort();
      }
      case MVT::i8:
        ArgRegClass = &SPU::R8CRegClass;
        break;
      case MVT::i16:
        ArgRegClass = &SPU::R16CRegClass;
        break;
      case MVT::i32:
        ArgRegClass = &SPU::R32CRegClass;
        break;
      case MVT::i64:
        ArgRegClass = &SPU::R64CRegClass;
        break;
      case MVT::f32:
        ArgRegClass = &SPU::R32FPRegClass;
        break;
      case MVT::f64:
        ArgRegClass = &SPU::R64FPRegClass;
        break;
      case MVT::v2f64:
      case MVT::v4f32:
      case MVT::v2i64:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        ArgRegClass = &SPU::VECREGRegClass;
        break;
      }

      // Bind the physical argument register to a fresh virtual register and
      // copy the incoming value out of it.
      unsigned VReg = RegInfo.createVirtualRegister(ArgRegClass);
      RegInfo.addLiveIn(ArgRegs[ArgRegIdx], VReg);
      ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
      ++ArgRegIdx;
    } else {
      // We need to load the argument to a virtual register if we determined
      // above that we ran out of physical registers of the appropriate type
      // or we're forced to do vararg
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
      ArgOffset += StackSlotSize;
    }

    ArgValues.push_back(ArgVal);
    // Update the chain
    Root = ArgVal.getOperand(0);
  }

  // vararg handling:
  if (isVarArg) {
    // unsigned int ptr_size = PtrVT.getSizeInBits() / 8;
    // We will spill (79-3)+1 registers to the stack
    SmallVector<SDValue, 79-3+1> MemOps;

    // Create the frame slot

    // NOTE(review): VarArgsFrameIndex is overwritten on every iteration, so
    // only the index of the *last* spill slot survives in the out-parameter;
    // confirm the va_arg lowering expects the final slot rather than the
    // first.
    for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
      VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
      SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
      SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
      SDValue Store = DAG.getStore(Root, ArgVal, FIN, NULL, 0);
      Root = Store.getOperand(0);
      MemOps.push_back(Store);

      // Increment address by stack slot size for the next stored argument
      ArgOffset += StackSlotSize;
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor,MVT::Other,&MemOps[0],MemOps.size());
  }

  // The chain is the last result value of FORMAL_ARGUMENTS.
  ArgValues.push_back(Root);

  // Return the new list of results.
  return DAG.getNode(ISD::MERGE_VALUES, Op.getNode()->getVTList(),
                     &ArgValues[0], ArgValues.size());
}
1062
1063/// isLSAAddress - Return the immediate to use if the specified
1064/// value is representable as a LSA address.
1065static SDNode *isLSAAddress(SDValue Op, SelectionDAG &DAG) {
1066  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
1067  if (!C) return 0;
1068
1069  int Addr = C->getZExtValue();
1070  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
1071      (Addr << 14 >> 14) != Addr)
1072    return 0;  // Top 14 bits have to be sext of immediate.
1073
1074  return DAG.getConstant((int)C->getZExtValue() >> 2, MVT::i32).getNode();
1075}
1076
//! Lower an ISD::CALL node: marshal arguments, emit the call, copy results.
/*!
 Arguments are assigned to the SPU argument registers until those run out,
 then stored to stack slots above the minimal frame.  The callee address is
 rewritten into the addressing form matching the subtarget memory model
 (A-form / PC-relative for small memory, indirect for large memory), and
 return values are copied out of R3 (plus R4 for the two-i32 case).
 */
static
SDValue
LowerCALL(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) {
  CallSDNode *TheCall = cast<CallSDNode>(Op.getNode());
  SDValue Chain = TheCall->getChain();
  SDValue Callee    = TheCall->getCallee();
  unsigned NumOps     = TheCall->getNumArgs();
  unsigned StackSlotSize = SPUFrameInfo::stackSlotSize();
  const unsigned *ArgRegs = SPURegisterInfo::getArgRegs();
  const unsigned NumArgRegs = SPURegisterInfo::getNumArgRegs();

  // Handy pointer type
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Accumulate how many bytes are to be pushed on the stack, including the
  // linkage area, and parameter passing area.  According to the SPU ABI,
  // we minimally need space for [LR] and [SP]
  unsigned NumStackBytes = SPUFrameInfo::minStackSize();

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(SPU::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.
  unsigned ArgOffset = SPUFrameInfo::minStackSize(); // Just below [LR]
  unsigned ArgRegIdx = 0;

  // Keep track of registers passing arguments
  std::vector<std::pair<unsigned, SDValue> > RegsToPass;
  // And the arguments passed on the stack
  SmallVector<SDValue, 8> MemOpChains;

  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = TheCall->getArg(i);

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());
    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // Every handled type follows the same register-then-stack policy; the
    // cases only differ in which value types they accept.
    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
    case MVT::i128:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    case MVT::v2i64:
    case MVT::v2f64:
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (ArgRegIdx != NumArgRegs) {
        RegsToPass.push_back(std::make_pair(ArgRegs[ArgRegIdx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        ArgOffset += StackSlotSize;
      }
      break;
    }
  }

  // Update number of stack bytes actually used, insert a call sequence start
  NumStackBytes = (ArgOffset - SPUFrameInfo::minStackSize());
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumStackBytes,
                                                            true));

  if (!MemOpChains.empty()) {
    // Adjust the stack pointer for the stack arguments.
    // NOTE(review): the TokenFactor is built only from the argument stores,
    // which were chained off the pre-CALLSEQ_START chain; the CALLSEQ_START
    // result chain is dropped here — confirm this ordering is intended.
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = SPUISD::CALL;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    GlobalValue *GV = G->getGlobal();
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue GA = DAG.getTargetGlobalAddress(GV, CalleeVT);

    if (!ST->usingLargeMem()) {
      // Turn calls to targets that are defined (i.e., have bodies) into BRSL
      // style calls, otherwise, external symbols are BRASL calls. This assumes
      // that declared/defined symbols are in the same compilation unit and can
      // be reached through PC-relative jumps.
      //
      // NOTE:
      // This may be an unsafe assumption for JIT and really large compilation
      // units.
      if (GV->isDeclaration()) {
        Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, GA, Zero);
      } else {
        Callee = DAG.getNode(SPUISD::PCRelAddr, CalleeVT, GA, Zero);
      }
    } else {
      // "Large memory" mode: Turn all calls into indirect calls with a X-form
      // address pairs:
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, GA, Zero);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    // External symbols: A-form in small memory, indirect in large memory.
    MVT CalleeVT = Callee.getValueType();
    SDValue Zero = DAG.getConstant(0, PtrVT);
    SDValue ExtSym = DAG.getTargetExternalSymbol(S->getSymbol(),
        Callee.getValueType());

    if (!ST->usingLargeMem()) {
      Callee = DAG.getNode(SPUISD::AFormAddr, CalleeVT, ExtSym, Zero);
    } else {
      Callee = DAG.getNode(SPUISD::IndirectAddr, PtrVT, ExtSym, Zero);
    }
  } else if (SDNode *Dest = isLSAAddress(Callee, DAG)) {
    // If this is an absolute destination address that appears to be a legal
    // local store address, use the munged value.
    Callee = SDValue(Dest, 0);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);
  // Returns a chain and a flag for retval copy to use.
  Chain = DAG.getNode(CallOpc, DAG.getVTList(MVT::Other, MVT::Flag),
                      &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumStackBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  // Only keep the glue value when there is a result to copy out below.
  if (TheCall->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDValue ResultVals[3];
  unsigned NumResults = 0;

  // If the call has results, copy the values out of the ret val registers.
  switch (TheCall->getValueType(0).getSimpleVT()) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (TheCall->getValueType(1) == MVT::i32) {
      // Two i32 results: first from R4, second from R3.
      Chain = DAG.getCopyFromReg(Chain, SPU::R4, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
    } else {
      Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::f32:
  case MVT::f64:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, SPU::R3, TheCall->getValueType(0),
                                   InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    break;
  }

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDValue Res = DAG.getMergeValues(ResultVals, NumResults);
  return Res.getValue(Op.getResNo());
}
1300
1301static SDValue
1302LowerRET(SDValue Op, SelectionDAG &DAG, TargetMachine &TM) {
1303  SmallVector<CCValAssign, 16> RVLocs;
1304  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
1305  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1306  CCState CCInfo(CC, isVarArg, TM, RVLocs);
1307  CCInfo.AnalyzeReturn(Op.getNode(), RetCC_SPU);
1308
1309  // If this is the first return lowered for this function, add the regs to the
1310  // liveout set for the function.
1311  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
1312    for (unsigned i = 0; i != RVLocs.size(); ++i)
1313      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
1314  }
1315
1316  SDValue Chain = Op.getOperand(0);
1317  SDValue Flag;
1318
1319  // Copy the result values into the output registers.
1320  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1321    CCValAssign &VA = RVLocs[i];
1322    assert(VA.isRegLoc() && "Can only return in registers!");
1323    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
1324    Flag = Chain.getValue(1);
1325  }
1326
1327  if (Flag.getNode())
1328    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain, Flag);
1329  else
1330    return DAG.getNode(SPUISD::RET_FLAG, MVT::Other, Chain);
1331}
1332
1333
1334//===----------------------------------------------------------------------===//
1335// Vector related lowering:
1336//===----------------------------------------------------------------------===//
1337
1338static ConstantSDNode *
1339getVecImm(SDNode *N) {
1340  SDValue OpVal(0, 0);
1341
1342  // Check to see if this buildvec has a single non-undef value in its elements.
1343  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
1344    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
1345    if (OpVal.getNode() == 0)
1346      OpVal = N->getOperand(i);
1347    else if (OpVal != N->getOperand(i))
1348      return 0;
1349  }
1350
1351  if (OpVal.getNode() != 0) {
1352    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
1353      return CN;
1354    }
1355  }
1356
1357  return 0; // All UNDEF: use implicit def.; not Constant node
1358}
1359
1360/// get_vec_i18imm - Test if this vector is a vector filled with the same value
1361/// and the value fits into an unsigned 18-bit constant, and if so, return the
1362/// constant
1363SDValue SPU::get_vec_u18imm(SDNode *N, SelectionDAG &DAG,
1364                              MVT ValueType) {
1365  if (ConstantSDNode *CN = getVecImm(N)) {
1366    uint64_t Value = CN->getZExtValue();
1367    if (ValueType == MVT::i64) {
1368      uint64_t UValue = CN->getZExtValue();
1369      uint32_t upper = uint32_t(UValue >> 32);
1370      uint32_t lower = uint32_t(UValue);
1371      if (upper != lower)
1372        return SDValue();
1373      Value = Value >> 32;
1374    }
1375    if (Value <= 0x3ffff)
1376      return DAG.getTargetConstant(Value, ValueType);
1377  }
1378
1379  return SDValue();
1380}
1381
1382/// get_vec_i16imm - Test if this vector is a vector filled with the same value
1383/// and the value fits into a signed 16-bit constant, and if so, return the
1384/// constant
1385SDValue SPU::get_vec_i16imm(SDNode *N, SelectionDAG &DAG,
1386                              MVT ValueType) {
1387  if (ConstantSDNode *CN = getVecImm(N)) {
1388    int64_t Value = CN->getSExtValue();
1389    if (ValueType == MVT::i64) {
1390      uint64_t UValue = CN->getZExtValue();
1391      uint32_t upper = uint32_t(UValue >> 32);
1392      uint32_t lower = uint32_t(UValue);
1393      if (upper != lower)
1394        return SDValue();
1395      Value = Value >> 32;
1396    }
1397    if (Value >= -(1 << 15) && Value <= ((1 << 15) - 1)) {
1398      return DAG.getTargetConstant(Value, ValueType);
1399    }
1400  }
1401
1402  return SDValue();
1403}
1404
1405/// get_vec_i10imm - Test if this vector is a vector filled with the same value
1406/// and the value fits into a signed 10-bit constant, and if so, return the
1407/// constant
1408SDValue SPU::get_vec_i10imm(SDNode *N, SelectionDAG &DAG,
1409                              MVT ValueType) {
1410  if (ConstantSDNode *CN = getVecImm(N)) {
1411    int64_t Value = CN->getSExtValue();
1412    if (ValueType == MVT::i64) {
1413      uint64_t UValue = CN->getZExtValue();
1414      uint32_t upper = uint32_t(UValue >> 32);
1415      uint32_t lower = uint32_t(UValue);
1416      if (upper != lower)
1417        return SDValue();
1418      Value = Value >> 32;
1419    }
1420    if (isS10Constant(Value))
1421      return DAG.getTargetConstant(Value, ValueType);
1422  }
1423
1424  return SDValue();
1425}
1426
/// get_vec_i8imm - Test if this vector is a vector filled with the same value
/// and the value fits into a signed 8-bit constant, and if so, return the
/// constant.
///
/// @note: The incoming vector is v16i8 because that's the only way we can load
/// constant vectors. Thus, we test to see if the upper and lower bytes are the
/// same value.
SDValue SPU::get_vec_i8imm(SDNode *N, SelectionDAG &DAG,
                             MVT ValueType) {
  if (ConstantSDNode *CN = getVecImm(N)) {
    int Value = (int) CN->getZExtValue();
    // For i16: both bytes of the halfword must match; return the low byte.
    // NOTE(review): "(short) Value >> 8" is an arithmetic shift, so a splat
    // byte with the high bit set (e.g. Value == 0xffff gives -1 vs. 0xff)
    // fails this comparison and is rejected even though its bytes match —
    // looks unintentional; confirm before changing, since it alters which
    // immediates are selected.
    if (ValueType == MVT::i16
        && Value <= 0xffff                 /* truncated from uint64_t */
        && ((short) Value >> 8) == ((short) Value & 0xff))
      return DAG.getTargetConstant(Value & 0xff, ValueType);
    else if (ValueType == MVT::i8
             && (Value & 0xff) == Value)
      // For i8: the value must already fit in a single byte.
      return DAG.getTargetConstant(Value, ValueType);
  }

  // Not a uniform constant splat (or all elements undef).
  return SDValue();
}
1449
1450/// get_ILHUvec_imm - Test if this vector is a vector filled with the same value
1451/// and the value fits into a signed 16-bit constant, and if so, return the
1452/// constant
1453SDValue SPU::get_ILHUvec_imm(SDNode *N, SelectionDAG &DAG,
1454                               MVT ValueType) {
1455  if (ConstantSDNode *CN = getVecImm(N)) {
1456    uint64_t Value = CN->getZExtValue();
1457    if ((ValueType == MVT::i32
1458          && ((unsigned) Value & 0xffff0000) == (unsigned) Value)
1459        || (ValueType == MVT::i64 && (Value & 0xffff0000) == Value))
1460      return DAG.getTargetConstant(Value >> 16, ValueType);
1461  }
1462
1463  return SDValue();
1464}
1465
1466/// get_v4i32_imm - Catch-all for general 32-bit constant vectors
1467SDValue SPU::get_v4i32_imm(SDNode *N, SelectionDAG &DAG) {
1468  if (ConstantSDNode *CN = getVecImm(N)) {
1469    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i32);
1470  }
1471
1472  return SDValue();
1473}
1474
1475/// get_v4i32_imm - Catch-all for general 64-bit constant vectors
1476SDValue SPU::get_v2i64_imm(SDNode *N, SelectionDAG &DAG) {
1477  if (ConstantSDNode *CN = getVecImm(N)) {
1478    return DAG.getTargetConstant((unsigned) CN->getZExtValue(), MVT::i64);
1479  }
1480
1481  return SDValue();
1482}
1483
// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.   Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDValue OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // Second half of the 128 bits, i.e. which uint64_t?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Mark this element's bit positions as undef.
      uint64_t EltUndefBits = ~0ULL >> (64-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      // Integer constant: take only the element's low EltBitSize bits.
      EltBits = CN->getZExtValue() & (~0ULL >> (64-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      // FP constant: use the raw IEEE bit pattern.
      const APFloat &apf = CN->getValueAPF();
      EltBits = (CN->getValueType(0) == MVT::f32
                 ? FloatToBits(apf.convertToFloat())
                 : DoubleToBits(apf.convertToDouble()));
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx  %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}
1525
/// If this is a splat (repetition) of a value across the whole vector, return
/// the smallest size that splats it.  For example, "0x01010101010101..." is a
/// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
/// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            int MinSplatBits,
                            uint64_t &SplatBits, uint64_t &SplatUndef,
                            int &SplatSize) {
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  // Each narrower candidate is formed by OR-ing the defined bits of the two
  // halves (an undef half contributed zeros) and AND-ing their undef masks.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // NOTE(review): each 64-bit half is masked by the OTHER half's undef bits
  // here, which looks swapped relative to "ignore undefs in the half being
  // compared" — confirm intent before changing.
  if ((Bits128[0] & ~Undef128[1]) == (Bits128[1] & ~Undef128[0])) {
    if (MinSplatBits < 64) {

      // Check that the top 32-bits are the same as the lower 32-bits, ignoring
      // undefs.
      if ((Bits64 & (~Undef64 >> 32)) == ((Bits64 >> 32) & ~Undef64)) {
        if (MinSplatBits < 32) {

          // If the top 16-bits are different than the lower 16-bits, ignoring
          // undefs, we have an i32 splat.
          if ((Bits32 & (~Undef32 >> 16)) == ((Bits32 >> 16) & ~Undef32)) {
            if (MinSplatBits < 16) {
              // If the top 8-bits are different than the lower 8-bits, ignoring
              // undefs, we have an i16 splat.
              if ((Bits16 & (uint16_t(~Undef16) >> 8))
                  == ((Bits16 >> 8) & ~Undef16)) {
                // Otherwise, we have an 8-bit splat.
                SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
                SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
                SplatSize = 1;
                return true;
              }
            } else {
              // Caller won't accept anything narrower than 16 bits.
              SplatBits = Bits16;
              SplatUndef = Undef16;
              SplatSize = 2;
              return true;
            }
          }
        } else {
          // Caller won't accept anything narrower than 32 bits.
          SplatBits = Bits32;
          SplatUndef = Undef32;
          SplatSize = 4;
          return true;
        }
      }
    } else {
      // Caller requires at least 64 bits: report the 64-bit splat.
      SplatBits = Bits128[0];
      SplatUndef = Undef128[0];
      SplatSize = 8;
      return true;
    }
  }

  return false;  // Can't be a splat if two pieces don't match.
}
1590
1591// If this is a case we can't handle, return null and let the default
1592// expansion code take care of it.  If we CAN select this case, and if it
1593// selects to a single instruction, return Op.  Otherwise, if we can codegen
1594// this case more efficiently than a constant pool load, lower it to the
1595// sequence of ops that should be used.
1596static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
1597  MVT VT = Op.getValueType();
1598  // If this is a vector of constants or undefs, get the bits.  A bit in
1599  // UndefBits is set if the corresponding element of the vector is an
1600  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
1601  // zero.
1602  uint64_t VectorBits[2];
1603  uint64_t UndefBits[2];
1604  uint64_t SplatBits, SplatUndef;
1605  int SplatSize;
1606  if (GetConstantBuildVectorBits(Op.getNode(), VectorBits, UndefBits)
1607      || !isConstantSplat(VectorBits, UndefBits,
1608                          VT.getVectorElementType().getSizeInBits(),
1609                          SplatBits, SplatUndef, SplatSize))
1610    return SDValue();   // Not a constant vector, not a splat.
1611
1612  switch (VT.getSimpleVT()) {
1613  default:
1614  case MVT::v4f32: {
1615    uint32_t Value32 = SplatBits;
1616    assert(SplatSize == 4
1617           && "LowerBUILD_VECTOR: Unexpected floating point vector element.");
1618    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1619    SDValue T = DAG.getConstant(Value32, MVT::i32);
1620    return DAG.getNode(ISD::BIT_CONVERT, MVT::v4f32,
1621                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, T, T, T, T));
1622    break;
1623  }
1624  case MVT::v2f64: {
1625    uint64_t f64val = SplatBits;
1626    assert(SplatSize == 8
1627           && "LowerBUILD_VECTOR: 64-bit float vector size > 8 bytes.");
1628    // NOTE: pretend the constant is an integer. LLVM won't load FP constants
1629    SDValue T = DAG.getConstant(f64val, MVT::i64);
1630    return DAG.getNode(ISD::BIT_CONVERT, MVT::v2f64,
1631                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v2i64, T, T));
1632    break;
1633  }
1634  case MVT::v16i8: {
1635   // 8-bit constants have to be expanded to 16-bits
1636   unsigned short Value16 = SplatBits | (SplatBits << 8);
1637   SDValue Ops[8];
1638   for (int i = 0; i < 8; ++i)
1639     Ops[i] = DAG.getConstant(Value16, MVT::i16);
1640   return DAG.getNode(ISD::BIT_CONVERT, VT,
1641                      DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops, 8));
1642  }
1643  case MVT::v8i16: {
1644    unsigned short Value16;
1645    if (SplatSize == 2)
1646      Value16 = (unsigned short) (SplatBits & 0xffff);
1647    else
1648      Value16 = (unsigned short) (SplatBits | (SplatBits << 8));
1649    SDValue T = DAG.getConstant(Value16, VT.getVectorElementType());
1650    SDValue Ops[8];
1651    for (int i = 0; i < 8; ++i) Ops[i] = T;
1652    return DAG.getNode(ISD::BUILD_VECTOR, VT, Ops, 8);
1653  }
1654  case MVT::v4i32: {
1655    unsigned int Value = SplatBits;
1656    SDValue T = DAG.getConstant(Value, VT.getVectorElementType());
1657    return DAG.getNode(ISD::BUILD_VECTOR, VT, T, T, T, T);
1658  }
1659  case MVT::v2i64: {
1660    uint64_t val = SplatBits;
1661    uint32_t upper = uint32_t(val >> 32);
1662    uint32_t lower = uint32_t(val);
1663
1664    if (upper == lower) {
1665      // Magic constant that can be matched by IL, ILA, et. al.
1666      SDValue Val = DAG.getTargetConstant(val, MVT::i64);
1667      return DAG.getNode(ISD::BUILD_VECTOR, VT, Val, Val);
1668    } else {
1669      SDValue LO32;
1670      SDValue HI32;
1671      SmallVector<SDValue, 16> ShufBytes;
1672      SDValue Result;
1673      bool upper_special, lower_special;
1674
1675      // NOTE: This code creates common-case shuffle masks that can be easily
1676      // detected as common expressions. It is not attempting to create highly
1677      // specialized masks to replace any and all 0's, 0xff's and 0x80's.
1678
1679      // Detect if the upper or lower half is a special shuffle mask pattern:
1680      upper_special = (upper == 0||upper == 0xffffffff||upper == 0x80000000);
1681      lower_special = (lower == 0||lower == 0xffffffff||lower == 0x80000000);
1682
1683      // Create lower vector if not a special pattern
1684      if (!lower_special) {
1685        SDValue LO32C = DAG.getConstant(lower, MVT::i32);
1686        LO32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1687                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1688                                       LO32C, LO32C, LO32C, LO32C));
1689      }
1690
1691      // Create upper vector if not a special pattern
1692      if (!upper_special) {
1693        SDValue HI32C = DAG.getConstant(upper, MVT::i32);
1694        HI32 = DAG.getNode(ISD::BIT_CONVERT, VT,
1695                           DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1696                                       HI32C, HI32C, HI32C, HI32C));
1697      }
1698
1699      // If either upper or lower are special, then the two input operands are
1700      // the same (basically, one of them is a "don't care")
1701      if (lower_special)
1702        LO32 = HI32;
1703      if (upper_special)
1704        HI32 = LO32;
1705      if (lower_special && upper_special) {
1706        // Unhappy situation... both upper and lower are special, so punt with
1707        // a target constant:
1708        SDValue Zero = DAG.getConstant(0, MVT::i32);
1709        HI32 = LO32 = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Zero, Zero,
1710                                  Zero, Zero);
1711      }
1712
1713      for (int i = 0; i < 4; ++i) {
1714        uint64_t val = 0;
1715        for (int j = 0; j < 4; ++j) {
1716          SDValue V;
1717          bool process_upper, process_lower;
1718          val <<= 8;
1719          process_upper = (upper_special && (i & 1) == 0);
1720          process_lower = (lower_special && (i & 1) == 1);
1721
1722          if (process_upper || process_lower) {
1723            if ((process_upper && upper == 0)
1724                || (process_lower && lower == 0))
1725              val |= 0x80;
1726            else if ((process_upper && upper == 0xffffffff)
1727                     || (process_lower && lower == 0xffffffff))
1728              val |= 0xc0;
1729            else if ((process_upper && upper == 0x80000000)
1730                     || (process_lower && lower == 0x80000000))
1731              val |= (j == 0 ? 0xe0 : 0x80);
1732          } else
1733            val |= i * 4 + j + ((i & 1) * 16);
1734        }
1735
1736        ShufBytes.push_back(DAG.getConstant(val, MVT::i32));
1737      }
1738
1739      return DAG.getNode(SPUISD::SHUFB, VT, HI32, LO32,
1740                         DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
1741                                     &ShufBytes[0], ShufBytes.size()));
1742    }
1743  }
1744  }
1745
1746  return SDValue();
1747}
1748
1749/// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3) to something on
1750/// which the Cell can operate. The code inspects V3 to ascertain whether the
1751/// permutation vector, V3, is monotonically increasing with one "exception"
1752/// element, e.g., (0, 1, _, 3). If this is the case, then generate a
1753/// SHUFFLE_MASK synthetic instruction. Otherwise, spill V3 to the constant pool.
1754/// In either case, the net result is going to eventually invoke SHUFB to
1755/// permute/shuffle the bytes from V1 and V2.
1756/// \note
1757/// SHUFFLE_MASK is eventually selected as one of the C*D instructions, generate
1758/// control word for byte/halfword/word insertion. This takes care of a single
1759/// element move from V2 into V1.
1760/// \note
1761/// SPUISD::SHUFB is eventually selected as Cell's <i>shufb</i> instructions.
static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
  SDValue V1 = Op.getOperand(0);
  SDValue V2 = Op.getOperand(1);
  SDValue PermMask = Op.getOperand(2);

  // An undef second operand behaves like a duplicate of the first.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // If we have a single element being moved from V1 to V2, this can be handled
  // using the C*[DX] compute mask instructions, but the vector elements have
  // to be monotonically increasing with one exception element.
  MVT VecVT = V1.getValueType();
  MVT EltVT = VecVT.getVectorElementType();
  unsigned EltsFromV2 = 0;      // number of mask entries that select from V2
  unsigned V2Elt = 0;           // encoded slot of the single V2 element
  unsigned V2EltIdx0 = 0;       // first mask index that refers to V2
  unsigned CurrElt = 0;         // next expected index for the monotonic check
  unsigned MaxElts = VecVT.getVectorNumElements();
  unsigned PrevElt = 0;         // previous mask entry, for rotation tracking
  unsigned V0Elt = 0;           // mask position where a rotation wraps to 0
  bool monotonic = true;        // still a single-insert candidate?
  bool rotate = true;           // still a whole-vector rotation candidate?

  // Mask indices >= the element count select from V2.
  if (EltVT == MVT::i8) {
    V2EltIdx0 = 16;
  } else if (EltVT == MVT::i16) {
    V2EltIdx0 = 8;
  } else if (EltVT == MVT::i32 || EltVT == MVT::f32) {
    V2EltIdx0 = 4;
  } else if (EltVT == MVT::i64 || EltVT == MVT::f64) {
    V2EltIdx0 = 2;
  } else
    assert(0 && "Unhandled vector type in LowerVECTOR_SHUFFLE");

  // Single pass over the permutation mask, tracking both candidate patterns
  // (single insertion from V2, and rotation of V1) simultaneously.
  for (unsigned i = 0; i != PermMask.getNumOperands(); ++i) {
    if (PermMask.getOperand(i).getOpcode() != ISD::UNDEF) {
      unsigned SrcElt = cast<ConstantSDNode > (PermMask.getOperand(i))->getZExtValue();

      if (monotonic) {
        if (SrcElt >= V2EltIdx0) {
          // Element taken from V2: record only the first such element.
          // NOTE(review): SrcElt >= V2EltIdx0 here, so V2EltIdx0 - SrcElt is
          // <= 0 and wraps as unsigned before the shift -- verify this is the
          // intended encoding consumed by SHUFFLE_MASK below.
          if (1 >= (++EltsFromV2)) {
            V2Elt = (V2EltIdx0 - SrcElt) << 2;
          }
        } else if (CurrElt != SrcElt) {
          monotonic = false;
        }

        ++CurrElt;
      }

      if (rotate) {
        if (PrevElt > 0 && SrcElt < MaxElts) {
          // Elements must be consecutive, allowing one wrap from the last
          // element back to 0.
          if ((PrevElt == SrcElt - 1)
              || (PrevElt == MaxElts - 1 && SrcElt == 0)) {
            PrevElt = SrcElt;
            if (SrcElt == 0)
              V0Elt = i;
          } else {
            rotate = false;
          }
        } else if (PrevElt == 0) {
          // First time through, need to keep track of previous element
          // NOTE(review): PrevElt == 0 cannot distinguish "first iteration"
          // from a genuine previous mask entry of 0 -- confirm intended.
          PrevElt = SrcElt;
        } else {
          // This isn't a rotation, takes elements from vector 2
          rotate = false;
        }
      }
    }
  }

  if (EltsFromV2 == 1 && monotonic) {
    // Single element inserted from V2 into an otherwise-identity mask:
    // compute the insertion control word and shuffle.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    unsigned VReg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    // Initialize temporary register to 0
    SDValue InitTempReg =
      DAG.getCopyToReg(DAG.getEntryNode(), VReg, DAG.getConstant(0, PtrVT));
    // Copy register's contents as index in SHUFFLE_MASK:
    SDValue ShufMaskOp =
      DAG.getNode(SPUISD::SHUFFLE_MASK, MVT::v4i32,
                  DAG.getTargetConstant(V2Elt, MVT::i32),
                  DAG.getCopyFromReg(InitTempReg, VReg, PtrVT));
    // Use shuffle mask in SHUFB synthetic instruction:
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V2, V1, ShufMaskOp);
  } else if (rotate) {
    // Whole-vector rotation: rotate V1 left by the byte distance from the
    // wrap point to the end of the vector.
    int rotamt = (MaxElts - V0Elt) * EltVT.getSizeInBits()/8;

    return DAG.getNode(SPUISD::ROTBYTES_LEFT, V1.getValueType(),
                       V1, DAG.getConstant(rotamt, MVT::i16));
  } else {
   // Convert the SHUFFLE_VECTOR mask's input element units to the
   // actual bytes.
    unsigned BytesPerElement = EltVT.getSizeInBits()/8;

    // General case: expand each element index into its constituent byte
    // indices and emit a full 16-byte SHUFB control vector.
    SmallVector<SDValue, 16> ResultMask;
    for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
      unsigned SrcElt;
      if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
        SrcElt = 0;
      else
        SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getZExtValue();

      for (unsigned j = 0; j < BytesPerElement; ++j) {
        ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                             MVT::i8));
      }
    }

    SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
    return DAG.getNode(SPUISD::SHUFB, V1.getValueType(), V1, V2, VPermMask);
  }
}
1877
1878static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
1879  SDValue Op0 = Op.getOperand(0);                     // Op0 = the scalar
1880
1881  if (Op0.getNode()->getOpcode() == ISD::Constant) {
1882    // For a constant, build the appropriate constant vector, which will
1883    // eventually simplify to a vector register load.
1884
1885    ConstantSDNode *CN = cast<ConstantSDNode>(Op0.getNode());
1886    SmallVector<SDValue, 16> ConstVecValues;
1887    MVT VT;
1888    size_t n_copies;
1889
1890    // Create a constant vector:
1891    switch (Op.getValueType().getSimpleVT()) {
1892    default: assert(0 && "Unexpected constant value type in "
1893                         "LowerSCALAR_TO_VECTOR");
1894    case MVT::v16i8: n_copies = 16; VT = MVT::i8; break;
1895    case MVT::v8i16: n_copies = 8; VT = MVT::i16; break;
1896    case MVT::v4i32: n_copies = 4; VT = MVT::i32; break;
1897    case MVT::v4f32: n_copies = 4; VT = MVT::f32; break;
1898    case MVT::v2i64: n_copies = 2; VT = MVT::i64; break;
1899    case MVT::v2f64: n_copies = 2; VT = MVT::f64; break;
1900    }
1901
1902    SDValue CValue = DAG.getConstant(CN->getZExtValue(), VT);
1903    for (size_t j = 0; j < n_copies; ++j)
1904      ConstVecValues.push_back(CValue);
1905
1906    return DAG.getNode(ISD::BUILD_VECTOR, Op.getValueType(),
1907                       &ConstVecValues[0], ConstVecValues.size());
1908  } else {
1909    // Otherwise, copy the value from one register to another:
1910    switch (Op0.getValueType().getSimpleVT()) {
1911    default: assert(0 && "Unexpected value type in LowerSCALAR_TO_VECTOR");
1912    case MVT::i8:
1913    case MVT::i16:
1914    case MVT::i32:
1915    case MVT::i64:
1916    case MVT::f32:
1917    case MVT::f64:
1918      return DAG.getNode(SPUISD::PREFSLOT2VEC, Op.getValueType(), Op0, Op0);
1919    }
1920  }
1921
1922  return SDValue();
1923}
1924
//! Lower ISD::MUL for vector types the SPU cannot multiply in one step.
//! v4i32 falls through to the trailing 'return SDValue()' (default handling);
//! v8i16 and v16i8 are synthesized from 16-bit multiply primitives.
static SDValue LowerVectorMUL(SDValue Op, SelectionDAG &DAG) {
  switch (Op.getValueType().getSimpleVT()) {
  default:
    cerr << "CellSPU: Unknown vector multiplication, got "
         << Op.getValueType().getMVTString()
         << "\n";
    abort();
    /*NOTREACHED*/

  case MVT::v4i32:
	  break;

  // Multiply two v8i16 vectors (pipeline friendly version):
  // a) multiply lower halves, mask off upper 16-bit of 32-bit product
  // b) multiply upper halves, rotate left by 16 bits (inserts 16 lower zeroes)
  // c) Use SELB to select upper and lower halves from the intermediate results
  //
  // NOTE: We really want to move the SELECT_MASK to earlier to actually get the
  // dual-issue. This code does manage to do this, even if it's a little on
  // the wacky side
  case MVT::v8i16: {
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();
    // NOTE(review): Chain is taken from operand 0, which is also rA (a value
    // operand, not a token chain) -- confirm this chaining is intentional.
    SDValue Chain = Op.getOperand(0);
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    // Virtual registers pin the select mask and the high product so their
    // copies can be scheduled early (see dual-issue NOTE above).
    unsigned FSMBIreg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    unsigned HiProdReg = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);

    // Select mask 0xcccc: picks alternating halfwords in the SELB below.
    SDValue FSMBOp =
      DAG.getCopyToReg(Chain, FSMBIreg,
                       DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                   DAG.getConstant(0xcccc, MVT::i16)));

    // High halves product (MPYHH), parked in HiProdReg.
    SDValue HHProd =
      DAG.getCopyToReg(FSMBOp, HiProdReg,
                       DAG.getNode(SPUISD::MPYHH, MVT::v8i16, rA, rB));

    SDValue HHProd_v4i32 =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getCopyFromReg(HHProd, HiProdReg, MVT::v4i32));

    // Combine: low-halves product (MPY) merged with the high product shifted
    // left 16 bits, selected by the 0xcccc mask.
    return DAG.getNode(SPUISD::SELB, MVT::v8i16,
                       DAG.getNode(SPUISD::MPY, MVT::v8i16, rA, rB),
                       DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(),
                                   DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32,
                                               HHProd_v4i32,
                                               DAG.getConstant(16, MVT::i16))),
                       DAG.getCopyFromReg(FSMBOp, FSMBIreg, MVT::v4i32));
  }

  // This M00sE is N@stI! (apologies to Monty Python)
  //
  // SPU doesn't know how to do any 8-bit multiplication, so the solution
  // is to break it all apart, sign extend, and reassemble the various
  // intermediate products.
  case MVT::v16i8: {
    SDValue rA = Op.getOperand(0);
    SDValue rB = Op.getOperand(1);
    SDValue c8 = DAG.getConstant(8, MVT::i32);
    SDValue c16 = DAG.getConstant(16, MVT::i32);

    // Product of the low (odd) bytes of each halfword pair.
    SDValue LLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rA),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rB));

    // Arithmetic shift right by 8 sign-extends the high bytes into place.
    SDValue rALH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rA, c8);

    SDValue rBLH = DAG.getNode(SPUISD::VEC_SRA, MVT::v8i16, rB, c8);

    // Product of the high bytes, shifted back into the high byte position.
    SDValue LHProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16,
                  DAG.getNode(SPUISD::MPY, MVT::v8i16, rALH, rBLH), c8);

    // Select mask 0x2222 used to interleave the partial products.
    SDValue FSMBmask = DAG.getNode(SPUISD::SELECT_MASK, MVT::v8i16,
                                     DAG.getConstant(0x2222, MVT::i16));

    SDValue LoProdParts =
      DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                  DAG.getNode(SPUISD::SELB, MVT::v8i16,
                              LLProd, LHProd, FSMBmask));

    SDValue LoProdMask = DAG.getConstant(0xffff, MVT::i32);

    // Keep only the low halfword of each word for the low result.
    SDValue LoProd =
      DAG.getNode(ISD::AND, MVT::v4i32,
                  LoProdParts,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              LoProdMask, LoProdMask,
                              LoProdMask, LoProdMask));

    // Sign-extended upper halfwords of each word.
    SDValue rAH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rA), c16);

    SDValue rBH =
      DAG.getNode(SPUISD::VEC_SRA, MVT::v4i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, rB), c16);

    // Partial products of the upper halfwords' bytes.
    SDValue HLProd =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rAH),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, rBH));

    SDValue HHProd_1 =
      DAG.getNode(SPUISD::MPY, MVT::v8i16,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rAH, c8)),
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16,
                              DAG.getNode(SPUISD::VEC_SRA,
                                          MVT::v4i32, rBH, c8)));

    // Merge the two upper partial products.
    SDValue HHProd =
      DAG.getNode(SPUISD::SELB, MVT::v8i16,
                  HLProd,
                  DAG.getNode(SPUISD::VEC_SHL, MVT::v8i16, HHProd_1, c8),
                  FSMBmask);

    // Move the upper result into the high halfword of each word.
    SDValue HiProd =
      DAG.getNode(SPUISD::VEC_SHL, MVT::v4i32, HHProd, c16);

    // Reassemble low and high halves into the final v16i8 result.
    return DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8,
                       DAG.getNode(ISD::OR, MVT::v4i32,
                                   LoProd, HiProd));
  }
  }

  return SDValue();
}
2056
//! Lower f32 FDIV (and, via VECREG, a vector form -- presumably v4f32;
//! verify against the callers) using the SPU reciprocal-estimate sequence:
//! BRcpl ~= 1/B from frest/fi, then one refinement term:
//!   result = A*BRcpl + BRcpl * (A - B * (A*BRcpl))
static SDValue LowerFDIVf32(SDValue Op, SelectionDAG &DAG) {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SDValue A = Op.getOperand(0);     // dividend
  SDValue B = Op.getOperand(1);     // divisor
  MVT VT = Op.getValueType();

  // VRegBR holds the reciprocal estimate, VRegC the estimated quotient.
  unsigned VRegBR, VRegC;

  if (VT == MVT::f32) {
    VRegBR = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::R32FPRegClass);
  } else {
    VRegBR = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
    VRegC = RegInfo.createVirtualRegister(&SPU::VECREGRegClass);
  }
  // TODO: make sure we're feeding FPInterp the right arguments
  // Right now: fi B, frest(B)

  // Computes BRcpl =
  // (Floating Interpolate (FP Reciprocal Estimate B))
  SDValue BRcpl =
      DAG.getCopyToReg(DAG.getEntryNode(), VRegBR,
                       DAG.getNode(SPUISD::FPInterp, VT, B,
                                DAG.getNode(SPUISD::FPRecipEst, VT, B)));

  // Computes A * BRcpl and stores in a temporary register
  SDValue AxBRcpl =
      DAG.getCopyToReg(BRcpl, VRegC,
                 DAG.getNode(ISD::FMUL, VT, A,
                        DAG.getCopyFromReg(BRcpl, VRegBR, VT)));
  // The CopyToReg/CopyFromReg pairs thread the chain so the temporaries are
  // materialized before their uses below.
  // TODO: set Chain = Op(0).getEntryNode()

  // Final sum: estimated quotient plus the correction term
  // BRcpl * (A - B * (A*BRcpl)), which compensates for the estimate's error.
  return DAG.getNode(ISD::FADD, VT,
                DAG.getCopyFromReg(AxBRcpl, VRegC, VT),
                DAG.getNode(ISD::FMUL, VT,
                        DAG.getCopyFromReg(AxBRcpl, VRegBR, VT),
                        DAG.getNode(ISD::FSUB, VT, A,
                            DAG.getNode(ISD::FMUL, VT, B,
                            DAG.getCopyFromReg(AxBRcpl, VRegC, VT)))));
}
2100
2101static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2102  MVT VT = Op.getValueType();
2103  SDValue N = Op.getOperand(0);
2104  SDValue Elt = Op.getOperand(1);
2105  SDValue retval;
2106
2107  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
2108    // Constant argument:
2109    int EltNo = (int) C->getZExtValue();
2110
2111    // sanity checks:
2112    if (VT == MVT::i8 && EltNo >= 16)
2113      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i8 extraction slot > 15");
2114    else if (VT == MVT::i16 && EltNo >= 8)
2115      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i16 extraction slot > 7");
2116    else if (VT == MVT::i32 && EltNo >= 4)
2117      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i32 extraction slot > 4");
2118    else if (VT == MVT::i64 && EltNo >= 2)
2119      assert(0 && "SPU LowerEXTRACT_VECTOR_ELT: i64 extraction slot > 2");
2120
2121    if (EltNo == 0 && (VT == MVT::i32 || VT == MVT::i64)) {
2122      // i32 and i64: Element 0 is the preferred slot
2123      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT, N);
2124    }
2125
2126    // Need to generate shuffle mask and extract:
2127    int prefslot_begin = -1, prefslot_end = -1;
2128    int elt_byte = EltNo * VT.getSizeInBits() / 8;
2129
2130    switch (VT.getSimpleVT()) {
2131    default:
2132      assert(false && "Invalid value type!");
2133    case MVT::i8: {
2134      prefslot_begin = prefslot_end = 3;
2135      break;
2136    }
2137    case MVT::i16: {
2138      prefslot_begin = 2; prefslot_end = 3;
2139      break;
2140    }
2141    case MVT::i32:
2142    case MVT::f32: {
2143      prefslot_begin = 0; prefslot_end = 3;
2144      break;
2145    }
2146    case MVT::i64:
2147    case MVT::f64: {
2148      prefslot_begin = 0; prefslot_end = 7;
2149      break;
2150    }
2151    }
2152
2153    assert(prefslot_begin != -1 && prefslot_end != -1 &&
2154           "LowerEXTRACT_VECTOR_ELT: preferred slots uninitialized");
2155
2156    unsigned int ShufBytes[16];
2157    for (int i = 0; i < 16; ++i) {
2158      // zero fill uppper part of preferred slot, don't care about the
2159      // other slots:
2160      unsigned int mask_val;
2161      if (i <= prefslot_end) {
2162        mask_val =
2163          ((i < prefslot_begin)
2164           ? 0x80
2165           : elt_byte + (i - prefslot_begin));
2166
2167        ShufBytes[i] = mask_val;
2168      } else
2169        ShufBytes[i] = ShufBytes[i % (prefslot_end + 1)];
2170    }
2171
2172    SDValue ShufMask[4];
2173    for (unsigned i = 0; i < sizeof(ShufMask)/sizeof(ShufMask[0]); ++i) {
2174      unsigned bidx = i * 4;
2175      unsigned int bits = ((ShufBytes[bidx] << 24) |
2176                           (ShufBytes[bidx+1] << 16) |
2177                           (ShufBytes[bidx+2] << 8) |
2178                           ShufBytes[bidx+3]);
2179      ShufMask[i] = DAG.getConstant(bits, MVT::i32);
2180    }
2181
2182    SDValue ShufMaskVec = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2183                                      &ShufMask[0],
2184                                      sizeof(ShufMask) / sizeof(ShufMask[0]));
2185
2186    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2187                         DAG.getNode(SPUISD::SHUFB, N.getValueType(),
2188                                     N, N, ShufMaskVec));
2189  } else {
2190    // Variable index: Rotate the requested element into slot 0, then replicate
2191    // slot 0 across the vector
2192    MVT VecVT = N.getValueType();
2193    if (!VecVT.isSimple() || !VecVT.isVector() || !VecVT.is128BitVector()) {
2194      cerr << "LowerEXTRACT_VECTOR_ELT: Must have a simple, 128-bit vector type!\n";
2195      abort();
2196    }
2197
2198    // Make life easier by making sure the index is zero-extended to i32
2199    if (Elt.getValueType() != MVT::i32)
2200      Elt = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Elt);
2201
2202    // Scale the index to a bit/byte shift quantity
2203    APInt scaleFactor =
2204            APInt(32, uint64_t(16 / N.getValueType().getVectorNumElements()), false);
2205    unsigned scaleShift = scaleFactor.logBase2();
2206    SDValue vecShift;
2207
2208    if (scaleShift > 0) {
2209      // Scale the shift factor:
2210      Elt = DAG.getNode(ISD::SHL, MVT::i32, Elt,
2211                        DAG.getConstant(scaleShift, MVT::i32));
2212    }
2213
2214    vecShift = DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT, N, Elt);
2215
2216    // Replicate the bytes starting at byte 0 across the entire vector (for
2217    // consistency with the notion of a unified register set)
2218    SDValue replicate;
2219
2220    switch (VT.getSimpleVT()) {
2221    default:
2222      cerr << "LowerEXTRACT_VECTOR_ELT(varable): Unhandled vector type\n";
2223      abort();
2224      /*NOTREACHED*/
2225    case MVT::i8: {
2226      SDValue factor = DAG.getConstant(0x00000000, MVT::i32);
2227      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2228                              factor, factor);
2229      break;
2230    }
2231    case MVT::i16: {
2232      SDValue factor = DAG.getConstant(0x00010001, MVT::i32);
2233      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2234                              factor, factor);
2235      break;
2236    }
2237    case MVT::i32:
2238    case MVT::f32: {
2239      SDValue factor = DAG.getConstant(0x00010203, MVT::i32);
2240      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, factor, factor,
2241                              factor, factor);
2242      break;
2243    }
2244    case MVT::i64:
2245    case MVT::f64: {
2246      SDValue loFactor = DAG.getConstant(0x00010203, MVT::i32);
2247      SDValue hiFactor = DAG.getConstant(0x04050607, MVT::i32);
2248      replicate = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, loFactor, hiFactor,
2249                              loFactor, hiFactor);
2250      break;
2251    }
2252    }
2253
2254    retval = DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2255                         DAG.getNode(SPUISD::SHUFB, VecVT,
2256                                     vecShift, vecShift, replicate));
2257  }
2258
2259  return retval;
2260}
2261
2262static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
2263  SDValue VecOp = Op.getOperand(0);
2264  SDValue ValOp = Op.getOperand(1);
2265  SDValue IdxOp = Op.getOperand(2);
2266  MVT VT = Op.getValueType();
2267
2268  ConstantSDNode *CN = cast<ConstantSDNode>(IdxOp);
2269  assert(CN != 0 && "LowerINSERT_VECTOR_ELT: Index is not constant!");
2270
2271  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
2272  // Use $sp ($1) because it's always 16-byte aligned and it's available:
2273  SDValue Pointer = DAG.getNode(SPUISD::IndirectAddr, PtrVT,
2274                                DAG.getRegister(SPU::R1, PtrVT),
2275                                DAG.getConstant(CN->getSExtValue(), PtrVT));
2276  SDValue ShufMask = DAG.getNode(SPUISD::SHUFFLE_MASK, VT, Pointer);
2277
2278  SDValue result =
2279    DAG.getNode(SPUISD::SHUFB, VT,
2280                DAG.getNode(ISD::SCALAR_TO_VECTOR, VT, ValOp),
2281                VecOp,
2282		DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32, ShufMask));
2283
2284  return result;
2285}
2286
//! Lower i8 arithmetic by promoting operands to i16, performing the
//! operation at i16, and truncating the result back to i8.
//! \param Op   the i8 node being lowered (asserted to have type MVT::i8)
//! \param Opc  the ISD opcode to re-emit at i16
//! \param TLI  supplies the target's shift-amount type
static SDValue LowerI8Math(SDValue Op, SelectionDAG &DAG, unsigned Opc,
                           const TargetLowering &TLI)
{
  SDValue N0 = Op.getOperand(0);      // Everything has at least one operand
  MVT ShiftVT = TLI.getShiftAmountTy();

  assert(Op.getValueType() == MVT::i8);
  switch (Opc) {
  default:
    assert(0 && "Unhandled i8 math operator");
    /*NOTREACHED*/
    break;
  case ISD::SUB: {
    // 8-bit subtraction: Promote the arguments up to 16-bits and truncate
    // the result:
    SDValue N1 = Op.getOperand(1);
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
                            MVT::i16));
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::ROTR:
  case ISD::ROTL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    // The rotate amount is widened or narrowed to the shift-amount type.
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            TLI.getShiftAmountTy()));
    // Duplicate the byte into both halves of an i16 so a 16-bit rotate
    // leaves the correct 8-bit rotation in the low byte.
    SDValue ExpandArg =
      DAG.getNode(ISD::OR, MVT::i16, N0,
                  DAG.getNode(ISD::SHL, MVT::i16,
                              N0, DAG.getConstant(8, MVT::i32)));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, ExpandArg, N1));
  }
  case ISD::SRL:
  case ISD::SHL: {
    // Logical shifts: zero-extend the value; only the low 8 bits of the
    // i16 result matter after truncation.
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::ZERO_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i32));
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::ZERO_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(), ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::SRA: {
    // Arithmetic shift: sign-extend the value so the i16 shift propagates
    // the i8 sign bit.
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getSExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(ShiftVT)
            ? ISD::SIGN_EXTEND
            : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, ShiftVT, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getZExtValue(),
                            ShiftVT));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
  }
  case ISD::MUL: {
    SDValue N1 = Op.getOperand(1);
    unsigned N1Opc;
    // NOTE(review): the constant path zero-extends N0 while the non-constant
    // path sign-extends, and N1's constant path sign-extends. The low 8 bits
    // of the product are unaffected by the operands' upper bits, so this
    // asymmetry appears harmless -- but verify before relying on the i16
    // intermediate value.
    N0 = (N0.getOpcode() != ISD::Constant
          ? DAG.getNode(ISD::SIGN_EXTEND, MVT::i16, N0)
          : DAG.getConstant(cast<ConstantSDNode>(N0)->getZExtValue(),
                            MVT::i16));
    N1Opc = N1.getValueType().bitsLT(MVT::i16) ? ISD::SIGN_EXTEND : ISD::TRUNCATE;
    N1 = (N1.getOpcode() != ISD::Constant
          ? DAG.getNode(N1Opc, MVT::i16, N1)
          : DAG.getConstant(cast<ConstantSDNode>(N1)->getSExtValue(),
                            MVT::i16));
    return DAG.getNode(ISD::TRUNCATE, MVT::i8,
                       DAG.getNode(Opc, MVT::i16, N0, N1));
    break;
  }
  }

  return SDValue();
}
2390
//! Custom-lower i64 arithmetic the SPU has no scalar instructions for.
/*!
  The SPU register file is 128-bit quadwords, so i64 scalar operations are
  lowered here by promoting the operand(s) into the "preferred slot" of a
  v2i64 vector (SPUISD::PREFSLOT2VEC), performing the work with SPU vector
  nodes, and extracting the scalar result back out (SPUISD::VEC2PREFSLOT).

  \param Op  the i64-valued node being lowered
  \param DAG the selection DAG being built
  \param Opc Op's opcode (passed separately by the caller)
  \return the replacement node, or an empty SDValue if Opc is not handled
  (falls out of the switch).
 */
static SDValue LowerI64Math(SDValue Op, SelectionDAG &DAG, unsigned Opc)
{
  MVT VT = Op.getValueType();
  // Vector type with 128 bits' worth of VT elements (v2i64 for i64).
  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  SDValue Op0 = Op.getOperand(0);

  switch (Opc) {
  case ISD::ZERO_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ANY_EXTEND: {
    MVT Op0VT = Op0.getValueType();
    MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));

    // Only i32 -> i64 extension is supported here.
    assert(Op0VT == MVT::i32
           && "CellSPU: Zero/sign extending something other than i32");

    DEBUG(cerr << "CellSPU.LowerI64Math: lowering zero/sign/any extend\n");

    // Get the source i32 into a vector's preferred slot.
    SDValue PromoteScalar =
            DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);

    if (Opc != ISD::SIGN_EXTEND) {
      // Use a shuffle to zero extend the i32 to i64 directly:
      // (0x80-prefixed shuffle-mask bytes presumably select a zero byte;
      // the other words select the source i32s into the low halves.)
      SDValue shufMask =
              DAG.getNode(ISD::BUILD_VECTOR, Op0VecVT,
                          DAG.getConstant(0x80808080, MVT::i32),
                          DAG.getConstant(0x00010203, MVT::i32),
                          DAG.getConstant(0x80808080, MVT::i32),
                          DAG.getConstant(0x08090a0b, MVT::i32));
      SDValue zextShuffle =
              DAG.getNode(SPUISD::SHUFB, Op0VecVT,
                          PromoteScalar, PromoteScalar, shufMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, zextShuffle));
    } else {
      // SPU has no "rotate quadword and replicate bit 0" (i.e. rotate/shift
      // right and propagate the sign bit) instruction.
      // Instead: shift the quad right by 4 bytes, compute the sign word
      // separately with a 32-bit arithmetic shift, and merge the two with
      // a select-bits (SELB) under a constant mask.
      SDValue RotQuad =
              DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, Op0VecVT,
                          PromoteScalar, DAG.getConstant(4, MVT::i32));
      SDValue SignQuad =
              DAG.getNode(SPUISD::VEC_SRA, Op0VecVT,
                          PromoteScalar, DAG.getConstant(32, MVT::i32));
      SDValue SelMask =
              DAG.getNode(SPUISD::SELECT_MASK, Op0VecVT,
                          DAG.getConstant(0xf0f0, MVT::i16));
      SDValue CombineQuad =
              DAG.getNode(SPUISD::SELB, Op0VecVT,
                          SignQuad, RotQuad, SelMask);

      return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                         DAG.getNode(ISD::BIT_CONVERT, VecVT, CombineQuad));
    }
  }

  case ISD::ADD: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x80808080, MVT::i32));

    // Generate per-word carries, shuffle them into the slot above, then do
    // the extended (carry-consuming) add.
    SDValue CarryGen =
      DAG.getNode(SPUISD::CARRY_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedCarry =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  CarryGen, CarryGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::ADD_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedCarry));
  }

  case ISD::SUB: {
    // Turn operands into vectors to satisfy type checking (shufb works on
    // vectors)
    SDValue Op0 =
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
    SDValue Op1 =
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(1));
    SmallVector<SDValue, 16> ShufBytes;

    // Create the shuffle mask for "rotating" the borrow up one register slot
    // once the borrow is generated.
    // NOTE(review): the filler words here are 0xc0c0c0c0 while the ADD case
    // uses 0x80808080 — presumably intentional (0xC0 selects 0xFF with
    // shufb), but worth confirming against the SHUFB semantics.
    ShufBytes.push_back(DAG.getConstant(0x04050607, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0x0c0d0e0f, MVT::i32));
    ShufBytes.push_back(DAG.getConstant(0xc0c0c0c0, MVT::i32));

    // Mirror of the ADD case using borrow generation / extended subtract.
    SDValue BorrowGen =
      DAG.getNode(SPUISD::BORROW_GENERATE, MVT::v2i64, Op0, Op1);
    SDValue ShiftedBorrow =
      DAG.getNode(SPUISD::SHUFB, MVT::v2i64,
                  BorrowGen, BorrowGen,
                  DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
                              &ShufBytes[0], ShufBytes.size()));

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       DAG.getNode(SPUISD::SUB_EXTENDED, MVT::v2i64,
                                   Op0, Op1, ShiftedBorrow));
  }

  case ISD::SHL: {
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    SDValue Op0Vec = DAG.getNode(SPUISD::PREFSLOT2VEC, VecVT, Op0);
    // Zero everything but the preferred-slot doubleword so bits shifted in
    // from the rest of the quadword don't pollute the result.
    SDValue MaskLower =
      DAG.getNode(SPUISD::SELB, VecVT,
                  Op0Vec,
                  DAG.getConstant(0, VecVT),
                  DAG.getNode(SPUISD::SELECT_MASK, VecVT,
                              DAG.getConstant(0xff00ULL, MVT::i16)));
    // Split the shift amount into a byte count (amt / 8) ...
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    // ... and a residual bit count (amt % 8).
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    // Shift the quadword left by bytes, then by the remaining bits.
    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
                       DAG.getNode(SPUISD::SHLQUAD_L_BITS, VecVT,
                                   DAG.getNode(SPUISD::SHLQUAD_L_BYTES, VecVT,
                                               MaskLower, ShiftAmtBytes),
                                   ShiftAmtBits));
  }

  case ISD::SRL: {
    MVT VT = Op.getValueType();
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftAmtVT = ShiftAmt.getValueType();
    // Same byte/bit decomposition as the SHL case, rotating right with
    // zero fill (ROTQUAD_RZ_*). Note this operates on the scalar Op0
    // directly, without the PREFSLOT2VEC promotion used elsewhere.
    SDValue ShiftAmtBytes =
      DAG.getNode(ISD::SRL, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(3, ShiftAmtVT));
    SDValue ShiftAmtBits =
      DAG.getNode(ISD::AND, ShiftAmtVT,
                  ShiftAmt,
                  DAG.getConstant(7, ShiftAmtVT));

    return DAG.getNode(SPUISD::ROTQUAD_RZ_BITS, VT,
                       DAG.getNode(SPUISD::ROTQUAD_RZ_BYTES, VT,
                                   Op0, ShiftAmtBytes),
                       ShiftAmtBits);
  }

  case ISD::SRA: {
    // Promote Op0 to vector
    SDValue Op0 =
      DAG.getNode(SPUISD::PREFSLOT2VEC, MVT::v2i64, Op.getOperand(0));
    SDValue ShiftAmt = Op.getOperand(1);
    MVT ShiftVT = ShiftAmt.getValueType();

    // Negate variable shift amounts (the rotate-left nodes below then
    // effectively rotate right by the requested amount).
    if (!isa<ConstantSDNode>(ShiftAmt)) {
      ShiftAmt = DAG.getNode(ISD::SUB, ShiftVT,
                             DAG.getConstant(0, ShiftVT), ShiftAmt);
    }

    // Replicate the sign of the upper doubleword's high word ...
    SDValue UpperHalfSign =
      DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i32,
                  DAG.getNode(ISD::BIT_CONVERT, MVT::v4i32,
                              DAG.getNode(SPUISD::VEC_SRA, MVT::v2i64,
                                          Op0, DAG.getConstant(31, MVT::i32))));
    SDValue UpperHalfSignMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64, UpperHalfSign);
    SDValue UpperLowerMask =
      DAG.getNode(SPUISD::SELECT_MASK, MVT::v2i64,
                  DAG.getConstant(0xff00, MVT::i16));
    // ... keep the original value in the upper doubleword, sign bits in
    // the lower, then rotate the combined quadword into place.
    SDValue UpperLowerSelect =
      DAG.getNode(SPUISD::SELB, MVT::v2i64,
                  UpperHalfSignMask, Op0, UpperLowerMask);
    SDValue RotateLeftBytes =
      DAG.getNode(SPUISD::ROTBYTES_LEFT_BITS, MVT::v2i64,
                  UpperLowerSelect, ShiftAmt);
    SDValue RotateLeftBits =
      DAG.getNode(SPUISD::ROTBYTES_LEFT, MVT::v2i64,
                  RotateLeftBytes, ShiftAmt);

    return DAG.getNode(SPUISD::VEC2PREFSLOT, MVT::i64,
                       RotateLeftBits);
  }
  }

  // Opc not handled: tell the caller to fall back to default handling.
  return SDValue();
}
2591
2592//! Lower byte immediate operations for v16i8 vectors:
2593static SDValue
2594LowerByteImmed(SDValue Op, SelectionDAG &DAG) {
2595  SDValue ConstVec;
2596  SDValue Arg;
2597  MVT VT = Op.getValueType();
2598
2599  ConstVec = Op.getOperand(0);
2600  Arg = Op.getOperand(1);
2601  if (ConstVec.getNode()->getOpcode() != ISD::BUILD_VECTOR) {
2602    if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2603      ConstVec = ConstVec.getOperand(0);
2604    } else {
2605      ConstVec = Op.getOperand(1);
2606      Arg = Op.getOperand(0);
2607      if (ConstVec.getNode()->getOpcode() == ISD::BIT_CONVERT) {
2608        ConstVec = ConstVec.getOperand(0);
2609      }
2610    }
2611  }
2612
2613  if (ConstVec.getNode()->getOpcode() == ISD::BUILD_VECTOR) {
2614    uint64_t VectorBits[2];
2615    uint64_t UndefBits[2];
2616    uint64_t SplatBits, SplatUndef;
2617    int SplatSize;
2618
2619    if (!GetConstantBuildVectorBits(ConstVec.getNode(), VectorBits, UndefBits)
2620        && isConstantSplat(VectorBits, UndefBits,
2621                           VT.getVectorElementType().getSizeInBits(),
2622                           SplatBits, SplatUndef, SplatSize)) {
2623      SDValue tcVec[16];
2624      SDValue tc = DAG.getTargetConstant(SplatBits & 0xff, MVT::i8);
2625      const size_t tcVecSize = sizeof(tcVec) / sizeof(tcVec[0]);
2626
2627      // Turn the BUILD_VECTOR into a set of target constants:
2628      for (size_t i = 0; i < tcVecSize; ++i)
2629        tcVec[i] = tc;
2630
2631      return DAG.getNode(Op.getNode()->getOpcode(), VT, Arg,
2632                         DAG.getNode(ISD::BUILD_VECTOR, VT, tcVec, tcVecSize));
2633    }
2634  }
2635  // These operations (AND, OR, XOR) are legal, they just couldn't be custom
2636  // lowered.  Return the operation, rather than a null SDValue.
2637  return Op;
2638}
2639
//! Custom lowering for CTPOP (count population)
/*!
  Custom lowering code that counts the number ones in the input
  operand. SPU has such an instruction, but it counts the number of
  ones per byte, which then have to be accumulated.
*/
static SDValue LowerCTPOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  // Vector type with 128 bits' worth of VT elements.
  MVT vecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));

  switch (VT.getSimpleVT()) {
  default:
    assert(false && "Invalid value type!");
  case MVT::i8: {
    // i8: a single CNTB result byte already holds the full popcount.
    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i8, CNTB, Elt0);
  }

  case MVT::i16: {
    // i16: per-byte counts must be folded once: (x >> 8) + x, masked to
    // the maximum possible count (16 -> mask 0x1f would suffice; 0x0f is
    // used here since a 16-bit popcount fits in 5 bits only for 0xffff —
    // NOTE(review): mask 0x0f caps the result at 15; confirm intent for
    // the all-ones input).
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R16CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i16);
    SDValue Mask0 = DAG.getConstant(0x0f, MVT::i16);
    SDValue Shift1 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i16, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    SDValue Tmp1 = DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i16);

    // ((Tmp1 >> 8) + Tmp1) & Mask0
    return DAG.getNode(ISD::AND, MVT::i16,
                       DAG.getNode(ISD::ADD, MVT::i16,
                                   DAG.getNode(ISD::SRL, MVT::i16,
                                               Tmp1, Shift1),
                                   Tmp1),
                       Mask0);
  }

  case MVT::i32: {
    // i32: fold the four per-byte counts twice — first add the upper
    // halfword onto the lower, then the upper byte onto the lower —
    // and mask to a byte.
    MachineFunction &MF = DAG.getMachineFunction();
    MachineRegisterInfo &RegInfo = MF.getRegInfo();

    unsigned CNTB_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);
    unsigned SUM1_reg = RegInfo.createVirtualRegister(&SPU::R32CRegClass);

    SDValue N = Op.getOperand(0);
    SDValue Elt0 = DAG.getConstant(0, MVT::i32);
    SDValue Mask0 = DAG.getConstant(0xff, MVT::i32);
    SDValue Shift1 = DAG.getConstant(16, MVT::i32);
    SDValue Shift2 = DAG.getConstant(8, MVT::i32);

    SDValue Promote = DAG.getNode(SPUISD::PREFSLOT2VEC, vecVT, N, N);
    SDValue CNTB = DAG.getNode(SPUISD::CNTB, vecVT, Promote);

    // CNTB_result becomes the chain to which all of the virtual registers
    // CNTB_reg, SUM1_reg become associated:
    SDValue CNTB_result =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, MVT::i32, CNTB, Elt0);

    SDValue CNTB_rescopy =
      DAG.getCopyToReg(CNTB_result, CNTB_reg, CNTB_result);

    // First fold: (CNTB >> 16) + CNTB
    SDValue Comp1 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32), Shift1);

    SDValue Sum1 =
      DAG.getNode(ISD::ADD, MVT::i32,
                  Comp1, DAG.getCopyFromReg(CNTB_rescopy, CNTB_reg, MVT::i32));

    SDValue Sum1_rescopy =
      DAG.getCopyToReg(CNTB_result, SUM1_reg, Sum1);

    // Second fold: (Sum1 >> 8) + Sum1
    SDValue Comp2 =
      DAG.getNode(ISD::SRL, MVT::i32,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32),
                  Shift2);
    SDValue Sum2 =
      DAG.getNode(ISD::ADD, MVT::i32, Comp2,
                  DAG.getCopyFromReg(Sum1_rescopy, SUM1_reg, MVT::i32));

    return DAG.getNode(ISD::AND, MVT::i32, Sum2, Mask0);
  }

  case MVT::i64:
    // i64 popcount not custom-lowered here.
    break;
  }

  // Not handled: let the caller fall back to default expansion.
  return SDValue();
}
2747
2748//! Lower ISD::SETCC
2749/*!
2750 Lower i64 condition code handling.
2751 */
2752
2753static SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) {
2754  MVT VT = Op.getValueType();
2755  SDValue lhs = Op.getOperand(0);
2756  SDValue rhs = Op.getOperand(1);
2757  SDValue condition = Op.getOperand(2);
2758
2759  if (VT == MVT::i32 && lhs.getValueType() == MVT::i64) {
2760    // Expand the i64 comparisons to what Cell can actually support,
2761    // which is eq, ugt and sgt:
2762#if 0
2763    CondCodeSDNode *ccvalue = dyn_cast<CondCodeSDValue>(condition);
2764
2765    switch (ccvalue->get()) {
2766      case
2767    }
2768#endif
2769  }
2770
2771  return SDValue();
2772}
2773
2774//! Lower ISD::SELECT_CC
2775/*!
2776  ISD::SELECT_CC can (generally) be implemented directly on the SPU using the
2777  SELB instruction.
2778
2779  \note Need to revisit this in the future: if the code path through the true
2780  and false value computations is longer than the latency of a branch (6
2781  cycles), then it would be more advantageous to branch and insert a new basic
2782  block and branch on the condition. However, this code does not make that
2783  assumption, given the simplisitc uses so far.
2784 */
2785
2786static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
2787                              const TargetLowering &TLI) {
2788  MVT VT = Op.getValueType();
2789  SDValue lhs = Op.getOperand(0);
2790  SDValue rhs = Op.getOperand(1);
2791  SDValue trueval = Op.getOperand(2);
2792  SDValue falseval = Op.getOperand(3);
2793  SDValue condition = Op.getOperand(4);
2794
2795  // NOTE: SELB's arguments: $rA, $rB, $mask
2796  //
2797  // SELB selects bits from $rA where bits in $mask are 0, bits from $rB
2798  // where bits in $mask are 1. CCond will be inverted, having 1s where the
2799  // condition was true and 0s where the condition was false. Hence, the
2800  // arguments to SELB get reversed.
2801
2802  // Note: Really should be ISD::SELECT instead of SPUISD::SELB, but LLVM's
2803  // legalizer insists on combining SETCC/SELECT into SELECT_CC, so we end up
2804  // with another "cannot select select_cc" assert:
2805
2806  SDValue compare = DAG.getNode(ISD::SETCC, TLI.getSetCCResultType(Op),
2807                                lhs, rhs, condition);
2808  return DAG.getNode(SPUISD::SELB, VT, falseval, trueval, compare);
2809}
2810
2811//! Custom lower ISD::TRUNCATE
2812static SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG)
2813{
2814  MVT VT = Op.getValueType();
2815  MVT::SimpleValueType simpleVT = VT.getSimpleVT();
2816  MVT VecVT = MVT::getVectorVT(VT, (128 / VT.getSizeInBits()));
2817
2818  SDValue Op0 = Op.getOperand(0);
2819  MVT Op0VT = Op0.getValueType();
2820  MVT Op0VecVT = MVT::getVectorVT(Op0VT, (128 / Op0VT.getSizeInBits()));
2821
2822  // Create shuffle mask
2823  if (Op0VT.getSimpleVT() == MVT::i128 && simpleVT == MVT::i64) {
2824    // least significant doubleword of quadword
2825    unsigned maskHigh = 0x08090a0b;
2826    unsigned maskLow = 0x0c0d0e0f;
2827    // Use a shuffle to perform the truncation
2828    SDValue shufMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32,
2829                                   DAG.getConstant(maskHigh, MVT::i32),
2830                                   DAG.getConstant(maskLow, MVT::i32),
2831                                   DAG.getConstant(maskHigh, MVT::i32),
2832                                   DAG.getConstant(maskLow, MVT::i32));
2833
2834
2835    SDValue PromoteScalar = DAG.getNode(SPUISD::PREFSLOT2VEC, Op0VecVT, Op0);
2836
2837    SDValue truncShuffle = DAG.getNode(SPUISD::SHUFB, Op0VecVT,
2838                                       PromoteScalar, PromoteScalar, shufMask);
2839
2840    return DAG.getNode(SPUISD::VEC2PREFSLOT, VT,
2841                       DAG.getNode(ISD::BIT_CONVERT, VecVT, truncShuffle));
2842  }
2843
2844  return SDValue();             // Leave the truncate unmolested
2845}
2846
2847//! Custom (target-specific) lowering entry point
2848/*!
2849  This is where LLVM's DAG selection process calls to do target-specific
2850  lowering of nodes.
2851 */
2852SDValue
2853SPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
2854{
2855  unsigned Opc = (unsigned) Op.getOpcode();
2856  MVT VT = Op.getValueType();
2857
2858  switch (Opc) {
2859  default: {
2860    cerr << "SPUTargetLowering::LowerOperation(): need to lower this!\n";
2861    cerr << "Op.getOpcode() = " << Opc << "\n";
2862    cerr << "*Op.getNode():\n";
2863    Op.getNode()->dump();
2864    abort();
2865  }
2866  case ISD::LOAD:
2867  case ISD::EXTLOAD:
2868  case ISD::SEXTLOAD:
2869  case ISD::ZEXTLOAD:
2870    return LowerLOAD(Op, DAG, SPUTM.getSubtargetImpl());
2871  case ISD::STORE:
2872    return LowerSTORE(Op, DAG, SPUTM.getSubtargetImpl());
2873  case ISD::ConstantPool:
2874    return LowerConstantPool(Op, DAG, SPUTM.getSubtargetImpl());
2875  case ISD::GlobalAddress:
2876    return LowerGlobalAddress(Op, DAG, SPUTM.getSubtargetImpl());
2877  case ISD::JumpTable:
2878    return LowerJumpTable(Op, DAG, SPUTM.getSubtargetImpl());
2879  case ISD::Constant:
2880    return LowerConstant(Op, DAG);
2881  case ISD::ConstantFP:
2882    return LowerConstantFP(Op, DAG);
2883  case ISD::BRCOND:
2884    return LowerBRCOND(Op, DAG, *this);
2885  case ISD::FORMAL_ARGUMENTS:
2886    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex);
2887  case ISD::CALL:
2888    return LowerCALL(Op, DAG, SPUTM.getSubtargetImpl());
2889  case ISD::RET:
2890    return LowerRET(Op, DAG, getTargetMachine());
2891
2892
2893  // i8, i64 math ops:
2894  case ISD::ZERO_EXTEND:
2895  case ISD::SIGN_EXTEND:
2896  case ISD::ANY_EXTEND:
2897  case ISD::ADD:
2898  case ISD::SUB:
2899  case ISD::ROTR:
2900  case ISD::ROTL:
2901  case ISD::SRL:
2902  case ISD::SHL:
2903  case ISD::SRA: {
2904    if (VT == MVT::i8)
2905      return LowerI8Math(Op, DAG, Opc, *this);
2906    else if (VT == MVT::i64)
2907      return LowerI64Math(Op, DAG, Opc);
2908    break;
2909  }
2910
2911  // Vector-related lowering.
2912  case ISD::BUILD_VECTOR:
2913    return LowerBUILD_VECTOR(Op, DAG);
2914  case ISD::SCALAR_TO_VECTOR:
2915    return LowerSCALAR_TO_VECTOR(Op, DAG);
2916  case ISD::VECTOR_SHUFFLE:
2917    return LowerVECTOR_SHUFFLE(Op, DAG);
2918  case ISD::EXTRACT_VECTOR_ELT:
2919    return LowerEXTRACT_VECTOR_ELT(Op, DAG);
2920  case ISD::INSERT_VECTOR_ELT:
2921    return LowerINSERT_VECTOR_ELT(Op, DAG);
2922
2923  // Look for ANDBI, ORBI and XORBI opportunities and lower appropriately:
2924  case ISD::AND:
2925  case ISD::OR:
2926  case ISD::XOR:
2927    return LowerByteImmed(Op, DAG);
2928
2929  // Vector and i8 multiply:
2930  case ISD::MUL:
2931    if (VT.isVector())
2932      return LowerVectorMUL(Op, DAG);
2933    else if (VT == MVT::i8)
2934      return LowerI8Math(Op, DAG, Opc, *this);
2935
2936  case ISD::FDIV:
2937    if (VT == MVT::f32 || VT == MVT::v4f32)
2938      return LowerFDIVf32(Op, DAG);
2939#if 0
2940    // This is probably a libcall
2941    else if (Op.getValueType() == MVT::f64)
2942      return LowerFDIVf64(Op, DAG);
2943#endif
2944    else
2945      assert(0 && "Calling FDIV on unsupported MVT");
2946
2947  case ISD::CTPOP:
2948    return LowerCTPOP(Op, DAG);
2949
2950  case ISD::SELECT_CC:
2951    return LowerSELECT_CC(Op, DAG, *this);
2952
2953  case ISD::TRUNCATE:
2954    return LowerTRUNCATE(Op, DAG);
2955
2956  case ISD::SETCC:
2957    return LowerSETCC(Op, DAG);
2958  }
2959
2960  return SDValue();
2961}
2962
//! Replace results of an illegal-typed node (currently a no-op).
/*!
  Hook called by the legalizer for nodes whose result type needs custom
  replacement. Nothing is appended to \p Results here — the diagnostic
  skeleton below is compiled out — so all nodes are left unchanged.
 */
void SPUTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG)
{
#if 0
  unsigned Opc = (unsigned) N->getOpcode();
  MVT OpVT = N->getValueType(0);

  switch (Opc) {
  default: {
    cerr << "SPUTargetLowering::ReplaceNodeResults(): need to fix this!\n";
    cerr << "Op.getOpcode() = " << Opc << "\n";
    cerr << "*Op.getNode():\n";
    N->dump();
    abort();
    /*NOTREACHED*/
  }
  }
#endif

  /* Otherwise, return unchanged */
}
2985
2986//===----------------------------------------------------------------------===//
2987// Target Optimization Hooks
2988//===----------------------------------------------------------------------===//
2989
//! Target-specific DAG combines for Cell SPU.
/*!
  Simplifies SPU-specific node patterns after legalization: folds zero
  offsets and constant offsets into SPUISD::IndirectAddr, drops redundant
  extends of VEC2PREFSLOT results, kills zero-amount vector shifts, and
  cancels PREFSLOT2VEC/VEC2PREFSLOT round trips. Returns the replacement
  node, or an empty SDValue to leave \p N unchanged.
 */
SDValue
SPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const
{
#if 0
  TargetMachine &TM = getTargetMachine();
#endif
  const SPUSubtarget *ST = SPUTM.getSubtargetImpl();
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op0 = N->getOperand(0);       // everything has at least one operand
  MVT NodeVT = N->getValueType(0);      // The node's value type
  MVT Op0VT = Op0.getValueType();       // The first operand's result
  SDValue Result;                       // Initially, empty result

  switch (N->getOpcode()) {
  default: break;
  case ISD::ADD: {
    SDValue Op1 = N->getOperand(1);

    if (Op0.getOpcode() == SPUISD::IndirectAddr
        || Op1.getOpcode() == SPUISD::IndirectAddr) {
      // Normalize the operands to reduce repeated code
      SDValue IndirectArg = Op0, AddArg = Op1;

      if (Op1.getOpcode() == SPUISD::IndirectAddr) {
        IndirectArg = Op1;
        AddArg = Op0;
      }

      if (isa<ConstantSDNode>(AddArg)) {
        ConstantSDNode *CN0 = cast<ConstantSDNode > (AddArg);
        SDValue IndOp1 = IndirectArg.getOperand(1);

        if (CN0->isNullValue()) {
          // (add (SPUindirect <arg>, <arg>), 0) ->
          // (SPUindirect <arg>, <arg>)

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return IndirectArg;
        } else if (isa<ConstantSDNode>(IndOp1)) {
          // (add (SPUindirect <arg>, <const>), <const>) ->
          // (SPUindirect <arg>, <const + const>)
          ConstantSDNode *CN1 = cast<ConstantSDNode > (IndOp1);
          int64_t combinedConst = CN0->getSExtValue() + CN1->getSExtValue();
          SDValue combinedValue = DAG.getConstant(combinedConst, Op0VT);

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (add (SPUindirect <arg>, " << CN1->getSExtValue()
                 << "), " << CN0->getSExtValue() << ")\n"
                 << "With:    (SPUindirect <arg>, "
                 << combinedConst << ")\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             IndirectArg, combinedValue);
        }
      }
    }
    break;
  }
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND: {
    if (Op0.getOpcode() == SPUISD::VEC2PREFSLOT && NodeVT == Op0VT) {
      // (any_extend (SPUextract_elt0 <arg>)) ->
      // (SPUextract_elt0 <arg>)
      // Types must match, however...
#if !defined(NDEBUG)
      if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
        cerr << "\nReplace: ";
        N->dump(&DAG);
        cerr << "\nWith:    ";
        Op0.getNode()->dump(&DAG);
        cerr << "\n";
      }
#endif

      return Op0;
    }
    break;
  }
  case SPUISD::IndirectAddr: {
    if (!ST->usingLargeMem() && Op0.getOpcode() == SPUISD::AFormAddr) {
      ConstantSDNode *CN = cast<ConstantSDNode>(N->getOperand(1));
      if (CN->getZExtValue() == 0) {
        // (SPUindirect (SPUaform <addr>, 0), 0) ->
        // (SPUaform <addr>, 0)

        DEBUG(cerr << "Replace: ");
        DEBUG(N->dump(&DAG));
        DEBUG(cerr << "\nWith:    ");
        DEBUG(Op0.getNode()->dump(&DAG));
        DEBUG(cerr << "\n");

        return Op0;
      }
    } else if (Op0.getOpcode() == ISD::ADD) {
      SDValue Op1 = N->getOperand(1);
      if (ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(Op1)) {
        // (SPUindirect (add <arg>, <arg>), 0) ->
        // (SPUindirect <arg>, <arg>)
        if (CN1->isNullValue()) {

#if !defined(NDEBUG)
          if (DebugFlag && isCurrentDebugType(DEBUG_TYPE)) {
            cerr << "\n"
                 << "Replace: (SPUindirect (add <arg>, <arg>), 0)\n"
                 << "With:    (SPUindirect <arg>, <arg>)\n";
          }
#endif

          return DAG.getNode(SPUISD::IndirectAddr, Op0VT,
                             Op0.getOperand(0), Op0.getOperand(1));
        }
      }
    }
    break;
  }
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_LEFT: {
    SDValue Op1 = N->getOperand(1);

    // Kill degenerate vector shifts:
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op1)) {
      if (CN->isNullValue()) {
        Result = Op0;
      }
    }
    break;
  }
  case SPUISD::PREFSLOT2VEC: {
    switch (Op0.getOpcode()) {
    default:
      break;
    case ISD::ANY_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::SIGN_EXTEND: {
      // (SPUprefslot2vec (any|zero|sign_extend (SPUvec2prefslot <arg>))) ->
      // <arg>
      // but only if the SPUprefslot2vec and <arg> types match.
      SDValue Op00 = Op0.getOperand(0);
      if (Op00.getOpcode() == SPUISD::VEC2PREFSLOT) {
        SDValue Op000 = Op00.getOperand(0);
        if (Op000.getValueType() == NodeVT) {
          Result = Op000;
        }
      }
      break;
    }
    case SPUISD::VEC2PREFSLOT: {
      // (SPUprefslot2vec (SPUvec2prefslot <arg>)) ->
      // <arg>
      Result = Op0.getOperand(0);
      break;
    }
    }
    break;
  }
  }
  // Otherwise, return unchanged.
#ifndef NDEBUG
  if (Result.getNode()) {
    DEBUG(cerr << "\nReplace.SPU: ");
    DEBUG(N->dump(&DAG));
    DEBUG(cerr << "\nWith:        ");
    DEBUG(Result.getNode()->dump(&DAG));
    DEBUG(cerr << "\n");
  }
#endif

  return Result;
}
3177
3178//===----------------------------------------------------------------------===//
3179// Inline Assembly Support
3180//===----------------------------------------------------------------------===//
3181
3182/// getConstraintType - Given a constraint letter, return the type of
3183/// constraint it is for this target.
3184SPUTargetLowering::ConstraintType
3185SPUTargetLowering::getConstraintType(const std::string &ConstraintLetter) const {
3186  if (ConstraintLetter.size() == 1) {
3187    switch (ConstraintLetter[0]) {
3188    default: break;
3189    case 'b':
3190    case 'r':
3191    case 'f':
3192    case 'v':
3193    case 'y':
3194      return C_RegisterClass;
3195    }
3196  }
3197  return TargetLowering::getConstraintType(ConstraintLetter);
3198}
3199
//! Map a single-letter inline asm constraint to an SPU register class.
//
// Returns a (register, register class) pair for \p Constraint at value type
// \p VT; the register number 0U follows the usual TargetLowering convention
// of "no specific physical register, any register of the class".
// Unhandled constraints are deferred to the base class.
std::pair<unsigned, const TargetRegisterClass*>
SPUTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const
{
  if (Constraint.size() == 1) {
    // NOTE(review): this switch (and the register-range comments it used to
    // carry) was copied from the PowerPC/RS6000 backend; confirm the letter
    // meanings are the ones intended for SPU.
    switch (Constraint[0]) {
    case 'b':   // base register (PPC heritage) -- treated identically to 'r'
    case 'r':   // general register: 64-bit values use R64C, all else R32C
      if (VT == MVT::i64)
        return std::make_pair(0U, SPU::R64CRegisterClass);
      return std::make_pair(0U, SPU::R32CRegisterClass);
    case 'f':   // floating-point register, chosen by value type
      if (VT == MVT::f32)
        return std::make_pair(0U, SPU::R32FPRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, SPU::R64FPRegisterClass);
      break;    // other FP types fall through to the default handler below
    case 'v':   // vector operands live in the general-purpose class here
      return std::make_pair(0U, SPU::GPRCRegisterClass);
    }
  }

  // Unrecognized constraint -- note 'y' is accepted by getConstraintType but
  // deliberately not mapped here -- so let the base class decide.
  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}
3225
//! Compute used/known bits for a SPU operand
//
// Supplies target-specific known-zero/known-one bit information for SPU
// custom nodes to the DAG combiner. Only PREFSLOT2VEC, LDRESULT and
// VEC2PREFSLOT are handled; every other opcode -- including those stubbed
// out in the '#if 0' blocks -- is left conservatively unknown.
void
SPUTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                  const APInt &Mask,
                                                  APInt &KnownZero,
                                                  APInt &KnownOne,
                                                  const SelectionDAG &DAG,
                                                  unsigned Depth ) const {
#if 0
  const uint64_t uint64_sizebits = sizeof(uint64_t) * 8;
#endif

  switch (Op.getOpcode()) {
  default:
    // Unhandled opcode: report nothing known. (The explicit clear below is
    // intentionally left commented out; KnownZero/KnownOne are assumed to
    // arrive zeroed from the caller -- TODO confirm.)
    // KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
    break;

#if 0
  case CALL:
  case SHUFB:
  case SHUFFLE_MASK:
  case CNTB:
#endif

  case SPUISD::PREFSLOT2VEC: {
    SDValue Op0 = Op.getOperand(0);
    MVT Op0VT = Op0.getValueType();
    unsigned Op0VTBits = Op0VT.getSizeInBits();
    uint64_t InMask = Op0VT.getIntegerVTBitMask();
    // Bits above the scalar operand's width are reported known-zero.
    KnownZero |= APInt(Op0VTBits, ~InMask, false);
    // NOTE(review): setting KnownOne to the full in-type mask asserts every
    // value bit is known to be 1, which looks wrong for a generic operand;
    // also the APInt width is Op0VTBits rather than Mask.getBitWidth() --
    // verify both against the computeMaskedBits contract.
    KnownOne |= APInt(Op0VTBits, InMask, false);
    break;
  }

  case SPUISD::LDRESULT:
  case SPUISD::VEC2PREFSLOT: {
    MVT OpVT = Op.getValueType();
    unsigned OpVTBits = OpVT.getSizeInBits();
    uint64_t InMask = OpVT.getIntegerVTBitMask();
    // Same pattern as PREFSLOT2VEC above, keyed off the result type.
    KnownZero |= APInt(OpVTBits, ~InMask, false);
    // NOTE(review): same known-one concern as in the PREFSLOT2VEC case.
    KnownOne |= APInt(OpVTBits, InMask, false);
    break;
  }

#if 0
  case MPY:
  case MPYU:
  case MPYH:
  case MPYHH:
  case SPUISD::SHLQUAD_L_BITS:
  case SPUISD::SHLQUAD_L_BYTES:
  case SPUISD::VEC_SHL:
  case SPUISD::VEC_SRL:
  case SPUISD::VEC_SRA:
  case SPUISD::VEC_ROTL:
  case SPUISD::VEC_ROTR:
  case SPUISD::ROTQUAD_RZ_BYTES:
  case SPUISD::ROTQUAD_RZ_BITS:
  case SPUISD::ROTBYTES_LEFT:
  case SPUISD::SELECT_MASK:
  case SPUISD::SELB:
  case SPUISD::FPInterp:
  case SPUISD::FPRecipEst:
  case SPUISD::SEXT32TO64:
#endif
  }
}
3293
3294unsigned
3295SPUTargetLowering::ComputeNumSignBitsForTargetNode(SDValue Op,
3296                                                   unsigned Depth) const {
3297  switch (Op.getOpcode()) {
3298  default:
3299    return 1;
3300
3301  case ISD::SETCC: {
3302    MVT VT = Op.getValueType();
3303
3304    if (VT != MVT::i8 && VT != MVT::i16 && VT != MVT::i32) {
3305      VT = MVT::i32;
3306    }
3307    return VT.getSizeInBits();
3308  }
3309  }
3310}
3311
// LowerAsmOperandForConstraint - Lower the inline asm operand Op for the
// single-letter constraint ConstraintLetter, appending any resulting
// operands to Ops. SPU defines no target-specific constraint letters that
// need custom operand lowering, so this is a pure pass-through.
void
SPUTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                char ConstraintLetter,
                                                bool hasMemory,
                                                std::vector<SDValue> &Ops,
                                                SelectionDAG &DAG) const {
  // Default, for the time being, to the base class handler
  TargetLowering::LowerAsmOperandForConstraint(Op, ConstraintLetter, hasMemory,
                                               Ops, DAG);
}
3323
3324/// isLegalAddressImmediate - Return true if the integer value can be used
3325/// as the offset of the target addressing mode.
3326bool SPUTargetLowering::isLegalAddressImmediate(int64_t V,
3327                                                const Type *Ty) const {
3328  // SPU's addresses are 256K:
3329  return (V > -(1 << 18) && V < (1 << 18) - 1);
3330}
3331
// A symbolic address (GlobalValue) is never usable as an addressing-mode
// immediate on SPU.
bool SPUTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}
3335
// Return whether a constant offset may be folded into the given global
// address node when forming a target address.
bool
SPUTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The SPU target isn't yet aware of offsets.
  return false;
}
3341