AMDILISelLowering.cpp revision 34ff22b75f8e3616109c3deacea2ec27f12f3398
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
#define ISDBITCAST  ISD::BITCAST
#define MVTGLUE     MVT::Glue
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
  static SDValue
getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (dvt.bitsLT(svt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32- and 64-bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32- and 64-bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
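
// Editorial sketch (not in the original source): with asType == false, an
// i16 source and an f32 destination first widen the integer to the 32-bit
// width that matches the destination class, then convert the value:
//
//   SDValue Src = ...; // i16 value
//   SDValue Dst = ...; // f32 value
//   Src = getConversionNode(DAG, Src, Dst, false);
//   // Src is now (sint_to_fp (sext_or_trunc Src to i32)) : f32
//
// With asType == true the final node is ISD::BITCAST instead, so the bits
// are reinterpreted rather than the value converted.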
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
// condition.
  static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        errs() << "Condition Code: " << (unsigned int)CC << "\n";
        assert(0 && "Unknown condition code!");
        return AMDILCC::COND_ERROR;
      }
    case ISD::SETO:
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUO:
      switch(type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETLE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETULE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      };
  };
}
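
// Usage sketch (editorial): the same DAG condition code maps to a different
// AMDIL condition depending on the operand type, e.g.
//   CondCCodeToCC(ISD::SETUGT, MVT::i32) == AMDILCC::IL_CC_U_GT
//   CondCCodeToCC(ISD::SETUGT, MVT::f32) == AMDILCC::IL_CC_F_UGT
//   CondCCodeToCC(ISD::SETUGT, MVT::i64) == AMDILCC::IL_CC_UL_GT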

SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This can
  // be changed with more analysis.
  // In the case of tail call optimization, mark all arguments mutable, since
  // they could be overwritten by the lowering of the arguments of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
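
// Editorial note: for a byval argument the caller has already placed a copy
// of the aggregate in the fixed stack slot, so the frame index itself (the
// address of that copy) is returned; for all other arguments the value is
// materialized with a load from the slot.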
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the value types that are currently
  // supported.

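  // Editorial reminder of the LegalizeAction values used below, as defined
  // by TargetLowering:
  //   Legal  - the node is natively supported and left alone.
  //   Custom - LowerOperation() is invoked to lower the node.
  //   Expand - the generic legalizer replaces the node with other nodes.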
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sext-in-reg, so expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    // These are condition codes, not node opcodes, so they must be expanded
    // through setCondCodeAction rather than setOperationAction.
    setCondCodeAction(ISD::SETOLT, VT, Expand);
    setCondCodeAction(ISD::SETOGE, VT, Expand);
    setCondCodeAction(ISD::SETOGT, VT, Expand);
    setCondCodeAction(ISD::SETOLE, VT, Expand);
    setCondCodeAction(ISD::SETULT, VT, Expand);
    setCondCodeAction(ISD::SETUGE, VT, Expand);
    setCondCodeAction(ISD::SETUGT, VT, Expand);
    setCondCodeAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also has no divrem instruction for signed or unsigned operands.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU has no single-instruction [S|U]MUL_LOHI.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU has no rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU has no bit-counting instructions.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64, but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works correctly for these types;
  // it needs vector comparisons to do so.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);

  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;

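  // Editorial note: maxStoresPerMemcpy/Memmove/Memset bound how many stores
  // the generic lowering may emit when inlining these operations; 4096
  // effectively forces inline expansion, which matters because the GPU has
  // no memcpy/memset library calls to fall back on.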
}

const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
    case AMDILISD::MAD:  return "AMDILISD::MAD";
    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
    case AMDILISD::CALL:  return "AMDILISD::CALL";
    case AMDILISD::RET:   return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
  };
}
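
// Editorial note: these strings are what SelectionDAG debug dumps print for
// AMDIL target nodes; returning 0 for an unknown opcode lets the generic
// printer fall back to its default "unknown node" label.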
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false;
  unsigned IntNo;
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
             // Fall through to the integer exchange opcode.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
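
// Worked example (editorial): for AMDGPUIntrinsic::AMDIL_atomic_add_gi32
// the function fills Info with opc = AMDILISD::ATOM_G_ADD, memVT = MVT::i32,
// ptrVal = the first call operand, offset = 0, align = 4, vol = true, and
// readMem = writeMem = true, i.e. the call is modeled as a volatile 4-byte
// read-modify-write of the pointed-to location (readMem is cleared for the
// *_noret variants, which return nothing).
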
// The backend supports 32- and 64-bit floating point immediates.
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}

bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
      && VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}
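
// Editorial note: since isFPImmLegal() accepts both f32 and f64 immediates,
// there is nothing to gain from shrinking an f64 constant into an f32 load,
// hence ShouldShrinkFPConstant() returns false exactly for those types.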

// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known to
// be zero or one. Op is expected to be a target-specific node. Used by the
// DAG combiner.
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case AMDILISD::SELECT_CC:
             DAG.ComputeMaskedBits(
                 Op.getOperand(1),
                 KnownZero,
                 KnownOne,
                 Depth + 1
                 );
             DAG.ComputeMaskedBits(
                 Op.getOperand(0),
                 KnownZero2,
                 KnownOne2,
                 Depth + 1
                 );
             assert((KnownZero & KnownOne) == 0
                 && "Bits known to be one AND zero?");
             assert((KnownZero2 & KnownOne2) == 0
                 && "Bits known to be one AND zero?");
             // A bit is known only if it is known in both the LHS and RHS.
             KnownOne &= KnownOne2;
             KnownZero &= KnownZero2;
             break;
  };
}
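
// Worked example (editorial): if one select operand is known to have its
// low four bits zero and the other only its low two bits zero, the
// intersection above leaves just the low two bits known zero -- a bit is
// known only when it is known in both possible results.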

// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}

// LowerCallResult - Lower the result values of an ISD::CALL into the
// appropriate copies out of appropriate physical registers.  This assumes that
// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
// being lowered.  This returns an SDNode with the same number of values as the
// ISD::CALL.
SDValue
AMDILTargetLowering::LowerCallResult(
    SDValue Chain,
    SDValue InFlag,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals) const
{
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    EVT CopyVT = RVLocs[i].getValVT();
    if (RVLocs[i].isRegLoc()) {
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
  static const SDValue
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
{
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );
  }
  return New;
}

#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
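// For example, LOWER(SDIV) expands to:
//   case ISD::SDIV:
//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSDIV(Op, DAG));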

SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this "
          "instruction is not implemented yet!");
      break;
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(BUILD_VECTOR);
      LOWER(SELECT);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
  }
  return Op;
}

#undef LOWER

SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    // Not a GlobalVariable, so GV is null here; emit the address of the
    // original GlobalValue instead.
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = GV->getInitializer();
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (isa<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        // Dump before the assert so the offending constant is visible.
        C->dump();
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}

SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}

SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}

/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if (VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the function needs to pop
  // bytes off the stack on return
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" with size and alignment information specified by
/// the specific parameter attribute. The copy will be passed as a byval
/// function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}

/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(0 && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStackArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
    switch (VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this outgoing parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  } else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
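  // Note: the cases below intentionally fall through, so a four-operand
  // build inserts operands 3, 2, and then 1 on top of the initial VBUILD.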
  switch (Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
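  // The SELECT_CC above produces an all-ones mask for true, so and it
  // with 1 to normalize the result to the 0/1 value SETCC must produce.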
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
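  // e.g. sign-extending from i8 inside an i32 register uses
  // shiftBits = 32 - 8 = 24: (x << 24) >> 24 with an arithmetic right
  // shift replicates the sign bit of the 8-bit value across the upper bits.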
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
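
// genIntType - Build an i32/i64-based integer EVT covering size * numEle
// bits, e.g. genIntType(32, 4) yields v4i32 and genIntType(64, 1) yields
// i64; anything narrower than one element collapses to a single element.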
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

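// Dynamic allocas are lowered by bumping the software stack pointer
// register AMDIL::SP by the requested size; the updated SP and the new
// chain are merged and returned as the node's two results.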
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDIL::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}

SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT  = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
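  // Materialize the comparison as a full -1/0 mask with SELECT_CC, then
  // branch on that mask with the target's BRANCH_COND node.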
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerReturn - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Guarantee that all emitted copies are stuck together by glueing each
    // copy to the next, so the scheduler cannot pull them apart.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

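// LowerSDIV24 - Lower 8 and 16-bit signed division by doing the divide in
// 32-bit floating point, truncating toward zero, and adding the +/-1
// correction term jq when the rounded quotient is off by one.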
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY;
  MVT FLTTY;
  if (!OVT.isVector()) {
    INTTY = MVT::i32;
    FLTTY = MVT::f32;
  } else if (OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
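  // After the shift and or, jq is 1 when the operand signs agree and -1
  // when they differ; it is the potential off-by-one quotient correction.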

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSDIV32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
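  //
  // The iadd/ixor pairs implement absolute value: with the sign mask
  // m = (x < 0 ? -1 : 0), (x + m) ^ m == |x|. The xor of the two masks
  // then re-applies the quotient's sign after the unsigned divide.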

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

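// LowerSREM8 - there is no 8-bit remainder operation here, so promote the
// operands to i32, take the remainder there, and truncate the result back;
// LowerSREM16 below does the same for 16-bit types.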
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // LowerSREM32 generates code equivalent to the following IL:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
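  //
  // As in LowerSDIV32, the iadd/ixor pairs take absolute values first and
  // the final pair re-applies r10, the dividend's sign mask, since the
  // sign of a remainder follows the sign of the dividend.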

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1 (the multiply and subtract below reconstruct the
  // remainder from the quotient, so this must be a UDIV, not a UREM)
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}