AMDILISelLowering.cpp revision 440ab9ea02690008b4d8da11494fd1e9cd86e57e
//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//==-----------------------------------------------------------------------===//
//
// This file implements the interfaces that AMDIL uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "AMDILISelLowering.h"
#include "AMDILDevices.h"
#include "AMDILIntrinsicInfo.h"
#include "AMDILRegisterInfo.h"
#include "AMDILSubtarget.h"
#include "AMDILUtilityFunctions.h"
#include "llvm/CallingConv.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;
#define ISDBITCAST  ISD::BITCAST
#define MVTGLUE     MVT::Glue
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
#include "AMDGPUGenCallingConv.inc"

//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions Begin
//===----------------------------------------------------------------------===//
static SDValue
getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
{
  DebugLoc DL = Src.getDebugLoc();
  EVT svt = Src.getValueType().getScalarType();
  EVT dvt = Dst.getValueType().getScalarType();
  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
    if (dvt.bitsGT(svt)) {
      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
    } else if (svt.bitsGT(dvt)) {
      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
          DAG.getConstant(1, MVT::i32));
    }
  } else if (svt.isInteger() && dvt.isInteger()) {
    if (!svt.bitsEq(dvt)) {
      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
    }
  } else if (svt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
    if (!svt.bitsEq(dvt)) {
      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
      } else {
        assert(0 && "We only support 32 and 64 bit fp types");
      }
    }
    Src = DAG.getNode(opcode, DL, dvt, Src);
  } else if (dvt.isInteger()) {
    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
    } else {
      assert(0 && "We only support 32 and 64 bit fp types");
    }
    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
  }
  return Src;
}
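
// Illustrative behavior of the helper above (hypothetical call site, not
// code from this file): with an i16 source, an f32 destination, and
// asType == false, it sign-extends to i32 and then emits SINT_TO_FP; for
// an f64 source and f32 destination it emits FP_ROUND.
//
//   SDValue Conv = getConversionNode(DAG, SrcVal, DstVal, /*asType=*/false);
//   // Conv now carries DstVal's scalar type.
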
// CondCCodeToCC - Convert a DAG condition code to an AMDIL CC
// condition.
static AMDILCC::CondCodes
CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
{
  switch (CC) {
    default:
      {
        errs() << "Condition Code: " << (unsigned int)CC << "\n";
        assert(0 && "Unknown condition code!");
      }
    case ISD::SETO:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_O;
        case MVT::f64:
          return AMDILCC::IL_CC_D_O;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETUO:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_UO;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UO;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_GE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_GE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETLT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LT;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETLE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_LE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_LE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_NE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_NE;
        case MVT::i64:
          return AMDILCC::IL_CC_L_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_I_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_EQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_EQ;
        case MVT::i64:
          return AMDILCC::IL_CC_L_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETUGT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETUGE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_GE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UGE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_GE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETULT:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LT;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULT;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LT;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETULE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_LE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_ULE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ULE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_LE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETUNE:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_NE;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UNE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UNE;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_NE;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETUEQ:
      switch (type) {
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
          return AMDILCC::IL_CC_U_EQ;
        case MVT::f32:
          return AMDILCC::IL_CC_F_UEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_UEQ;
        case MVT::i64:
          return AMDILCC::IL_CC_UL_EQ;
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETOGT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETOGE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OGE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OGE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETOLT:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLT;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLT;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETOLE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OLE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OLE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETONE:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_ONE;
        case MVT::f64:
          return AMDILCC::IL_CC_D_ONE;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
    case ISD::SETOEQ:
      switch (type) {
        case MVT::f32:
          return AMDILCC::IL_CC_F_OEQ;
        case MVT::f64:
          return AMDILCC::IL_CC_D_OEQ;
        case MVT::i1:
        case MVT::i8:
        case MVT::i16:
        case MVT::i32:
        case MVT::i64:
        default:
          assert(0 && "Opcode combination not generated correctly!");
          return AMDILCC::COND_ERROR;
      }
  }
}
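
// Illustrative examples of the mapping above (not exhaustive):
//   CondCCodeToCC(ISD::SETGT,  MVT::i32) == AMDILCC::IL_CC_I_GT
//   CondCCodeToCC(ISD::SETUGT, MVT::i32) == AMDILCC::IL_CC_U_GT
//   CondCCodeToCC(ISD::SETOEQ, MVT::f64) == AMDILCC::IL_CC_D_OEQ
// Integer types narrower than 32 bits share the 32-bit predicates, while
// i64, f32, and f64 each get their own L/UL, F, and D variants.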

/// Helper function used by LowerFormalArguments
static const TargetRegisterClass*
getRegClassFromType(unsigned int type) {
  switch (type) {
  default:
    assert(0 && "Passed in type does not match any register classes.");
  case MVT::i8:
    return &AMDIL::GPRI8RegClass;
  case MVT::i16:
    return &AMDIL::GPRI16RegClass;
  case MVT::i32:
    return &AMDIL::GPRI32RegClass;
  case MVT::f32:
    return &AMDIL::GPRF32RegClass;
  case MVT::i64:
    return &AMDIL::GPRI64RegClass;
  case MVT::f64:
    return &AMDIL::GPRF64RegClass;
  case MVT::v4f32:
    return &AMDIL::GPRV4F32RegClass;
  case MVT::v4i8:
    return &AMDIL::GPRV4I8RegClass;
  case MVT::v4i16:
    return &AMDIL::GPRV4I16RegClass;
  case MVT::v4i32:
    return &AMDIL::GPRV4I32RegClass;
  case MVT::v2f32:
    return &AMDIL::GPRV2F32RegClass;
  case MVT::v2i8:
    return &AMDIL::GPRV2I8RegClass;
  case MVT::v2i16:
    return &AMDIL::GPRV2I16RegClass;
  case MVT::v2i32:
    return &AMDIL::GPRV2I32RegClass;
  case MVT::v2f64:
    return &AMDIL::GPRV2F64RegClass;
  case MVT::v2i64:
    return &AMDIL::GPRV2I64RegClass;
  }
}
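
// Hypothetical use, mirroring what an argument-lowering caller would do:
// an incoming v4f32 argument gets a virtual register in GPRV4F32RegClass.
//
//   const TargetRegisterClass *RC = getRegClassFromType(MVT::v4f32);
//   unsigned Reg = MF.getRegInfo().createVirtualRegister(RC);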

SDValue
AMDILTargetLowering::LowerMemArgument(
    SDValue Chain,
    CallingConv::ID CallConv,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    MachineFrameInfo *MFI,
    unsigned i) const
{
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;

  bool AlwaysUseMutable = (CallConv == CallingConv::Fast) &&
    getTargetMachine().Options.GuaranteedTailCallOpt;
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();

  // FIXME: For now, all byval parameter objects are marked mutable. This
  // can be changed with more analysis. In case of tail call optimization,
  // mark all arguments mutable, since they could be overwritten by the
  // lowering of arguments in case of a tail call.
  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
      VA.getLocMemOffset(), isImmutable);
  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());

  if (Flags.isByVal())
    return FIN;
  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(FI),
      false, false, false, 0);
}
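
// Sketch of the two paths above: a byval argument is referenced directly
// through its frame index, while anything else is loaded from the fixed
// stack object. Assuming a 4-byte i32 at offset 8, the non-byval path
// builds roughly:
//
//   FI  = MFI->CreateFixedObject(4, 8, /*isImmutable=*/true);
//   FIN = DAG.getFrameIndex(FI, getPointerTy());
//   Val = DAG.getLoad(MVT::i32, dl, Chain, FIN,
//                     MachinePointerInfo::getFixedStack(FI),
//                     false, false, false, 0);
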
//===----------------------------------------------------------------------===//
// TargetLowering Implementation Help Functions End
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// TargetLowering Class Implementation Begins
//===----------------------------------------------------------------------===//
AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the currently supported register classes.
  for (unsigned int x = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    // FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types.
    // We cannot sextinreg; expand to shifts.
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines.
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // The GPU also does not have a divrem function for signed or unsigned.
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // The GPU does not have [S|U]MUL_LOHI as a single instruction.
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // The GPU doesn't have a rotl, rotr, or byteswap instruction.
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // The GPU doesn't have any counting operators.
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  for (unsigned int ii = 0; ii < numVectorTypes; ++ii) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant, MVT::i64, Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // We support loading/storing v2f64 but not operations on the type.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
  setOperationAction(ISD::Constant, MVT::i32, Legal);
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;
}
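
// A quick legend for the setOperationAction calls above: Legal keeps the
// node for instruction selection, Expand lets the common legalizer rewrite
// it (e.g. ISD::ROTL into shifts and an OR), and Custom routes it through
// this target's LowerOperation. For instance, after the constructor runs,
//   setOperationAction(ISD::SDIV, MVT::i32, Custom);
// means a 32-bit sdiv node is handed to LowerSDIV via LowerOperation.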

const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP: return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP: return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV: return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG: return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE: return "AMDILISD::INEGATE";
    case AMDILISD::MAD: return "AMDILISD::MAD";
    case AMDILISD::UMAD: return "AMDILISD::UMAD";
    case AMDILISD::CALL: return "AMDILISD::CALL";
    case AMDILISD::RET: return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
  }
}
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false;
  unsigned IntNo;
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
             // fall through to the integer xchg variant
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  }

  Info.opc = IntNo;
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
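
// Example of the resulting descriptor: for AMDIL_atomic_add_gi32 the code
// above fills in Info as
//   opc = AMDILISD::ATOM_G_ADD, memVT = MVT::i32, ptrVal = first operand,
//   align = 4, vol = true, readMem = true (the old value is returned),
//   writeMem = true;
// while the matching _noret variant differs only in opc (ATOM_G_ADD_NORET)
// and readMem = false.
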
// The backend supports 32- and 64-bit floating point immediates.
bool
AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
{
  return VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64;
}

bool
AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
{
  // f32 and f64 immediates are legal, so there is no benefit to shrinking
  // a constant to a narrower floating point type.
  return VT.getScalarType().getSimpleVT().SimpleTy != MVT::f32
      && VT.getScalarType().getSimpleVT().SimpleTy != MVT::f64;
}

// computeMaskedBitsForTargetNode - Determine which bits of 'Op' are known
// to be zero or one. Op is expected to be a target specific node. Used by
// the DAG combiner.
void
AMDILTargetLowering::computeMaskedBitsForTargetNode(
    const SDValue Op,
    APInt &KnownZero,
    APInt &KnownOne,
    const SelectionDAG &DAG,
    unsigned Depth) const
{
  APInt KnownZero2;
  APInt KnownOne2;
  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
  switch (Op.getOpcode()) {
    default: break;
    case AMDILISD::SELECT_CC:
             DAG.ComputeMaskedBits(
                 Op.getOperand(1),
                 KnownZero,
                 KnownOne,
                 Depth + 1
                 );
             DAG.ComputeMaskedBits(
                 Op.getOperand(0),
                 KnownZero2,
                 KnownOne2,
                 Depth + 1
                 );
             assert((KnownZero & KnownOne) == 0
                 && "Bits known to be one AND zero?");
             assert((KnownZero2 & KnownOne2) == 0
                 && "Bits known to be one AND zero?");
             // Only known if known in both the LHS and RHS.
             KnownOne &= KnownOne2;
             KnownZero &= KnownZero2;
             break;
  }
}
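
// Worked example: if one SELECT_CC input is known to fit in 8 bits
// (KnownZero = 0xFFFFFF00) and the other in 16 bits
// (KnownZero = 0xFFFF0000), the &= intersection leaves
// KnownZero = 0xFFFF0000: only the top 16 zero bits remain guaranteed,
// since the select may produce either operand.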
1308
1309// This is the function that determines which calling convention should
1310// be used. Currently there is only one calling convention
1311CCAssignFn*
1312AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1313{
1314  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1315  return CC_AMDIL32;
1316}
1317
1318// LowerCallResult - Lower the result values of an ISD::CALL into the
1319// appropriate copies out of appropriate physical registers.  This assumes that
1320// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1321// being lowered.  The returns a SDNode with the same number of values as the
1322// ISD::CALL.
1323SDValue
1324AMDILTargetLowering::LowerCallResult(
1325    SDValue Chain,
1326    SDValue InFlag,
1327    CallingConv::ID CallConv,
1328    bool isVarArg,
1329    const SmallVectorImpl<ISD::InputArg> &Ins,
1330    DebugLoc dl,
1331    SelectionDAG &DAG,
1332    SmallVectorImpl<SDValue> &InVals) const
1333{
1334  // Assign locations to each value returned by this call
1335  SmallVector<CCValAssign, 16> RVLocs;
1336  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1337                 getTargetMachine(), RVLocs, *DAG.getContext());
1338  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1339
1340  // Copy all of the result registers out of their specified physreg.
1341  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1342    EVT CopyVT = RVLocs[i].getValVT();
1343    if (RVLocs[i].isRegLoc()) {
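      // getCopyFromReg produces (value, chain, glue): result 0 is the
      // copied value, result 1 the updated chain, and result 2 the glue
      // that keeps consecutive copies adjacent.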
      Chain = DAG.getCopyFromReg(
          Chain,
          dl,
          RVLocs[i].getLocReg(),
          CopyVT,
          InFlag
          ).getValue(1);
      SDValue Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
      InVals.push_back(Val);
    }
  }

  return Chain;
}

//===----------------------------------------------------------------------===//
//                           Other Lowering Hooks
//===----------------------------------------------------------------------===//

// Recursively assign SDNodeOrdering to any unordered nodes
// This is necessary to maintain source ordering of instructions
// under -O0 to avoid odd-looking "skipping around" issues.
  static const SDValue
Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
{
  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
    DAG.AssignOrdering( New.getNode(), order );
    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
      Ordered( DAG, order, New.getOperand(i) );
  }
  return New;
}

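// LOWER(A) expands to a case label that calls the matching Lower'A' hook
// and re-applies the original node's ordering to whatever it creates,
// e.g. LOWER(SETCC) becomes:
//   case ISD::SETCC:
//     return Ordered(DAG, DAG.GetOrdering(Op.getNode()), LowerSETCC(Op, DAG));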
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this "
          "instruction is not implemented yet!");
      break;
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(BUILD_VECTOR);
      LOWER(SELECT);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
  }
  return Op;
}

#undef LOWER

SDValue
AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = Op;
  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
  const GlobalValue *G = GADN->getGlobal();
  DebugLoc DL = Op.getDebugLoc();
  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
  if (!GV) {
    DST = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
  } else {
    if (GV->hasInitializer()) {
      const Constant *C = GV->getInitializer();
      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
        DST = DAG.getConstantFP(CF->getValueAPF(),
            Op.getValueType());
      } else if (isa<ConstantAggregateZero>(C)) {
        EVT VT = Op.getValueType();
        if (VT.isInteger()) {
          DST = DAG.getConstant(0, VT);
        } else {
          DST = DAG.getConstantFP(0, VT);
        }
      } else {
        C->dump();
        assert(!"lowering this type of Global Address "
            "not implemented yet!");
        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
      }
    } else {
      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
    }
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
{
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  return Result;
}

SDValue
AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
{
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  EVT PtrVT = Op.getValueType();
  SDValue Result;
  if (CP->isMachineConstantPoolEntry()) {
    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  } else {
    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
  }
  return Result;
}

SDValue
AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
{
  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
  return Result;
}

/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8- or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register nor a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to address "Dst" with size and alignment information
/// specified by the specific parameter attribute. The copy will be passed
/// as a byval function parameter.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
  assert(0 && "MemCopy does not exist yet");
  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);

  return DAG.getMemcpy(Chain,
      Src.getDebugLoc(),
      Dst, Src, SizeNode, Flags.getByValAlign(),
      /*IsVol=*/false, /*AlwaysInline=*/true,
      MachinePointerInfo(), MachinePointerInfo());
}

SDValue
AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
    SDValue StackPtr, SDValue Arg,
    DebugLoc dl, SelectionDAG &DAG,
    const CCValAssign &VA,
    ISD::ArgFlagsTy Flags) const
{
  unsigned int LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD,
      dl,
      getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
  } else {
    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
        MachinePointerInfo::getStack(LocMemOffset),
        false, false, 0);
  }
  return PtrOff;
}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: Do we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the call operands; this needs to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(0 && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the first 5 operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}

SDValue
AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSDIV64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSDIV32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16
      || OVT.getScalarType() == MVT::i8) {
    DST = LowerSDIV24(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
{
  EVT OVT = Op.getValueType();
  SDValue DST;
  if (OVT.getScalarType() == MVT::i64) {
    DST = LowerSREM64(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i32) {
    DST = LowerSREM32(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i16) {
    DST = LowerSREM16(Op, DAG);
  } else if (OVT.getScalarType() == MVT::i8) {
    DST = LowerSREM8(Op, DAG);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}

SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
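  // Note: the cases below intentionally fall through (4 -> 3 -> 2) so a
  // four-element build inserts operands 3, 2, and 1 in turn; operand 0 is
  // already handled by the VBUILD node above.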
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  }
  return Nodes1;
}

SDValue
AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond = Op.getOperand(0);
  SDValue LHS = Op.getOperand(1);
  SDValue RHS = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(AMDILISD::CMOVLOG,
      DL,
      Op.getValueType(), Cond, LHS, RHS);
  return Cond;
}

SDValue
AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue CC  = Op.getOperand(2);
  DebugLoc DL = Op.getDebugLoc();
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      ISD::SELECT_CC,
      DL,
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Cond = getConversionNode(DAG, Cond, Op, true);
  Cond = DAG.getNode(
      ISD::AND,
      DL,
      Cond.getValueType(),
      DAG.getConstant(1, Cond.getValueType()),
      Cond);
  return Cond;
}

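// LowerSIGN_EXTEND_INREG implements in-register sign extension with a
// shift pair: shift left so the sign bit of the narrow type lands in the
// MSB, then shift right arithmetically. For example, extending an i8 held
// in an i32 uses shl/sra by 24:
//   0x000000ff shl 24 -> 0xff000000, sra 24 -> 0xffffffff (-1)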
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32 bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
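
// genIntType returns an integer type (or vector thereof) whose total bit
// width matches size * numEle, built from i32 or i64 elements: e.g.
// genIntType(32, 2) yields v2i32 and genIntType(64, 1) yields i64.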
EVT
AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
{
  int iSize = (size * numEle);
  int vEle = (iSize >> ((size == 64) ? 6 : 5));
  if (!vEle) {
    vEle = 1;
  }
  if (size == 64) {
    if (vEle == 1) {
      return EVT(MVT::i64);
    } else {
      return EVT(MVT::getVectorVT(MVT::i64, vEle));
    }
  } else {
    if (vEle == 1) {
      return EVT(MVT::i32);
    } else {
      return EVT(MVT::getVectorVT(MVT::i32, vEle));
    }
  }
}

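// LowerDYNAMIC_STACKALLOC bumps the software stack pointer register by
// the requested size and returns the new SP together with the updated
// chain.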
SDValue
AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  unsigned int SPReg = AMDIL::SP;
  DebugLoc DL = Op.getDebugLoc();
  SDValue SP = DAG.getCopyFromReg(Chain,
      DL,
      SPReg, MVT::i32);
  SDValue NewSP = DAG.getNode(ISD::ADD,
      DL,
      MVT::i32, SP, Size);
  Chain = DAG.getCopyToReg(SP.getValue(1),
      DL,
      SPReg, NewSP);
  SDValue Ops[2] = {NewSP, Chain};
  Chain = DAG.getMergeValues(Ops, 2, DL);
  return Chain;
}

SDValue
AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);
  SDValue Result;
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      Op.getDebugLoc(),
      Op.getValueType(),
      Chain, Jump, Cond);
  return Result;
}

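// LowerBR_CC materializes the comparison as an all-ones/zero mask via
// SELECT_CC and then branches on that value with BRANCH_COND.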
SDValue
AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Chain = Op.getOperand(0);
  SDValue CC = Op.getOperand(1);
  SDValue LHS   = Op.getOperand(2);
  SDValue RHS   = Op.getOperand(3);
  SDValue JumpT  = Op.getOperand(4);
  SDValue CmpValue;
  SDValue Result;
  CmpValue = DAG.getNode(
      ISD::SELECT_CC,
      Op.getDebugLoc(),
      LHS.getValueType(),
      LHS, RHS,
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      CC);
  Result = DAG.getNode(
      AMDILISD::BRANCH_COND,
      CmpValue.getDebugLoc(),
      MVT::Other, Chain,
      JumpT, CmpValue);
  return Result;
}

// LowerReturn - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // Copy each return value into its assigned physical register
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // Glue the copies together so nothing can be scheduled
    // between them
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}

unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}

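// LowerSDIV24 expands signed 8- and 16-bit division through f32
// arithmetic: both operands fit exactly in a float's 24-bit mantissa, so
// a float divide plus one correction step recovers the exact integer
// quotient. The correction term jq is +/-1 depending on the sign of
// LHS ^ RHS, and is applied only when the remainder check fr >= fb shows
// that the truncated float quotient is one off.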
SDValue
AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  MVT INTTY = MVT::i32;
  MVT FLTTY = MVT::f32;
  if (OVT.isVector() && OVT.getVectorNumElements() == 2) {
    INTTY = MVT::v2i32;
    FLTTY = MVT::v2f32;
  } else if (OVT.isVector() && OVT.getVectorNumElements() == 4) {
    INTTY = MVT::v4i32;
    FLTTY = MVT::v4f32;
  }
  unsigned bitsize = OVT.getScalarType().getSizeInBits();
  // char|short jq = ia ^ ib;
  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);

  // jq = jq >> (bitsize - 2)
  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));

  // jq = jq | 0x1
  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));

  // jq = (int)jq
  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);

  // int ia = (int)LHS;
  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);

  // int ib = (int)RHS;
  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);

  // float fa = (float)ia;
  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);

  // float fb = (float)ib;
  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);

  // float fq = native_divide(fa, fb);
  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);

  // fq = trunc(fq);
  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);

  // float fqneg = -fq;
  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);

  // float fr = mad(fqneg, fb, fa);
  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);

  // int iq = (int)fq;
  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);

  // fr = fabs(fr);
  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);

  // fb = fabs(fb);
  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);

  // int cv = fr >= fb;
  SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);

  // jq = (cv ? jq : 0);
  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
      DAG.getConstant(0, OVT));
  // dst = iq + jq;
  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
  return iq;
}

SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates IL equivalent to the following:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10
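  //
  // This is the usual sign-handling identity: (x + m) ^ m computes |x|
  // when m = x >> 31 (all ones for negative x, zero otherwise). An
  // unsigned divide is done on the magnitudes, and the quotient's sign is
  // restored from sign(LHS) ^ sign(RHS) by the final iadd/ixor pair.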

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}

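// LowerSREM8/LowerSREM16 simply widen the operands to the matching
// 32-bit (vector) type, take the remainder there, and truncate the
// result back, reusing the i32 SREM lowering below.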
SDValue
AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i8) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i8) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  MVT INTTY = MVT::i32;
  if (OVT == MVT::v2i16) {
    INTTY = MVT::v2i32;
  } else if (OVT == MVT::v4i16) {
    INTTY = MVT::v4i32;
  }
  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
  return LHS;
}

SDValue
AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSREM32 function generates IL equivalent to the following:
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r20, r0, r1
  // umul r20, r20, r1
  // sub r0, r0, r20
  // iadd r0, r0, r10
  // ixor DST, r0, r10
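  //
  // As in LowerSDIV32, (x + m) ^ m with m = x >> 31 takes absolute values
  // first; the remainder of the magnitudes is then computed as
  // |LHS| - (|LHS| / |RHS|) * |RHS|, and the final iadd/ixor pair gives
  // the result the sign of LHS (r10), matching C's % semantics.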

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r0, DAG.getConstant(0, OVT));

  // ilt r11, r1, 0
  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
      r1, DAG.getConstant(0, OVT));

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r20, r0, r1
  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // umul r20, r20, r1
  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);

  // sub r0, r0, r20
  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}

SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  return SDValue(Op.getNode(), 0);
}