AMDILISelLowering.cpp revision 49fb99bd131a4ed89e6f55cf360f67618acafec4
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILRegisterInfo.h"
19#include "AMDILSubtarget.h"
20#include "AMDILUtilityFunctions.h"
21#include "llvm/CallingConv.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/PseudoSourceValue.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/DerivedTypes.h"
29#include "llvm/Instructions.h"
30#include "llvm/Intrinsics.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetInstrInfo.h"
33#include "llvm/Target/TargetOptions.h"
34
35using namespace llvm;
36#define ISDBITCAST  ISD::BITCAST
37#define MVTGLUE     MVT::Glue
38//===----------------------------------------------------------------------===//
39// Calling Convention Implementation
40//===----------------------------------------------------------------------===//
41#include "AMDILGenCallingConv.inc"
42
43//===----------------------------------------------------------------------===//
44// TargetLowering Implementation Help Functions Begin
45//===----------------------------------------------------------------------===//
46  static SDValue
47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
48{
49  DebugLoc DL = Src.getDebugLoc();
50  EVT svt = Src.getValueType().getScalarType();
51  EVT dvt = Dst.getValueType().getScalarType();
52  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
53    if (dvt.bitsGT(svt)) {
54      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
55    } else if (svt.bitsLT(svt)) {
56      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
57          DAG.getConstant(1, MVT::i32));
58    }
59  } else if (svt.isInteger() && dvt.isInteger()) {
60    if (!svt.bitsEq(dvt)) {
61      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
62    } else {
63      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
64    }
65  } else if (svt.isInteger()) {
66    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
67    if (!svt.bitsEq(dvt)) {
68      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
69        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
70      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
71        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
72      } else {
73        assert(0 && "We only support 32 and 64bit fp types");
74      }
75    }
76    Src = DAG.getNode(opcode, DL, dvt, Src);
77  } else if (dvt.isInteger()) {
78    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
79    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
80      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
81    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
82      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
83    } else {
84      assert(0 && "We only support 32 and 64bit fp types");
85    }
86    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
87  }
88  return Src;
89}
90// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
91// condition.
92  static AMDILCC::CondCodes
93CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
94{
95  switch (CC) {
96    default:
97      {
98        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
99        assert(0 && "Unknown condition code!");
100      }
101    case ISD::SETO:
102      switch(type) {
103        case MVT::f32:
104          return AMDILCC::IL_CC_F_O;
105        case MVT::f64:
106          return AMDILCC::IL_CC_D_O;
107        default:
108          assert(0 && "Opcode combination not generated correctly!");
109          return AMDILCC::COND_ERROR;
110      };
111    case ISD::SETUO:
112      switch(type) {
113        case MVT::f32:
114          return AMDILCC::IL_CC_F_UO;
115        case MVT::f64:
116          return AMDILCC::IL_CC_D_UO;
117        default:
118          assert(0 && "Opcode combination not generated correctly!");
119          return AMDILCC::COND_ERROR;
120      };
121    case ISD::SETGT:
122      switch (type) {
123        case MVT::i1:
124        case MVT::i8:
125        case MVT::i16:
126        case MVT::i32:
127          return AMDILCC::IL_CC_I_GT;
128        case MVT::f32:
129          return AMDILCC::IL_CC_F_GT;
130        case MVT::f64:
131          return AMDILCC::IL_CC_D_GT;
132        case MVT::i64:
133          return AMDILCC::IL_CC_L_GT;
134        default:
135          assert(0 && "Opcode combination not generated correctly!");
136          return AMDILCC::COND_ERROR;
137      };
138    case ISD::SETGE:
139      switch (type) {
140        case MVT::i1:
141        case MVT::i8:
142        case MVT::i16:
143        case MVT::i32:
144          return AMDILCC::IL_CC_I_GE;
145        case MVT::f32:
146          return AMDILCC::IL_CC_F_GE;
147        case MVT::f64:
148          return AMDILCC::IL_CC_D_GE;
149        case MVT::i64:
150          return AMDILCC::IL_CC_L_GE;
151        default:
152          assert(0 && "Opcode combination not generated correctly!");
153          return AMDILCC::COND_ERROR;
154      };
155    case ISD::SETLT:
156      switch (type) {
157        case MVT::i1:
158        case MVT::i8:
159        case MVT::i16:
160        case MVT::i32:
161          return AMDILCC::IL_CC_I_LT;
162        case MVT::f32:
163          return AMDILCC::IL_CC_F_LT;
164        case MVT::f64:
165          return AMDILCC::IL_CC_D_LT;
166        case MVT::i64:
167          return AMDILCC::IL_CC_L_LT;
168        default:
169          assert(0 && "Opcode combination not generated correctly!");
170          return AMDILCC::COND_ERROR;
171      };
172    case ISD::SETLE:
173      switch (type) {
174        case MVT::i1:
175        case MVT::i8:
176        case MVT::i16:
177        case MVT::i32:
178          return AMDILCC::IL_CC_I_LE;
179        case MVT::f32:
180          return AMDILCC::IL_CC_F_LE;
181        case MVT::f64:
182          return AMDILCC::IL_CC_D_LE;
183        case MVT::i64:
184          return AMDILCC::IL_CC_L_LE;
185        default:
186          assert(0 && "Opcode combination not generated correctly!");
187          return AMDILCC::COND_ERROR;
188      };
189    case ISD::SETNE:
190      switch (type) {
191        case MVT::i1:
192        case MVT::i8:
193        case MVT::i16:
194        case MVT::i32:
195          return AMDILCC::IL_CC_I_NE;
196        case MVT::f32:
197          return AMDILCC::IL_CC_F_NE;
198        case MVT::f64:
199          return AMDILCC::IL_CC_D_NE;
200        case MVT::i64:
201          return AMDILCC::IL_CC_L_NE;
202        default:
203          assert(0 && "Opcode combination not generated correctly!");
204          return AMDILCC::COND_ERROR;
205      };
206    case ISD::SETEQ:
207      switch (type) {
208        case MVT::i1:
209        case MVT::i8:
210        case MVT::i16:
211        case MVT::i32:
212          return AMDILCC::IL_CC_I_EQ;
213        case MVT::f32:
214          return AMDILCC::IL_CC_F_EQ;
215        case MVT::f64:
216          return AMDILCC::IL_CC_D_EQ;
217        case MVT::i64:
218          return AMDILCC::IL_CC_L_EQ;
219        default:
220          assert(0 && "Opcode combination not generated correctly!");
221          return AMDILCC::COND_ERROR;
222      };
223    case ISD::SETUGT:
224      switch (type) {
225        case MVT::i1:
226        case MVT::i8:
227        case MVT::i16:
228        case MVT::i32:
229          return AMDILCC::IL_CC_U_GT;
230        case MVT::f32:
231          return AMDILCC::IL_CC_F_UGT;
232        case MVT::f64:
233          return AMDILCC::IL_CC_D_UGT;
234        case MVT::i64:
235          return AMDILCC::IL_CC_UL_GT;
236        default:
237          assert(0 && "Opcode combination not generated correctly!");
238          return AMDILCC::COND_ERROR;
239      };
240    case ISD::SETUGE:
241      switch (type) {
242        case MVT::i1:
243        case MVT::i8:
244        case MVT::i16:
245        case MVT::i32:
246          return AMDILCC::IL_CC_U_GE;
247        case MVT::f32:
248          return AMDILCC::IL_CC_F_UGE;
249        case MVT::f64:
250          return AMDILCC::IL_CC_D_UGE;
251        case MVT::i64:
252          return AMDILCC::IL_CC_UL_GE;
253        default:
254          assert(0 && "Opcode combination not generated correctly!");
255          return AMDILCC::COND_ERROR;
256      };
257    case ISD::SETULT:
258      switch (type) {
259        case MVT::i1:
260        case MVT::i8:
261        case MVT::i16:
262        case MVT::i32:
263          return AMDILCC::IL_CC_U_LT;
264        case MVT::f32:
265          return AMDILCC::IL_CC_F_ULT;
266        case MVT::f64:
267          return AMDILCC::IL_CC_D_ULT;
268        case MVT::i64:
269          return AMDILCC::IL_CC_UL_LT;
270        default:
271          assert(0 && "Opcode combination not generated correctly!");
272          return AMDILCC::COND_ERROR;
273      };
274    case ISD::SETULE:
275      switch (type) {
276        case MVT::i1:
277        case MVT::i8:
278        case MVT::i16:
279        case MVT::i32:
280          return AMDILCC::IL_CC_U_LE;
281        case MVT::f32:
282          return AMDILCC::IL_CC_F_ULE;
283        case MVT::f64:
284          return AMDILCC::IL_CC_D_ULE;
285        case MVT::i64:
286          return AMDILCC::IL_CC_UL_LE;
287        default:
288          assert(0 && "Opcode combination not generated correctly!");
289          return AMDILCC::COND_ERROR;
290      };
291    case ISD::SETUNE:
292      switch (type) {
293        case MVT::i1:
294        case MVT::i8:
295        case MVT::i16:
296        case MVT::i32:
297          return AMDILCC::IL_CC_U_NE;
298        case MVT::f32:
299          return AMDILCC::IL_CC_F_UNE;
300        case MVT::f64:
301          return AMDILCC::IL_CC_D_UNE;
302        case MVT::i64:
303          return AMDILCC::IL_CC_UL_NE;
304        default:
305          assert(0 && "Opcode combination not generated correctly!");
306          return AMDILCC::COND_ERROR;
307      };
308    case ISD::SETUEQ:
309      switch (type) {
310        case MVT::i1:
311        case MVT::i8:
312        case MVT::i16:
313        case MVT::i32:
314          return AMDILCC::IL_CC_U_EQ;
315        case MVT::f32:
316          return AMDILCC::IL_CC_F_UEQ;
317        case MVT::f64:
318          return AMDILCC::IL_CC_D_UEQ;
319        case MVT::i64:
320          return AMDILCC::IL_CC_UL_EQ;
321        default:
322          assert(0 && "Opcode combination not generated correctly!");
323          return AMDILCC::COND_ERROR;
324      };
325    case ISD::SETOGT:
326      switch (type) {
327        case MVT::f32:
328          return AMDILCC::IL_CC_F_OGT;
329        case MVT::f64:
330          return AMDILCC::IL_CC_D_OGT;
331        case MVT::i1:
332        case MVT::i8:
333        case MVT::i16:
334        case MVT::i32:
335        case MVT::i64:
336        default:
337          assert(0 && "Opcode combination not generated correctly!");
338          return AMDILCC::COND_ERROR;
339      };
340    case ISD::SETOGE:
341      switch (type) {
342        case MVT::f32:
343          return AMDILCC::IL_CC_F_OGE;
344        case MVT::f64:
345          return AMDILCC::IL_CC_D_OGE;
346        case MVT::i1:
347        case MVT::i8:
348        case MVT::i16:
349        case MVT::i32:
350        case MVT::i64:
351        default:
352          assert(0 && "Opcode combination not generated correctly!");
353          return AMDILCC::COND_ERROR;
354      };
355    case ISD::SETOLT:
356      switch (type) {
357        case MVT::f32:
358          return AMDILCC::IL_CC_F_OLT;
359        case MVT::f64:
360          return AMDILCC::IL_CC_D_OLT;
361        case MVT::i1:
362        case MVT::i8:
363        case MVT::i16:
364        case MVT::i32:
365        case MVT::i64:
366        default:
367          assert(0 && "Opcode combination not generated correctly!");
368          return AMDILCC::COND_ERROR;
369      };
370    case ISD::SETOLE:
371      switch (type) {
372        case MVT::f32:
373          return AMDILCC::IL_CC_F_OLE;
374        case MVT::f64:
375          return AMDILCC::IL_CC_D_OLE;
376        case MVT::i1:
377        case MVT::i8:
378        case MVT::i16:
379        case MVT::i32:
380        case MVT::i64:
381        default:
382          assert(0 && "Opcode combination not generated correctly!");
383          return AMDILCC::COND_ERROR;
384      };
385    case ISD::SETONE:
386      switch (type) {
387        case MVT::f32:
388          return AMDILCC::IL_CC_F_ONE;
389        case MVT::f64:
390          return AMDILCC::IL_CC_D_ONE;
391        case MVT::i1:
392        case MVT::i8:
393        case MVT::i16:
394        case MVT::i32:
395        case MVT::i64:
396        default:
397          assert(0 && "Opcode combination not generated correctly!");
398          return AMDILCC::COND_ERROR;
399      };
400    case ISD::SETOEQ:
401      switch (type) {
402        case MVT::f32:
403          return AMDILCC::IL_CC_F_OEQ;
404        case MVT::f64:
405          return AMDILCC::IL_CC_D_OEQ;
406        case MVT::i1:
407        case MVT::i8:
408        case MVT::i16:
409        case MVT::i32:
410        case MVT::i64:
411        default:
412          assert(0 && "Opcode combination not generated correctly!");
413          return AMDILCC::COND_ERROR;
414      };
415  };
416}
417
418/// Helper function used by LowerFormalArguments
419static const TargetRegisterClass*
420getRegClassFromType(unsigned int type) {
421  switch (type) {
422  default:
423    assert(0 && "Passed in type does not match any register classes.");
424  case MVT::i8:
425    return &AMDIL::GPRI8RegClass;
426  case MVT::i16:
427    return &AMDIL::GPRI16RegClass;
428  case MVT::i32:
429    return &AMDIL::GPRI32RegClass;
430  case MVT::f32:
431    return &AMDIL::GPRF32RegClass;
432  case MVT::i64:
433    return &AMDIL::GPRI64RegClass;
434  case MVT::f64:
435    return &AMDIL::GPRF64RegClass;
436  case MVT::v4f32:
437    return &AMDIL::GPRV4F32RegClass;
438  case MVT::v4i8:
439    return &AMDIL::GPRV4I8RegClass;
440  case MVT::v4i16:
441    return &AMDIL::GPRV4I16RegClass;
442  case MVT::v4i32:
443    return &AMDIL::GPRV4I32RegClass;
444  case MVT::v2f32:
445    return &AMDIL::GPRV2F32RegClass;
446  case MVT::v2i8:
447    return &AMDIL::GPRV2I8RegClass;
448  case MVT::v2i16:
449    return &AMDIL::GPRV2I16RegClass;
450  case MVT::v2i32:
451    return &AMDIL::GPRV2I32RegClass;
452  case MVT::v2f64:
453    return &AMDIL::GPRV2F64RegClass;
454  case MVT::v2i64:
455    return &AMDIL::GPRV2I64RegClass;
456  }
457}
458
459SDValue
460AMDILTargetLowering::LowerMemArgument(
461    SDValue Chain,
462    CallingConv::ID CallConv,
463    const SmallVectorImpl<ISD::InputArg> &Ins,
464    DebugLoc dl, SelectionDAG &DAG,
465    const CCValAssign &VA,
466    MachineFrameInfo *MFI,
467    unsigned i) const
468{
469  // Create the nodes corresponding to a load from this parameter slot.
470  ISD::ArgFlagsTy Flags = Ins[i].Flags;
471
472  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
473    getTargetMachine().Options.GuaranteedTailCallOpt;
474  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
475
476  // FIXME: For now, all byval parameter objects are marked mutable. This can
477  // be changed with more analysis.
478  // In case of tail call optimization mark all arguments mutable. Since they
479  // could be overwritten by lowering of arguments in case of a tail call.
480  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
481      VA.getLocMemOffset(), isImmutable);
482  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
483
484  if (Flags.isByVal())
485    return FIN;
486  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
487      MachinePointerInfo::getFixedStack(FI),
488      false, false, false, 0);
489}
490//===----------------------------------------------------------------------===//
491// TargetLowering Implementation Help Functions End
492//===----------------------------------------------------------------------===//
493//===----------------------------------------------------------------------===//
494// Instruction generation functions
495//===----------------------------------------------------------------------===//
496MachineOperand
497AMDILTargetLowering::convertToReg(MachineOperand op) const
498{
499  if (op.isReg()) {
500    return op;
501  } else if (op.isImm()) {
502    uint32_t loadReg
503      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
504    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
505      .addImm(op.getImm());
506    op.ChangeToRegister(loadReg, false);
507  } else if (op.isFPImm()) {
508    uint32_t loadReg
509      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
510    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
511      .addFPImm(op.getFPImm());
512    op.ChangeToRegister(loadReg, false);
513  } else if (op.isMBB()) {
514    op.ChangeToRegister(0, false);
515  } else if (op.isFI()) {
516    op.ChangeToRegister(0, false);
517  } else if (op.isCPI()) {
518    op.ChangeToRegister(0, false);
519  } else if (op.isJTI()) {
520    op.ChangeToRegister(0, false);
521  } else if (op.isGlobal()) {
522    op.ChangeToRegister(0, false);
523  } else if (op.isSymbol()) {
524    op.ChangeToRegister(0, false);
525  }/* else if (op.isMetadata()) {
526      op.ChangeToRegister(0, false);
527      }*/
528  return op;
529}
530
531//===----------------------------------------------------------------------===//
532// TargetLowering Class Implementation Begins
533//===----------------------------------------------------------------------===//
534  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
535: TargetLowering(TM, new TargetLoweringObjectFileELF())
536{
537  int types[] =
538  {
539    (int)MVT::i8,
540    (int)MVT::i16,
541    (int)MVT::i32,
542    (int)MVT::f32,
543    (int)MVT::f64,
544    (int)MVT::i64,
545    (int)MVT::v2i8,
546    (int)MVT::v4i8,
547    (int)MVT::v2i16,
548    (int)MVT::v4i16,
549    (int)MVT::v4f32,
550    (int)MVT::v4i32,
551    (int)MVT::v2f32,
552    (int)MVT::v2i32,
553    (int)MVT::v2f64,
554    (int)MVT::v2i64
555  };
556
557  int IntTypes[] =
558  {
559    (int)MVT::i8,
560    (int)MVT::i16,
561    (int)MVT::i32,
562    (int)MVT::i64
563  };
564
565  int FloatTypes[] =
566  {
567    (int)MVT::f32,
568    (int)MVT::f64
569  };
570
571  int VectorTypes[] =
572  {
573    (int)MVT::v2i8,
574    (int)MVT::v4i8,
575    (int)MVT::v2i16,
576    (int)MVT::v4i16,
577    (int)MVT::v4f32,
578    (int)MVT::v4i32,
579    (int)MVT::v2f32,
580    (int)MVT::v2i32,
581    (int)MVT::v2f64,
582    (int)MVT::v2i64
583  };
584  size_t numTypes = sizeof(types) / sizeof(*types);
585  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
586  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
587  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
588
589  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
590  // These are the current register classes that are
591  // supported
592
593  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
594  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
595
596  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
597    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
598    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
599  }
600  if (STM.device()->isSupported(AMDILDeviceInfo::ByteOps)) {
601    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
602    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
603    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
604    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
605  }
606  if (STM.device()->isSupported(AMDILDeviceInfo::ShortOps)) {
607    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
608    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
609    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
610    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
611  }
612  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
613  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
614  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
615  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
616  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
617    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
618    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
619  }
620
621  for (unsigned int x  = 0; x < numTypes; ++x) {
622    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
623
624    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
625    // We cannot sextinreg, expand to shifts
626    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
627    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
628    setOperationAction(ISD::FP_ROUND, VT, Expand);
629    setOperationAction(ISD::SUBE, VT, Expand);
630    setOperationAction(ISD::SUBC, VT, Expand);
631    setOperationAction(ISD::ADDE, VT, Expand);
632    setOperationAction(ISD::ADDC, VT, Expand);
633    setOperationAction(ISD::SETCC, VT, Custom);
634    setOperationAction(ISD::BRCOND, VT, Custom);
635    setOperationAction(ISD::BR_CC, VT, Custom);
636    setOperationAction(ISD::BR_JT, VT, Expand);
637    setOperationAction(ISD::BRIND, VT, Expand);
638    // TODO: Implement custom UREM/SREM routines
639    setOperationAction(ISD::UREM, VT, Expand);
640    setOperationAction(ISD::SREM, VT, Expand);
641    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
642    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
643    setOperationAction(ISD::GlobalAddress, VT, Custom);
644    setOperationAction(ISD::JumpTable, VT, Custom);
645    setOperationAction(ISD::ConstantPool, VT, Custom);
646    setOperationAction(ISD::SELECT_CC, VT, Custom);
647    setOperationAction(ISD::SELECT, VT, Custom);
648    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
649    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
650    if (VT != MVT::i64 && VT != MVT::v2i64) {
651      setOperationAction(ISD::SDIV, VT, Custom);
652    }
653    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
654    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
655  }
656  for (unsigned int x = 0; x < numFloatTypes; ++x) {
657    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
658
659    // IL does not have these operations for floating point types
660    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
661    setOperationAction(ISD::FP_ROUND, VT, Custom);
662    setOperationAction(ISD::SETOLT, VT, Expand);
663    setOperationAction(ISD::SETOGE, VT, Expand);
664    setOperationAction(ISD::SETOGT, VT, Expand);
665    setOperationAction(ISD::SETOLE, VT, Expand);
666    setOperationAction(ISD::SETULT, VT, Expand);
667    setOperationAction(ISD::SETUGE, VT, Expand);
668    setOperationAction(ISD::SETUGT, VT, Expand);
669    setOperationAction(ISD::SETULE, VT, Expand);
670  }
671
672  for (unsigned int x = 0; x < numIntTypes; ++x) {
673    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
674
675    // GPU also does not have divrem function for signed or unsigned
676    setOperationAction(ISD::SDIVREM, VT, Expand);
677    setOperationAction(ISD::UDIVREM, VT, Expand);
678    setOperationAction(ISD::FP_ROUND, VT, Expand);
679
680    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
681    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
682    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
683
684    // GPU doesn't have a rotl, rotr, or byteswap instruction
685    setOperationAction(ISD::ROTR, VT, Expand);
686    setOperationAction(ISD::ROTL, VT, Expand);
687    setOperationAction(ISD::BSWAP, VT, Expand);
688
689    // GPU doesn't have any counting operators
690    setOperationAction(ISD::CTPOP, VT, Expand);
691    setOperationAction(ISD::CTTZ, VT, Expand);
692    setOperationAction(ISD::CTLZ, VT, Expand);
693  }
694
695  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
696  {
697    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
698
699    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
700    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
701    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
702    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
703    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
704    setOperationAction(ISD::FP_ROUND, VT, Expand);
705    setOperationAction(ISD::SDIVREM, VT, Expand);
706    setOperationAction(ISD::UDIVREM, VT, Expand);
707    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
708    // setOperationAction(ISD::VSETCC, VT, Expand);
709    setOperationAction(ISD::SETCC, VT, Expand);
710    setOperationAction(ISD::SELECT_CC, VT, Expand);
711    setOperationAction(ISD::SELECT, VT, Expand);
712
713  }
714  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
715  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
716    if (STM.calVersion() < CAL_VERSION_SC_139
717        || STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
718      setOperationAction(ISD::MUL, MVT::i64, Custom);
719    }
720    setOperationAction(ISD::SUB, MVT::i64, Custom);
721    setOperationAction(ISD::ADD, MVT::i64, Custom);
722    setOperationAction(ISD::MULHU, MVT::i64, Expand);
723    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
724    setOperationAction(ISD::MULHS, MVT::i64, Expand);
725    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
726    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
727    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
728    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
729    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
730    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
731    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
732    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
733    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
734    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
735    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
736    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
737    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
738  }
739  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
740    // we support loading/storing v2f64 but not operations on the type
741    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
742    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
743    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
744    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
745    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
746    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
747    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
748    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
749    // We want to expand vector conversions into their scalar
750    // counterparts.
751    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
752    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
753    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
754    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
755    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
756    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
757    setOperationAction(ISD::FABS, MVT::f64, Expand);
758    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
759  }
760  // TODO: Fix the UDIV24 algorithm so it works for these
761  // types correctly. This needs vector comparisons
762  // for this to work correctly.
763  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
764  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
765  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
766  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
767  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
768  setOperationAction(ISD::SUBC, MVT::Other, Expand);
769  setOperationAction(ISD::ADDE, MVT::Other, Expand);
770  setOperationAction(ISD::ADDC, MVT::Other, Expand);
771  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
772  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
773  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
774  setOperationAction(ISD::BRIND, MVT::Other, Expand);
775  setOperationAction(ISD::SETCC, MVT::Other, Custom);
776  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
777  setOperationAction(ISD::FDIV, MVT::f32, Custom);
778  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
779  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
780
781  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
782  // Use the default implementation.
783  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
784  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
785  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
786  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
787  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
788  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
789  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
790  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
791  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
792
793  setStackPointerRegisterToSaveRestore(AMDIL::SP);
794  setSchedulingPreference(Sched::RegPressure);
795  setPow2DivIsCheap(false);
796  setPrefLoopAlignment(16);
797  setSelectIsExpensive(true);
798  setJumpIsExpensive(true);
799  computeRegisterProperties();
800
801  maxStoresPerMemcpy  = 4096;
802  maxStoresPerMemmove = 4096;
803  maxStoresPerMemset  = 4096;
804
805#undef numTypes
806#undef numIntTypes
807#undef numVectorTypes
808#undef numFloatTypes
809}
810
811const char *
812AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
813{
814  switch (Opcode) {
815    default: return 0;
816    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
817    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
818    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
819    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
820    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
821    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
822    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
823    case AMDILISD::MAD:  return "AMDILISD::MAD";
824    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
825    case AMDILISD::CALL:  return "AMDILISD::CALL";
826    case AMDILISD::RET:   return "AMDILISD::RET";
827    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
828    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
829    case AMDILISD::ADD: return "AMDILISD::ADD";
830    case AMDILISD::UMUL: return "AMDILISD::UMUL";
831    case AMDILISD::AND: return "AMDILISD::AND";
832    case AMDILISD::OR: return "AMDILISD::OR";
833    case AMDILISD::NOT: return "AMDILISD::NOT";
834    case AMDILISD::XOR: return "AMDILISD::XOR";
835    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
836    case AMDILISD::SMAX: return "AMDILISD::SMAX";
837    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
838    case AMDILISD::MOVE: return "AMDILISD::MOVE";
839    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
840    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
841    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
842    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
843    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
844    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
845    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
846    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
847    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
848    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
849    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
850    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
851    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
852    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
853    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
854    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
855    case AMDILISD::CMP: return "AMDILISD::CMP";
856    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
857    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
858    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
859    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
860    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
861    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
862    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
863    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
864    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
865    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
866    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
867    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
868    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
869    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
870    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
871    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
872    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
873    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
874    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
875    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
876    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
877    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
878    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
879    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
880    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
881    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
882    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
883    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
884    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
885    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
886    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
887    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
888    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
889    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
890    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
891    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
892    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
893    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
894    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
895    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
896    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
897    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
898    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
899    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
900    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
901    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
902    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
903    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
904    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
905    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
906    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
907    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
908    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
909    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
910    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
911    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
912    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
913    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
914    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
915    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
916    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
917    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
918    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
919    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
920    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
921    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
922    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
923    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
924    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
925    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
926    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
927    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
928    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
929    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
930    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
931    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
932    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
933    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
934    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
935    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
936    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
937    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
938    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
939    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
940    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
941    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
942    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
943    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
944    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
945    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
946    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
947    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
948    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
949    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
950    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
951    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
952    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
953    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
954    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
955    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
956    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
957    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
958    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
959    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
960    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
961    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
962    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
963    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
964    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
965
966  };
967}
// getTgtMemIntrinsic - Map the AMDIL atomic/append/consume intrinsics onto
// their target-specific memory-touching DAG opcodes and fill in the memory
// operand info (type, pointer, alignment, volatility, read/write flags).
// Returns false for anything outside the AMDIL intrinsic range so generic
// lowering handles it.
//
// Naming scheme visible in the cases below: the _g/_l/_r infix in the
// intrinsic name selects the ATOM_G_* / ATOM_L_* / ATOM_R_* node family,
// and a _noret suffix selects the *_NORET node and clears isRet.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Only intrinsics inside the AMDIL-specific ID range are handled here.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false;  // set by the float xchg variants below
  unsigned IntNo;             // selected AMDILISD opcode
  bool isRet = true;          // cleared by the *_noret variants
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // dec/inc: on CAL versions older than SC_136 the dedicated DEC/INC
    // atomics are not used; the code falls back to SUB/ADD instead.
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    // max/min have distinct signed (i32) and unsigned (u32 -> U*) nodes,
    // so each intrinsic gets its own case rather than sharing labels.
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
             // intentional fall-through: the f32 xchg shares the integer
             // XCHG node once bitCastToInt is recorded
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
             // intentional fall-through (see above)
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
             // intentional fall-through (see above)
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
             // intentional fall-through (see above)
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
             // intentional fall-through (see above)
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
             // intentional fall-through (see above)
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // NOTE(review): f32 is reported when bitCastToInt is set (the float xchg
  // variants); confirm this is the intended memory type rather than i32.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);  // first call operand is the address
  Info.offset = 0;
  Info.align = 4;                 // all handled ops are 32-bit accesses
  Info.vol = true;                // atomics must not be reordered/elided
  Info.readMem = isRet;           // _noret forms never read the old value
  Info.writeMem = true;           // every handled intrinsic stores
  return true;
}
1347// The backend supports 32 and 64 bit floating point immediates
1348bool
1349AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1350{
1351  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1352      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1353    return true;
1354  } else {
1355    return false;
1356  }
1357}
1358
1359bool
1360AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1361{
1362  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1363      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1364    return false;
1365  } else {
1366    return true;
1367  }
1368}
1369
1370
1371// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1372// be zero. Op is expected to be a target specific node. Used by DAG
1373// combiner.
1374
1375void
1376AMDILTargetLowering::computeMaskedBitsForTargetNode(
1377    const SDValue Op,
1378    APInt &KnownZero,
1379    APInt &KnownOne,
1380    const SelectionDAG &DAG,
1381    unsigned Depth) const
1382{
1383  APInt KnownZero2;
1384  APInt KnownOne2;
1385  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1386  switch (Op.getOpcode()) {
1387    default: break;
1388    case AMDILISD::SELECT_CC:
1389             DAG.ComputeMaskedBits(
1390                 Op.getOperand(1),
1391                 KnownZero,
1392                 KnownOne,
1393                 Depth + 1
1394                 );
1395             DAG.ComputeMaskedBits(
1396                 Op.getOperand(0),
1397                 KnownZero2,
1398                 KnownOne2
1399                 );
1400             assert((KnownZero & KnownOne) == 0
1401                 && "Bits known to be one AND zero?");
1402             assert((KnownZero2 & KnownOne2) == 0
1403                 && "Bits known to be one AND zero?");
1404             // Only known if known in both the LHS and RHS
1405             KnownOne &= KnownOne2;
1406             KnownZero &= KnownZero2;
1407             break;
1408  };
1409}
1410
1411// This is the function that determines which calling convention should
1412// be used. Currently there is only one calling convention
1413CCAssignFn*
1414AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1415{
1416  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1417  return CC_AMDIL32;
1418}
1419
1420// LowerCallResult - Lower the result values of an ISD::CALL into the
1421// appropriate copies out of appropriate physical registers.  This assumes that
1422// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1423// being lowered.  The returns a SDNode with the same number of values as the
1424// ISD::CALL.
1425SDValue
1426AMDILTargetLowering::LowerCallResult(
1427    SDValue Chain,
1428    SDValue InFlag,
1429    CallingConv::ID CallConv,
1430    bool isVarArg,
1431    const SmallVectorImpl<ISD::InputArg> &Ins,
1432    DebugLoc dl,
1433    SelectionDAG &DAG,
1434    SmallVectorImpl<SDValue> &InVals) const
1435{
1436  // Assign locations to each value returned by this call
1437  SmallVector<CCValAssign, 16> RVLocs;
1438  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1439                 getTargetMachine(), RVLocs, *DAG.getContext());
1440  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1441
1442  // Copy all of the result registers out of their specified physreg.
1443  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1444    EVT CopyVT = RVLocs[i].getValVT();
1445    if (RVLocs[i].isRegLoc()) {
1446      Chain = DAG.getCopyFromReg(
1447          Chain,
1448          dl,
1449          RVLocs[i].getLocReg(),
1450          CopyVT,
1451          InFlag
1452          ).getValue(1);
1453      SDValue Val = Chain.getValue(0);
1454      InFlag = Chain.getValue(2);
1455      InVals.push_back(Val);
1456    }
1457  }
1458
1459  return Chain;
1460
1461}
1462
1463//===----------------------------------------------------------------------===//
1464//                           Other Lowering Hooks
1465//===----------------------------------------------------------------------===//
1466
1467// Recursively assign SDNodeOrdering to any unordered nodes
1468// This is necessary to maintain source ordering of instructions
1469// under -O0 to avoid odd-looking "skipping around" issues.
1470  static const SDValue
1471Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
1472{
1473  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
1474    DAG.AssignOrdering( New.getNode(), order );
1475    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
1476      Ordered( DAG, order, New.getOperand(i) );
1477  }
1478  return New;
1479}
1480
// LOWER(A) expands to a 'case ISD::A:' label that dispatches to the matching
// Lower##A hook and tags the resulting nodes with the original node's
// ordering (via Ordered) so -O0 source order is preserved.
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

// LowerOperation - Entry point for custom lowering of the operations marked
// Custom in the constructor.  Each LOWER case returns directly from inside
// the macro expansion; an opcode reaching 'default' has no custom lowering
// implemented yet, so the node is dumped and compilation is aborted.
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
      break;
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(FP_TO_UINT);
      LOWER(UINT_TO_FP);
      LOWER(MUL);
      LOWER(SUB);
      LOWER(FDIV);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(UREM);
      LOWER(BUILD_VECTOR);
      LOWER(INSERT_VECTOR_ELT);
      LOWER(EXTRACT_VECTOR_ELT);
      LOWER(EXTRACT_SUBVECTOR);
      LOWER(SCALAR_TO_VECTOR);
      LOWER(CONCAT_VECTORS);
      LOWER(SELECT);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
      LOWER(FP_ROUND);
  }
  // Only reached via the default branch (assert disabled): return the
  // original operation unchanged.
  return Op;
}
1522
// Returns the frame offset used to access variable arguments.
// VarArgsFrameOffset is a class member maintained elsewhere; this is a
// plain accessor with no side effects.
int
AMDILTargetLowering::getVarArgsFrameOffset() const
{
  return VarArgsFrameOffset;
}
1528#undef LOWER
1529
1530SDValue
1531AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
1532{
1533  SDValue DST = Op;
1534  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
1535  const GlobalValue *G = GADN->getGlobal();
1536  DebugLoc DL = Op.getDebugLoc();
1537  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
1538  if (!GV) {
1539    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1540  } else {
1541    if (GV->hasInitializer()) {
1542      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
1543      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
1544        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
1545      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
1546        DST = DAG.getConstantFP(CF->getValueAPF(),
1547            Op.getValueType());
1548      } else if (dyn_cast<ConstantAggregateZero>(C)) {
1549        EVT VT = Op.getValueType();
1550        if (VT.isInteger()) {
1551          DST = DAG.getConstant(0, VT);
1552        } else {
1553          DST = DAG.getConstantFP(0, VT);
1554        }
1555      } else {
1556        assert(!"lowering this type of Global Address "
1557            "not implemented yet!");
1558        C->dump();
1559        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1560      }
1561    } else {
1562      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1563    }
1564  }
1565  return DST;
1566}
1567
1568SDValue
1569AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
1570{
1571  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1572  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
1573  return Result;
1574}
1575SDValue
1576AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
1577{
1578  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1579  EVT PtrVT = Op.getValueType();
1580  SDValue Result;
1581  if (CP->isMachineConstantPoolEntry()) {
1582    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1583        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1584  } else {
1585    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1586        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1587  }
1588  return Result;
1589}
1590
1591SDValue
1592AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
1593{
1594  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
1595  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
1596  return Result;
1597}
1598
1599/// LowerFORMAL_ARGUMENTS - transform physical registers into
1600/// virtual registers and generate load operations for
1601/// arguments places on the stack.
1602/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  // NOTE(review): the CCState is built with the function's own calling
  // convention (CC) rather than the incoming CallConv parameter -- the
  // parameter is only forwarded to LowerMemArgument below. Confirm this
  // is intentional.
  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  // Materialize each formal argument: register-assigned arguments become
  // live-in copies, memory-assigned arguments are loaded from the stack.
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      // Register the physreg as a function live-in and read it into a
      // virtual register.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any promotion (SExt/ZExt/AExt) needs a truncate back to the
      // value's declared type.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
1699/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1700/// by "Src" to address "Dst" with size and alignment information specified by
1701/// the specific parameter attribute. The copy will be passed as a byval
1702/// function parameter.
1703static SDValue
1704CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1705    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1706  assert(0 && "MemCopy does not exist yet");
1707  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1708
1709  return DAG.getMemcpy(Chain,
1710      Src.getDebugLoc(),
1711      Dst, Src, SizeNode, Flags.getByValAlign(),
1712      /*IsVol=*/false, /*AlwaysInline=*/true,
1713      MachinePointerInfo(), MachinePointerInfo());
1714}
1715
1716SDValue
1717AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1718    SDValue StackPtr, SDValue Arg,
1719    DebugLoc dl, SelectionDAG &DAG,
1720    const CCValAssign &VA,
1721    ISD::ArgFlagsTy Flags) const
1722{
1723  unsigned int LocMemOffset = VA.getLocMemOffset();
1724  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1725  PtrOff = DAG.getNode(ISD::ADD,
1726      dl,
1727      getPointerTy(), StackPtr, PtrOff);
1728  if (Flags.isByVal()) {
1729    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1730  } else {
1731    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1732        MachinePointerInfo::getStack(LocMemOffset),
1733        false, false, 0);
1734  }
1735  return PtrOff;
1736}
1737/// LowerCAL - functions arguments are copied from virtual
1738/// regs to (physical regs)/(stack frame), CALLSEQ_START and
1739/// CALLSEQ_END are emitted.
1740/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unsupported on this target; force them off.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Stack space required for the outgoing arguments.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register-assigned argument: remember the (physreg, value) pair;
      // the actual CopyToReg nodes are emitted after all stores below.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Glue all the argument stores together so they complete before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  // Copy register arguments into their physregs, chained through InFlag
  // so the scheduler keeps them adjacent to the call.
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
1915
// Emit a DAG sequence computing count-leading-zeros of an N-bit value
// ('bits' == N) held in an i32 (or vector-of-i32) operand, using the
// float-exponent trick shown in the pseudocode below. Returns N when the
// input is zero.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  // Matching f32 type (scalar or vector) for the bitcast-based exponent
  // extraction.
  EVT FPTY;
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
          INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
     int xor = 0x3f800000U | x;
     float tp = as_float(xor);
     float t = tp + -1.0f;
     uint tint = as_uint(t);
     int cmp = (x != 0);
     uint tsrc = tint >> 23;
     uint tmask = tsrc & 0xffU;
     uint cst = (103 + N)U - tmask;
     return cmp ? cst : N;
     }
     */
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZu16 only works on 32bit types");
  // uint x = Op
  SDValue x = Op;
  // xornode = 0x3f800000 | x  (0x3f800000 is the bit pattern of 1.0f)
  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
      DAG.getConstant(0x3f800000, INTTY), x);
  // float tp = as_float(xornode)
  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
  // float t = tp + -1.0f
  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
      DAG.getConstantFP(-1.0f, FPTY));
  // uint tint = as_uint(t)
  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
  // int cmp = (x != 0)
  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
      DAG.getConstant(0, INTTY));
  // uint tsrc = tint >> 23  (extract the biased exponent)
  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
      DAG.getConstant(23, INTTY));
  // uint tmask = tsrc & 0xFF
  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
      DAG.getConstant(0xFFU, INTTY));
  // uint cst = (103 + bits) - tmask
  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
      DAG.getConstant((103U + bits), INTTY), tmask);
  // return cmp ? cst : bits  (zero input yields the full width)
  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
      DAG.getConstant(bits, INTTY));
  return cst;
}
1975
// Emit a DAG sequence computing count-leading-zeros of a full 32-bit
// value. HD5XXX and newer use the hardware ffb_hi intrinsic; HD4XXX
// composes two 16-bit counts via genCLZuN.
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    //__clz_32bit(uint u)
    //{
    // int z = __amdil_ffb_hi(u) ;
    // return z < 0 ? 32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0  (ffb_hi reports negative when no bit is found)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    //  static inline uint
    //__clz_32bit(uint x)
    //{
    //    uint zh = __clz_16bit(x >> 16);
    //    uint zl = __clz_16bit(x & 0xffffU);
    //   return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U  (high half was all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Emit a DAG sequence computing count-leading-zeros of a 64-bit value
// (scalar i64 or vector of i64). Evergreen (HD5XXX+) composes two 32-bit
// counts; HD4XXX composes three 23-bit counts via genCLZuN.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  // Matching i32 type (scalar or vector) for the 32-bit sub-counts.
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = (uint)(x & 0xFFFFFFFF)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = (uint)(x >> 32)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32  (high word was all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    //  static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (implemented as zh + (uint)-5U)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// Emit a DAG sequence converting an f64 (scalar or vector) to i64/u64.
// 'includeSign' selects the signed conversion. Post-HD6XXX devices use a
// two-step float multiply/mad decomposition; older devices decompose the
// double's mantissa and exponent manually.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // Matching i64 and i32 types (scalar or vector) for the result and the
  // 32-bit intermediate words.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the IEEE-754 f32 bit
    // patterns of 0x1.0p-32f and -0x1.0p+32f, but getConstantFP takes a
    // numeric value, not a bit pattern -- verify the emitted constants
    // are the intended powers of two.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Negate and select based on the original sign: d == |d| picks l.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
    // Convert d in to 32-bit components
    long x = as_long(d);
    xhi = LCOMPHI(x);
    xlo = LCOMPLO(x);

    // Generate 'normalized' mantissa
    mhi = xhi | 0x00100000; // hidden bit
    mhi <<= 11;
    temp = xlo >> (32 - 11);
    mhi |= temp
    mlo = xlo << 11;

    // Compute shift right count from exponent
    e = (xhi >> (52-32)) & 0x7ff;
    sr = 1023 + 63 - e;
    srge64 = sr >= 64;
    srge32 = sr >= 32;

    // Compute result for 0 <= sr < 32
    rhi0 = mhi >> (sr &31);
    rlo0 = mlo >> (sr &31);
    temp = mhi << (32 - sr);
    temp |= rlo0;
    rlo0 = sr ? temp : rlo0;

    // Compute result for 32 <= sr
    rhi1 = 0;
    rlo1 = srge64 ? 0 : rhi0;

    // Pick between the 2 results
    rhi = srge32 ? rhi1 : rhi0;
    rlo = srge32 ? rlo1 : rlo0;

    // Optional saturate on overflow
    srlt0 = sr < 0;
    rhi = srlt0 ? MAXVALUE : rhi;
    rlo = srlt0 ? MAXVALUE : rlo;

    // Create long
    res = LCREATE( rlo, rhi );

    // Deal with sign bit (ignoring whether result is signed or unsigned value)
    if (includeSign) {
    sign = ((signed int) xhi) >> 31; fill with sign bit
    sign = LCREATE( sign, sign );
    res += sign;
    res ^= sign;
    }

    return res;
    }
    */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (OR in the implicit hidden bit,
    // then left-align across the two 32-bit words)
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: add then xor with the sign-filled word pair,
    // i.e. two's-complement negation when the input was negative.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// genf64toi32 - Software expansion of an f64 -> i32/u32 conversion for
// devices without native 64-bit float-to-int support. Works element-wise
// on vectors. When 'includeSign' is true the result is two's-complement
// negated according to the sign bit of the input (signed conversion);
// otherwise the magnitude is returned as-is (unsigned conversion).
// NOTE(review): no overflow saturation is emitted even though the
// pseudocode below mentions an optional saturate — confirm intended.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // Pick i32/i64 scalar or vector types matching the input's width.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
  // Convert d in to 32-bit components
  long x = as_long(d);
  xhi = LCOMPHI(x);
  xlo = LCOMPLO(x);

  // Generate 'normalized' mantissa
  mhi = xhi | 0x00100000; // hidden bit
  mhi <<= 11;
  temp = xlo >> (32 - 11);
  mhi |= temp

  // Compute shift right count from exponent
  e = (xhi >> (52-32)) & 0x7ff;
  sr = 1023 + 31 - e;
  srge32 = sr >= 32;

  // Compute result for 0 <= sr < 32
  res = mhi >> (sr &31);
  res = srge32 ? 0 : res;

  // Optional saturate on overflow
  srlt0 = sr < 0;
  res = srlt0 ? MAXVALUE : res;

  // Deal with sign bit (ignoring whether result is signed or unsigned value)
  if (includeSign) {
  sign = ((signed int) xhi) >> 31; fill with sign bit
  res += sign;
  res ^= sign;
  }

  return res;
  }
  */
  // 11 = 63 - 52: bits to shift so the 53-bit mantissa tops out at bit 63.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: OR in the implicit hidden bit (bit 20
  // of the high word), shift it to the top, pull in the next 11 bits from
  // the low word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent: sr = 1023 + 31 - e.
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  // srge32 is all-ones when sr >= 32 (magnitude underflows to 0).
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: (res + sign) ^ sign negates res when sign == -1.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
2417
2418SDValue
2419AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
2420{
2421  SDValue DST;
2422  SDValue RHS = Op.getOperand(0);
2423  EVT RHSVT = RHS.getValueType();
2424  MVT RST = RHSVT.getScalarType().getSimpleVT();
2425  EVT LHSVT = Op.getValueType();
2426  MVT LST = LHSVT.getScalarType().getSimpleVT();
2427  DebugLoc DL = Op.getDebugLoc();
2428  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2429  if (RST == MVT::f64 && RHSVT.isVector()
2430      && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
2431    // We dont support vector 64bit floating point convertions.
2432    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2433      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2434          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2435      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
2436      if (!x) {
2437        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2438      } else {
2439        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2440            DST, op, DAG.getTargetConstant(x, MVT::i32));
2441      }
2442
2443    }
2444  } else {
2445    if (RST == MVT::f64
2446        && LST == MVT::i32) {
2447      if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2448        DST = SDValue(Op.getNode(), 0);
2449      } else {
2450        DST = genf64toi32(RHS, DAG, false);
2451      }
2452    } else if (RST == MVT::f64
2453        && LST == MVT::i64) {
2454      DST = genf64toi64(RHS, DAG, false);
2455    } else if (RST == MVT::f64
2456        && (LST == MVT::i8 || LST == MVT::i16)) {
2457      if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2458        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2459      } else {
2460        SDValue ToInt = genf64toi32(RHS, DAG, false);
2461        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2462      }
2463
2464    } else {
2465      DST = SDValue(Op.getNode(), 0);
2466    }
2467  }
2468  return DST;
2469}
// genu32tof64 - Software expansion of a u32 -> f64 conversion (scalar or
// vector). On SC >= 135 the integer is spliced into the mantissa of
// 2^52 and the bias subtracted; otherwise the value is normalized and
// packed into IEEE-754 double format by hand.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  // Pick i32/i64 scalar or vector types matching the input's width.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  if (STM.calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    // Manual path: count leading zeros to normalize the mantissa.
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac: low 11 mantissa bits go to the high end of
    // the low word; the upper 20 land below the 11-bit sign+exponent field.
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
2531SDValue
2532AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
2533    SelectionDAG &DAG) const
2534{
2535  EVT RHSVT = RHS.getValueType();
2536  DebugLoc DL = RHS.getDebugLoc();
2537  EVT INTVT;
2538  EVT LONGVT;
2539  bool isVec = RHSVT.isVector();
2540  if (isVec) {
2541    INTVT = EVT(MVT::getVectorVT(MVT::i32,
2542          RHSVT.getVectorNumElements()));
2543  } else {
2544    INTVT = EVT(MVT::i32);
2545  }
2546  LONGVT = RHSVT;
2547  SDValue x = RHS;
2548  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2549  if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2550    // double dhi = (double)(as_uint2(x).y);
2551    // double dlo = (double)(as_uint2(x).x);
2552    // return mad(dhi, 0x1.0p+32, dlo)
2553    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
2554    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
2555    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
2556    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
2557    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
2558        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
2559  } else if (STM.calVersion() >= CAL_VERSION_SC_135) {
2560    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2561    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2562    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
2563    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
2564    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
2565    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2566    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
2567    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
2568    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
2569    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
2570        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
2571    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
2572    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
2573
2574  } else {
2575    SDValue clz = genCLZu64(x, DAG);
2576    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2577    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2578
2579    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2580    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2581        DAG.getConstant( (1023+63), INTVT), clz );
2582    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
2583    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2584        mash, exp, mash );  // exp = exp, or 0 if input was 0
2585
2586    // Normalize frac
2587    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
2588        clz, DAG.getConstant( 31, INTVT ) );
2589    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
2590        DAG.getConstant( 32, INTVT ), clz31 );
2591    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
2592    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
2593    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
2594    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
2595    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2596    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2597    SDValue rlo2 = DAG.getConstant( 0, INTVT );
2598    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
2599        clz, DAG.getConstant( 32, INTVT ) );
2600    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2601        clz32, rhi2, rhi1 );
2602    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2603        clz32, rlo2, rlo1 );
2604
2605    // Eliminate hidden bit
2606    rhi = DAG.getNode( ISD::AND, DL, INTVT,
2607        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2608
2609    // Save bits needed to round properly
2610    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
2611        rlo, DAG.getConstant( 0x7ff, INTVT ) );
2612
2613    // Pack exponent and frac
2614    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
2615        rlo, DAG.getConstant( 11, INTVT ) );
2616    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
2617        rhi, DAG.getConstant( (32 - 11), INTVT ) );
2618    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
2619    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2620        rhi, DAG.getConstant( 11, INTVT ) );
2621    exp = DAG.getNode( ISD::SHL, DL, INTVT,
2622        exp, DAG.getConstant( 20, INTVT ) );
2623    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2624
2625    // Compute rounding bit
2626    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
2627        rlo, DAG.getConstant( 1, INTVT ) );
2628    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
2629        round, DAG.getConstant( 0x3ff, INTVT ) );
2630    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2631        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
2632        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
2633    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
2634    round = DAG.getNode( ISD::SRL, DL, INTVT,
2635        round, DAG.getConstant( 10, INTVT ) );
2636    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
2637
2638    // Add rounding bit
2639    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
2640        round, DAG.getConstant( 0, INTVT ) );
2641    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2642    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
2643    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2644  }
2645}
2646SDValue
2647AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
2648{
2649  SDValue RHS = Op.getOperand(0);
2650  EVT RHSVT = RHS.getValueType();
2651  MVT RST = RHSVT.getScalarType().getSimpleVT();
2652  EVT LHSVT = Op.getValueType();
2653  MVT LST = LHSVT.getScalarType().getSimpleVT();
2654  DebugLoc DL = Op.getDebugLoc();
2655  SDValue DST;
2656  EVT INTVT;
2657  EVT LONGVT;
2658  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
2659  if (LST == MVT::f64 && LHSVT.isVector()
2660      && STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
2661    // We dont support vector 64bit floating point convertions.
2662    DST = Op;
2663    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
2664      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2665          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2666      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
2667      if (!x) {
2668        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2669      } else {
2670        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
2671            op, DAG.getTargetConstant(x, MVT::i32));
2672      }
2673
2674    }
2675  } else {
2676
2677    if (RST == MVT::i32
2678        && LST == MVT::f64) {
2679      if (STM.device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2680        DST = SDValue(Op.getNode(), 0);
2681      } else {
2682        DST = genu32tof64(RHS, LHSVT, DAG);
2683      }
2684    } else if (RST == MVT::i64
2685        && LST == MVT::f64) {
2686      DST = genu64tof64(RHS, LHSVT, DAG);
2687    } else {
2688      DST = SDValue(Op.getNode(), 0);
2689    }
2690  }
2691  return DST;
2692}
2693
// LowerSUB - Expand a 64-bit (i64/v2i64) subtraction into two 32-bit
// subtractions plus a manual borrow: the borrow is the ULT comparison of
// the low words (-1 when a borrow occurred), which is then *added* to the
// high difference. Non-64-bit types are returned unchanged.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    SDValue cmp;
    if (OVT == MVT::i64) {
      // Scalar case: borrow = (LHSLO u< RHSLO) ? -1 : 0.
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: compute the borrow per lane and rebuild a v2i32.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    // Adding -1 (the borrow flag) subtracts the borrow from the high word.
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
2751SDValue
2752AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
2753{
2754  EVT OVT = Op.getValueType();
2755  SDValue DST;
2756  if (OVT.getScalarType() == MVT::f64) {
2757    DST = LowerFDIV64(Op, DAG);
2758  } else if (OVT.getScalarType() == MVT::f32) {
2759    DST = LowerFDIV32(Op, DAG);
2760  } else {
2761    DST = SDValue(Op.getNode(), 0);
2762  }
2763  return DST;
2764}
2765
2766SDValue
2767AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
2768{
2769  EVT OVT = Op.getValueType();
2770  SDValue DST;
2771  if (OVT.getScalarType() == MVT::i64) {
2772    DST = LowerSDIV64(Op, DAG);
2773  } else if (OVT.getScalarType() == MVT::i32) {
2774    DST = LowerSDIV32(Op, DAG);
2775  } else if (OVT.getScalarType() == MVT::i16
2776      || OVT.getScalarType() == MVT::i8) {
2777    DST = LowerSDIV24(Op, DAG);
2778  } else {
2779    DST = SDValue(Op.getNode(), 0);
2780  }
2781  return DST;
2782}
2783
2784SDValue
2785AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
2786{
2787  EVT OVT = Op.getValueType();
2788  SDValue DST;
2789  if (OVT.getScalarType() == MVT::i64) {
2790    DST = LowerSREM64(Op, DAG);
2791  } else if (OVT.getScalarType() == MVT::i32) {
2792    DST = LowerSREM32(Op, DAG);
2793  } else if (OVT.getScalarType() == MVT::i16) {
2794    DST = LowerSREM16(Op, DAG);
2795  } else if (OVT.getScalarType() == MVT::i8) {
2796    DST = LowerSREM8(Op, DAG);
2797  } else {
2798    DST = SDValue(Op.getNode(), 0);
2799  }
2800  return DST;
2801}
2802
2803SDValue
2804AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
2805{
2806  EVT OVT = Op.getValueType();
2807  SDValue DST;
2808  if (OVT.getScalarType() == MVT::i64) {
2809    DST = LowerUREM64(Op, DAG);
2810  } else if (OVT.getScalarType() == MVT::i32) {
2811    DST = LowerUREM32(Op, DAG);
2812  } else if (OVT.getScalarType() == MVT::i16) {
2813    DST = LowerUREM16(Op, DAG);
2814  } else if (OVT.getScalarType() == MVT::i8) {
2815    DST = LowerUREM8(Op, DAG);
2816  } else {
2817    DST = SDValue(Op.getNode(), 0);
2818  }
2819  return DST;
2820}
2821
// LowerMUL - Expand a 64-bit (i64/v2i64) multiply into 32-bit pieces
// using the schoolbook decomposition:
//   lo = lo(l1*l0)
//   hi = hi(l1*l0) + lo(h0*l1) + lo(h1*l0)
// (the h1*h0 term only affects bits above 64 and is dropped).
// Non-64-bit multiplies are returned unchanged.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0 -- high 32 bits of the low-word product.
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Reassemble the 64-bit result from the low and high halves.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// LowerBUILD_VECTOR - Lower BUILD_VECTOR by splatting operand 0 with
// VBUILD, then inserting the remaining non-undef operands. The switch
// below intentionally falls through from case 4 -> 3 -> 2 so that all
// higher elements get inserted.
// NOTE(review): the insert indices 7/6/5 exceed the element count of the
// vector types handled here — presumably they encode lane positions for
// the AMDIL INSERT lowering rather than plain element indices; confirm
// against the INSERT_VECTOR_ELT handling.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Start with a splat of the first operand.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // Intentional fallthrough: also insert elements 2 and 1.
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // Intentional fallthrough: also insert element 1.
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
2950
// LowerINSERT_VECTOR_ELT - Lower to AMDIL VINSERT. mask2/mask3 are the
// per-byte swizzle masks VINSERT takes: mask2 keeps the bytes of the
// original lanes (0x04030201 with the target lane's byte cleared) and
// mask3 selects the lane receiving the new value. For a non-constant
// index, every possible lane insert is generated and the right one is
// chosen at runtime with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Use the inserted value unless it is undef, in which case reuse the
  // vector itself as the source of the lane value.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static index: a single VINSERT with masks for that lane.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: start with an insert into lane 0 (swizzleNum == 0),
    // then select among inserts into lanes 1..n-1 by comparing the
    // runtime index.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x), splatted across the vector for the CMOVLOG.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
3006
3007SDValue
3008AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3009    SelectionDAG &DAG) const
3010{
3011  EVT VT = Op.getValueType();
3012  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3013  uint64_t swizzleNum = 0;
3014  DebugLoc DL = Op.getDebugLoc();
3015  SDValue Res;
3016  if (!Op.getOperand(0).getValueType().isVector()) {
3017    Res = Op.getOperand(0);
3018    return Res;
3019  }
3020  if (CSDN) {
3021    // Static vector extraction
3022    swizzleNum = CSDN->getZExtValue() + 1;
3023    Res = DAG.getNode(AMDILISD::VEXTRACT,
3024        DL, VT,
3025        Op.getOperand(0),
3026        DAG.getTargetConstant(swizzleNum, MVT::i32));
3027  } else {
3028    SDValue Op1 = Op.getOperand(1);
3029    uint32_t vecSize = 4;
3030    SDValue Op0 = Op.getOperand(0);
3031    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
3032        DL, VT, Op0,
3033        DAG.getTargetConstant(1, MVT::i32));
3034    if (Op0.getValueType().isVector()) {
3035      vecSize = Op0.getValueType().getVectorNumElements();
3036    }
3037    for (uint32_t x = 2; x <= vecSize; ++x) {
3038      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
3039          DL, VT, Op0,
3040          DAG.getTargetConstant(x, MVT::i32));
3041      SDValue c = DAG.getNode(AMDILISD::CMP,
3042          DL, Op1.getValueType(),
3043          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
3044          Op1, DAG.getConstant(x, MVT::i32));
3045      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
3046          VT, c, t, res);
3047
3048    }
3049    Res = res;
3050  }
3051  return Res;
3052}
3053
3054SDValue
3055AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3056    SelectionDAG &DAG) const
3057{
3058  uint32_t vecSize = Op.getValueType().getVectorNumElements();
3059  SDValue src = Op.getOperand(0);
3060  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3061  uint64_t offset = 0;
3062  EVT vecType = Op.getValueType().getVectorElementType();
3063  DebugLoc DL = Op.getDebugLoc();
3064  SDValue Result;
3065  if (CSDN) {
3066    offset = CSDN->getZExtValue();
3067    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3068        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
3069    Result = DAG.getNode(AMDILISD::VBUILD, DL,
3070        Op.getValueType(), Result);
3071    for (uint32_t x = 1; x < vecSize; ++x) {
3072      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3073          src, DAG.getConstant(offset + x, MVT::i32));
3074      if (elt.getOpcode() != ISD::UNDEF) {
3075        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3076            Op.getValueType(), Result, elt,
3077            DAG.getConstant(x, MVT::i32));
3078      }
3079    }
3080  } else {
3081    SDValue idx = Op.getOperand(1);
3082    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3083        DL, vecType, src, idx);
3084    Result = DAG.getNode(AMDILISD::VBUILD, DL,
3085        Op.getValueType(), Result);
3086    for (uint32_t x = 1; x < vecSize; ++x) {
3087      idx = DAG.getNode(ISD::ADD, DL, vecType,
3088          idx, DAG.getConstant(1, MVT::i32));
3089      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3090          src, idx);
3091      if (elt.getOpcode() != ISD::UNDEF) {
3092        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3093            Op.getValueType(), Result, elt, idx);
3094      }
3095    }
3096  }
3097  return Result;
3098}
3099SDValue
3100AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3101    SelectionDAG &DAG) const
3102{
3103  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
3104      Op.getDebugLoc(),
3105      Op.getValueType(),
3106      Op.getOperand(0));
3107  return Res;
3108}
3109SDValue
3110AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
3111{
3112  SDValue Cond = Op.getOperand(0);
3113  SDValue LHS = Op.getOperand(1);
3114  SDValue RHS = Op.getOperand(2);
3115  DebugLoc DL = Op.getDebugLoc();
3116  Cond = getConversionNode(DAG, Cond, Op, true);
3117  Cond = DAG.getNode(AMDILISD::CMOVLOG,
3118      DL,
3119      Op.getValueType(), Cond, LHS, RHS);
3120  return Cond;
3121}
3122SDValue
3123AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
3124{
3125  SDValue Cond;
3126  SDValue LHS = Op.getOperand(0);
3127  SDValue RHS = Op.getOperand(1);
3128  SDValue CC  = Op.getOperand(2);
3129  DebugLoc DL = Op.getDebugLoc();
3130  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3131  unsigned int AMDILCC = CondCCodeToCC(
3132      SetCCOpcode,
3133      LHS.getValueType().getSimpleVT().SimpleTy);
3134  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
3135  Cond = DAG.getNode(
3136      ISD::SELECT_CC,
3137      Op.getDebugLoc(),
3138      LHS.getValueType(),
3139      LHS, RHS,
3140      DAG.getConstant(-1, MVT::i32),
3141      DAG.getConstant(0, MVT::i32),
3142      CC);
3143  Cond = getConversionNode(DAG, Cond, Op, true);
3144  Cond = DAG.getNode(
3145      ISD::AND,
3146      DL,
3147      Cond.getValueType(),
3148      DAG.getConstant(1, Cond.getValueType()),
3149      Cond);
3150  return Cond;
3151}
3152
// LowerSIGN_EXTEND_INREG - Sign-extend the low 'baseBits' of each element
// in place using a shift-left/arithmetic-shift-right pair. Sub-32-bit
// values are first widened to 32 bits so the shifts operate on a legal
// width, then truncated back.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
3184EVT
3185AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
3186{
3187  int iSize = (size * numEle);
3188  int vEle = (iSize >> ((size == 64) ? 6 : 5));
3189  if (!vEle) {
3190    vEle = 1;
3191  }
3192  if (size == 64) {
3193    if (vEle == 1) {
3194      return EVT(MVT::i64);
3195    } else {
3196      return EVT(MVT::getVectorVT(MVT::i64, vEle));
3197    }
3198  } else {
3199    if (vEle == 1) {
3200      return EVT(MVT::i32);
3201    } else {
3202      return EVT(MVT::getVectorVT(MVT::i32, vEle));
3203    }
3204  }
3205}
3206
3207SDValue
3208AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3209    SelectionDAG &DAG) const
3210{
3211  SDValue Chain = Op.getOperand(0);
3212  SDValue Size = Op.getOperand(1);
3213  unsigned int SPReg = AMDIL::SP;
3214  DebugLoc DL = Op.getDebugLoc();
3215  SDValue SP = DAG.getCopyFromReg(Chain,
3216      DL,
3217      SPReg, MVT::i32);
3218  SDValue NewSP = DAG.getNode(ISD::ADD,
3219      DL,
3220      MVT::i32, SP, Size);
3221  Chain = DAG.getCopyToReg(SP.getValue(1),
3222      DL,
3223      SPReg, NewSP);
3224  SDValue Ops[2] = {NewSP, Chain};
3225  Chain = DAG.getMergeValues(Ops, 2 ,DL);
3226  return Chain;
3227}
3228SDValue
3229AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
3230{
3231  SDValue Chain = Op.getOperand(0);
3232  SDValue Cond  = Op.getOperand(1);
3233  SDValue Jump  = Op.getOperand(2);
3234  SDValue Result;
3235  Result = DAG.getNode(
3236      AMDILISD::BRANCH_COND,
3237      Op.getDebugLoc(),
3238      Op.getValueType(),
3239      Chain, Jump, Cond);
3240  return Result;
3241}
3242
3243SDValue
3244AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
3245{
3246  SDValue Chain = Op.getOperand(0);
3247  SDValue CC = Op.getOperand(1);
3248  SDValue LHS   = Op.getOperand(2);
3249  SDValue RHS   = Op.getOperand(3);
3250  SDValue JumpT  = Op.getOperand(4);
3251  SDValue CmpValue;
3252  SDValue Result;
3253  CmpValue = DAG.getNode(
3254      ISD::SELECT_CC,
3255      Op.getDebugLoc(),
3256      LHS.getValueType(),
3257      LHS, RHS,
3258      DAG.getConstant(-1, MVT::i32),
3259      DAG.getConstant(0, MVT::i32),
3260      CC);
3261  Result = DAG.getNode(
3262      AMDILISD::BRANCH_COND,
3263      CmpValue.getDebugLoc(),
3264      MVT::Other, Chain,
3265      JumpT, CmpValue);
3266  return Result;
3267}
3268
3269SDValue
3270AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
3271{
3272  SDValue Result = DAG.getNode(
3273      AMDILISD::DP_TO_FP,
3274      Op.getDebugLoc(),
3275      Op.getValueType(),
3276      Op.getOperand(0),
3277      Op.getOperand(1));
3278  return Result;
3279}
3280
3281SDValue
3282AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
3283{
3284  SDValue Result = DAG.getNode(
3285      AMDILISD::VCONCAT,
3286      Op.getDebugLoc(),
3287      Op.getValueType(),
3288      Op.getOperand(0),
3289      Op.getOperand(1));
3290  return Result;
3291}
// LowerRET - Lower an ISD::RET node.
//
// Assigns each outgoing value to a register location via RetCC_AMDIL32,
// marks those registers live-out, copies the values into them (the copies
// are glued together so they stay adjacent), and terminates the chain
// with an AMDILISD::RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  // Second operand is the bytes-to-pop count; always 0 here.
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  // Copy each return value into its assigned physical register.
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  // If any copies were emitted, glue the return node to the last one.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
3357
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  // No extra alignment is requested for any AMDIL function.
  return 0;
}
3363
void
AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
    MachineBasicBlock::iterator &BBI,
    DebugLoc *DL, const TargetInstrInfo *TII) const
{
  // Cache the current emission context (basic block, insertion point,
  // debug location, and instruction info) in mutable members so the
  // genVReg/generateMachineInst helpers can use them without threading
  // the state through every call.
  mBB = BB;
  mBBI = BBI;
  mDL = DL;
  mTII = TII;
}
3374uint32_t
3375AMDILTargetLowering::genVReg(uint32_t regType) const
3376{
3377  return mBB->getParent()->getRegInfo().createVirtualRegister(
3378      getTargetMachine().getRegisterInfo()->getRegClass(regType));
3379}
3380
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  // Emit 'opcode' with destination register 'dst' at the insertion point
  // cached by setPrivateData().
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}
3386
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1) const
{
  // Emit 'opcode dst, src1' at the cached insertion point.
  return generateMachineInst(opcode, dst).addReg(src1);
}
3393
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1, uint32_t src2) const
{
  // Emit 'opcode dst, src1, src2' at the cached insertion point.
  return generateMachineInst(opcode, dst, src1).addReg(src2);
}
3400
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
    uint32_t src1, uint32_t src2, uint32_t src3) const
{
  // Emit 'opcode dst, src1, src2, src3' at the cached insertion point.
  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
}
3407
3408
3409SDValue
3410AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
3411{
3412  DebugLoc DL = Op.getDebugLoc();
3413  EVT OVT = Op.getValueType();
3414  SDValue LHS = Op.getOperand(0);
3415  SDValue RHS = Op.getOperand(1);
3416  MVT INTTY;
3417  MVT FLTTY;
3418  if (!OVT.isVector()) {
3419    INTTY = MVT::i32;
3420    FLTTY = MVT::f32;
3421  } else if (OVT.getVectorNumElements() == 2) {
3422    INTTY = MVT::v2i32;
3423    FLTTY = MVT::v2f32;
3424  } else if (OVT.getVectorNumElements() == 4) {
3425    INTTY = MVT::v4i32;
3426    FLTTY = MVT::v4f32;
3427  }
3428  unsigned bitsize = OVT.getScalarType().getSizeInBits();
3429  // char|short jq = ia ^ ib;
3430  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
3431
3432  // jq = jq >> (bitsize - 2)
3433  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
3434
3435  // jq = jq | 0x1
3436  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
3437
3438  // jq = (int)jq
3439  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
3440
3441  // int ia = (int)LHS;
3442  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
3443
3444  // int ib, (int)RHS;
3445  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
3446
3447  // float fa = (float)ia;
3448  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3449
3450  // float fb = (float)ib;
3451  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3452
3453  // float fq = native_divide(fa, fb);
3454  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3455
3456  // fq = trunc(fq);
3457  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3458
3459  // float fqneg = -fq;
3460  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
3461
3462  // float fr = mad(fqneg, fb, fa);
3463  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
3464
3465  // int iq = (int)fq;
3466  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3467
3468  // fr = fabs(fr);
3469  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
3470
3471  // fb = fabs(fb);
3472  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
3473
3474  // int cv = fr >= fb;
3475  SDValue cv;
3476  if (INTTY == MVT::i32) {
3477    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3478  } else {
3479    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3480  }
3481  // jq = (cv ? jq : 0);
3482  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
3483      DAG.getConstant(0, OVT));
3484  // dst = iq + jq;
3485  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
3486  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
3487  return iq;
3488}
3489
3490SDValue
3491AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
3492{
3493  DebugLoc DL = Op.getDebugLoc();
3494  EVT OVT = Op.getValueType();
3495  SDValue LHS = Op.getOperand(0);
3496  SDValue RHS = Op.getOperand(1);
3497  // The LowerSDIV32 function generates equivalent to the following IL.
3498  // mov r0, LHS
3499  // mov r1, RHS
3500  // ilt r10, r0, 0
3501  // ilt r11, r1, 0
3502  // iadd r0, r0, r10
3503  // iadd r1, r1, r11
3504  // ixor r0, r0, r10
3505  // ixor r1, r1, r11
3506  // udiv r0, r0, r1
3507  // ixor r10, r10, r11
3508  // iadd r0, r0, r10
3509  // ixor DST, r0, r10
3510
3511  // mov r0, LHS
3512  SDValue r0 = LHS;
3513
3514  // mov r1, RHS
3515  SDValue r1 = RHS;
3516
3517  // ilt r10, r0, 0
3518  SDValue r10 = DAG.getSelectCC(DL,
3519      r0, DAG.getConstant(0, OVT),
3520      DAG.getConstant(-1, MVT::i32),
3521      DAG.getConstant(0, MVT::i32),
3522      ISD::SETLT);
3523
3524  // ilt r11, r1, 0
3525  SDValue r11 = DAG.getSelectCC(DL,
3526      r1, DAG.getConstant(0, OVT),
3527      DAG.getConstant(-1, MVT::i32),
3528      DAG.getConstant(0, MVT::i32),
3529      ISD::SETLT);
3530
3531  // iadd r0, r0, r10
3532  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3533
3534  // iadd r1, r1, r11
3535  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
3536
3537  // ixor r0, r0, r10
3538  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3539
3540  // ixor r1, r1, r11
3541  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
3542
3543  // udiv r0, r0, r1
3544  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
3545
3546  // ixor r10, r10, r11
3547  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
3548
3549  // iadd r0, r0, r10
3550  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3551
3552  // ixor DST, r0, r10
3553  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3554  return DST;
3555}
3556
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed division has no custom expansion here; the original
  // node is handed back unchanged.
  return SDValue(Op.getNode(), 0);
}
3562
3563SDValue
3564AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
3565{
3566  DebugLoc DL = Op.getDebugLoc();
3567  EVT OVT = Op.getValueType();
3568  SDValue LHS = Op.getOperand(0);
3569  SDValue RHS = Op.getOperand(1);
3570  MVT INTTY;
3571  MVT FLTTY;
3572  if (!OVT.isVector()) {
3573    INTTY = MVT::i32;
3574    FLTTY = MVT::f32;
3575  } else if (OVT.getVectorNumElements() == 2) {
3576    INTTY = MVT::v2i32;
3577    FLTTY = MVT::v2f32;
3578  } else if (OVT.getVectorNumElements() == 4) {
3579    INTTY = MVT::v4i32;
3580    FLTTY = MVT::v4f32;
3581  }
3582
3583  // The LowerUDIV24 function implements the following CL.
3584  // int ia = (int)LHS
3585  // float fa = (float)ia
3586  // int ib = (int)RHS
3587  // float fb = (float)ib
3588  // float fq = native_divide(fa, fb)
3589  // fq = trunc(fq)
3590  // float t = mad(fq, fb, fb)
3591  // int iq = (int)fq - (t <= fa)
3592  // return (type)iq
3593
3594  // int ia = (int)LHS
3595  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
3596
3597  // float fa = (float)ia
3598  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3599
3600  // int ib = (int)RHS
3601  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
3602
3603  // float fb = (float)ib
3604  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3605
3606  // float fq = native_divide(fa, fb)
3607  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3608
3609  // fq = trunc(fq)
3610  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3611
3612  // float t = mad(fq, fb, fb)
3613  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
3614
3615  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
3616  SDValue iq;
3617  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3618  if (INTTY == MVT::i32) {
3619    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3620  } else {
3621    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3622  }
3623  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
3624
3625
3626  // return (type)iq
3627  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
3628  return iq;
3629
3630}
3631
3632SDValue
3633AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
3634{
3635  DebugLoc DL = Op.getDebugLoc();
3636  EVT OVT = Op.getValueType();
3637  MVT INTTY = MVT::i32;
3638  if (OVT == MVT::v2i8) {
3639    INTTY = MVT::v2i32;
3640  } else if (OVT == MVT::v4i8) {
3641    INTTY = MVT::v4i32;
3642  }
3643  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3644  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3645  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3646  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3647  return LHS;
3648}
3649
3650SDValue
3651AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
3652{
3653  DebugLoc DL = Op.getDebugLoc();
3654  EVT OVT = Op.getValueType();
3655  MVT INTTY = MVT::i32;
3656  if (OVT == MVT::v2i16) {
3657    INTTY = MVT::v2i32;
3658  } else if (OVT == MVT::v4i16) {
3659    INTTY = MVT::v4i32;
3660  }
3661  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3662  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3663  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3664  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3665  return LHS;
3666}
3667
3668SDValue
3669AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
3670{
3671  DebugLoc DL = Op.getDebugLoc();
3672  EVT OVT = Op.getValueType();
3673  SDValue LHS = Op.getOperand(0);
3674  SDValue RHS = Op.getOperand(1);
3675  // The LowerSREM32 function generates equivalent to the following IL.
3676  // mov r0, LHS
3677  // mov r1, RHS
3678  // ilt r10, r0, 0
3679  // ilt r11, r1, 0
3680  // iadd r0, r0, r10
3681  // iadd r1, r1, r11
3682  // ixor r0, r0, r10
3683  // ixor r1, r1, r11
3684  // udiv r20, r0, r1
3685  // umul r20, r20, r1
3686  // sub r0, r0, r20
3687  // iadd r0, r0, r10
3688  // ixor DST, r0, r10
3689
3690  // mov r0, LHS
3691  SDValue r0 = LHS;
3692
3693  // mov r1, RHS
3694  SDValue r1 = RHS;
3695
3696  // ilt r10, r0, 0
3697  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
3698      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
3699      r0, DAG.getConstant(0, OVT));
3700
3701  // ilt r11, r1, 0
3702  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
3703      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
3704      r1, DAG.getConstant(0, OVT));
3705
3706  // iadd r0, r0, r10
3707  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3708
3709  // iadd r1, r1, r11
3710  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
3711
3712  // ixor r0, r0, r10
3713  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3714
3715  // ixor r1, r1, r11
3716  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
3717
3718  // udiv r20, r0, r1
3719  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
3720
3721  // umul r20, r20, r1
3722  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
3723
3724  // sub r0, r0, r20
3725  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
3726
3727  // iadd r0, r0, r10
3728  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3729
3730  // ixor DST, r0, r10
3731  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3732  return DST;
3733}
3734
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed remainder has no custom expansion here; the original
  // node is handed back unchanged.
  return SDValue(Op.getNode(), 0);
}
3740
3741SDValue
3742AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
3743{
3744  DebugLoc DL = Op.getDebugLoc();
3745  EVT OVT = Op.getValueType();
3746  MVT INTTY = MVT::i32;
3747  if (OVT == MVT::v2i8) {
3748    INTTY = MVT::v2i32;
3749  } else if (OVT == MVT::v4i8) {
3750    INTTY = MVT::v4i32;
3751  }
3752  SDValue LHS = Op.getOperand(0);
3753  SDValue RHS = Op.getOperand(1);
3754  // The LowerUREM8 function generates equivalent to the following IL.
3755  // mov r0, as_u32(LHS)
3756  // mov r1, as_u32(RHS)
3757  // and r10, r0, 0xFF
3758  // and r11, r1, 0xFF
3759  // cmov_logical r3, r11, r11, 0x1
3760  // udiv r3, r10, r3
3761  // cmov_logical r3, r11, r3, 0
3762  // umul r3, r3, r11
3763  // sub r3, r10, r3
3764  // and as_u8(DST), r3, 0xFF
3765
3766  // mov r0, as_u32(LHS)
3767  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
3768
3769  // mov r1, as_u32(RHS)
3770  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
3771
3772  // and r10, r0, 0xFF
3773  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
3774      DAG.getConstant(0xFF, INTTY));
3775
3776  // and r11, r1, 0xFF
3777  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
3778      DAG.getConstant(0xFF, INTTY));
3779
3780  // cmov_logical r3, r11, r11, 0x1
3781  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
3782      DAG.getConstant(0x01, INTTY));
3783
3784  // udiv r3, r10, r3
3785  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
3786
3787  // cmov_logical r3, r11, r3, 0
3788  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
3789      DAG.getConstant(0, INTTY));
3790
3791  // umul r3, r3, r11
3792  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
3793
3794  // sub r3, r10, r3
3795  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
3796
3797  // and as_u8(DST), r3, 0xFF
3798  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
3799      DAG.getConstant(0xFF, INTTY));
3800  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
3801  return DST;
3802}
3803
3804SDValue
3805AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
3806{
3807  DebugLoc DL = Op.getDebugLoc();
3808  EVT OVT = Op.getValueType();
3809  MVT INTTY = MVT::i32;
3810  if (OVT == MVT::v2i16) {
3811    INTTY = MVT::v2i32;
3812  } else if (OVT == MVT::v4i16) {
3813    INTTY = MVT::v4i32;
3814  }
3815  SDValue LHS = Op.getOperand(0);
3816  SDValue RHS = Op.getOperand(1);
3817  // The LowerUREM16 function generatest equivalent to the following IL.
3818  // mov r0, LHS
3819  // mov r1, RHS
3820  // DIV = LowerUDIV16(LHS, RHS)
3821  // and r10, r0, 0xFFFF
3822  // and r11, r1, 0xFFFF
3823  // cmov_logical r3, r11, r11, 0x1
3824  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3825  // and r3, r3, 0xFFFF
3826  // cmov_logical r3, r11, r3, 0
3827  // umul r3, r3, r11
3828  // sub r3, r10, r3
3829  // and DST, r3, 0xFFFF
3830
3831  // mov r0, LHS
3832  SDValue r0 = LHS;
3833
3834  // mov r1, RHS
3835  SDValue r1 = RHS;
3836
3837  // and r10, r0, 0xFFFF
3838  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
3839      DAG.getConstant(0xFFFF, OVT));
3840
3841  // and r11, r1, 0xFFFF
3842  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
3843      DAG.getConstant(0xFFFF, OVT));
3844
3845  // cmov_logical r3, r11, r11, 0x1
3846  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
3847      DAG.getConstant(0x01, OVT));
3848
3849  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
3850  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
3851  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
3852  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
3853  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
3854  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
3855
3856  // and r3, r3, 0xFFFF
3857  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
3858      DAG.getConstant(0xFFFF, OVT));
3859
3860  // cmov_logical r3, r11, r3, 0
3861  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
3862      DAG.getConstant(0, OVT));
3863  // umul r3, r3, r11
3864  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
3865
3866  // sub r3, r10, r3
3867  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
3868
3869  // and DST, r3, 0xFFFF
3870  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
3871      DAG.getConstant(0xFFFF, OVT));
3872  return DST;
3873}
3874
3875SDValue
3876AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
3877{
3878  DebugLoc DL = Op.getDebugLoc();
3879  EVT OVT = Op.getValueType();
3880  SDValue LHS = Op.getOperand(0);
3881  SDValue RHS = Op.getOperand(1);
3882  // The LowerUREM32 function generates equivalent to the following IL.
3883  // udiv r20, LHS, RHS
3884  // umul r20, r20, RHS
3885  // sub DST, LHS, r20
3886
3887  // udiv r20, LHS, RHS
3888  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
3889
3890  // umul r20, r20, RHS
3891  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
3892
3893  // sub DST, LHS, r20
3894  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
3895  return DST;
3896}
3897
SDValue
AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit unsigned remainder has no custom expansion here; the original
  // node is handed back unchanged.
  return SDValue(Op.getNode(), 0);
}
3903
3904
3905SDValue
3906AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
3907{
3908  DebugLoc DL = Op.getDebugLoc();
3909  EVT OVT = Op.getValueType();
3910  MVT INTTY = MVT::i32;
3911  if (OVT == MVT::v2f32) {
3912    INTTY = MVT::v2i32;
3913  } else if (OVT == MVT::v4f32) {
3914    INTTY = MVT::v4i32;
3915  }
3916  SDValue LHS = Op.getOperand(0);
3917  SDValue RHS = Op.getOperand(1);
3918  SDValue DST;
3919  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
3920  if (STM.device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
3921    // TODO: This doesn't work for vector types yet
3922    // The LowerFDIV32 function generates equivalent to the following
3923    // IL:
3924    // mov r20, as_int(LHS)
3925    // mov r21, as_int(RHS)
3926    // and r30, r20, 0x7f800000
3927    // and r31, r20, 0x807FFFFF
3928    // and r32, r21, 0x7f800000
3929    // and r33, r21, 0x807FFFFF
3930    // ieq r40, r30, 0x7F800000
3931    // ieq r41, r31, 0x7F800000
3932    // ieq r42, r32, 0
3933    // ieq r43, r33, 0
3934    // and r50, r20, 0x80000000
3935    // and r51, r21, 0x80000000
3936    // ior r32, r32, 0x3f800000
3937    // ior r33, r33, 0x3f800000
3938    // cmov_logical r32, r42, r50, r32
3939    // cmov_logical r33, r43, r51, r33
3940    // cmov_logical r32, r40, r20, r32
3941    // cmov_logical r33, r41, r21, r33
3942    // ior r50, r40, r41
3943    // ior r51, r42, r43
3944    // ior r50, r50, r51
3945    // inegate r52, r31
3946    // iadd r30, r30, r52
3947    // cmov_logical r30, r50, 0, r30
3948    // div_zeroop(infinity) r21, 1.0, r33
3949    // mul_ieee r20, r32, r21
3950    // and r22, r20, 0x7FFFFFFF
3951    // and r23, r20, 0x80000000
3952    // ishr r60, r22, 0x00000017
3953    // ishr r61, r30, 0x00000017
3954    // iadd r20, r20, r30
3955    // iadd r21, r22, r30
3956    // iadd r60, r60, r61
3957    // ige r42, 0, R60
3958    // ior r41, r23, 0x7F800000
3959    // ige r40, r60, 0x000000FF
3960    // cmov_logical r40, r50, 0, r40
3961    // cmov_logical r20, r42, r23, r20
3962    // cmov_logical DST, r40, r41, r20
3963    // as_float(DST)
3964
3965    // mov r20, as_int(LHS)
3966    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
3967
3968    // mov r21, as_int(RHS)
3969    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
3970
3971    // and r30, r20, 0x7f800000
3972    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
3973        DAG.getConstant(0x7F800000, INTTY));
3974
3975    // and r31, r21, 0x7f800000
3976    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
3977        DAG.getConstant(0x7f800000, INTTY));
3978
3979    // and r32, r20, 0x807FFFFF
3980    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
3981        DAG.getConstant(0x807FFFFF, INTTY));
3982
3983    // and r33, r21, 0x807FFFFF
3984    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
3985        DAG.getConstant(0x807FFFFF, INTTY));
3986
3987    // ieq r40, r30, 0x7F800000
3988    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3989        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
3990        R30, DAG.getConstant(0x7F800000, INTTY));
3991
3992    // ieq r41, r31, 0x7F800000
3993    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3994        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
3995        R31, DAG.getConstant(0x7F800000, INTTY));
3996
3997    // ieq r42, r30, 0
3998    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
3999        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4000        R30, DAG.getConstant(0, INTTY));
4001
4002    // ieq r43, r31, 0
4003    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4004        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4005        R31, DAG.getConstant(0, INTTY));
4006
4007    // and r50, r20, 0x80000000
4008    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4009        DAG.getConstant(0x80000000, INTTY));
4010
4011    // and r51, r21, 0x80000000
4012    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4013        DAG.getConstant(0x80000000, INTTY));
4014
4015    // ior r32, r32, 0x3f800000
4016    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
4017        DAG.getConstant(0x3F800000, INTTY));
4018
4019    // ior r33, r33, 0x3f800000
4020    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
4021        DAG.getConstant(0x3F800000, INTTY));
4022
4023    // cmov_logical r32, r42, r50, r32
4024    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
4025
4026    // cmov_logical r33, r43, r51, r33
4027    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
4028
4029    // cmov_logical r32, r40, r20, r32
4030    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
4031
4032    // cmov_logical r33, r41, r21, r33
4033    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
4034
4035    // ior r50, r40, r41
4036    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
4037
4038    // ior r51, r42, r43
4039    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
4040
4041    // ior r50, r50, r51
4042    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
4043
4044    // inegate r52, r31
4045    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
4046
4047    // iadd r30, r30, r52
4048    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
4049
4050    // cmov_logical r30, r50, 0, r30
4051    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4052        DAG.getConstant(0, INTTY), R30);
4053
4054    // div_zeroop(infinity) r21, 1.0, as_float(r33)
4055    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4056    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4057        DAG.getConstantFP(1.0f, OVT), R33);
4058
4059    // mul_ieee as_int(r20), as_float(r32), r21
4060    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4061    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4062    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4063
4064    // div_zeroop(infinity) r21, 1.0, as_float(r33)
4065    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4066    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4067        DAG.getConstantFP(1.0f, OVT), R33);
4068
4069    // mul_ieee as_int(r20), as_float(r32), r21
4070    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4071    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4072    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4073
4074    // and r22, r20, 0x7FFFFFFF
4075    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4076        DAG.getConstant(0x7FFFFFFF, INTTY));
4077
4078    // and r23, r20, 0x80000000
4079    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4080        DAG.getConstant(0x80000000, INTTY));
4081
4082    // ishr r60, r22, 0x00000017
4083    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
4084        DAG.getConstant(0x00000017, INTTY));
4085
4086    // ishr r61, r30, 0x00000017
4087    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
4088        DAG.getConstant(0x00000017, INTTY));
4089
4090    // iadd r20, r20, r30
4091    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
4092
4093    // iadd r21, r22, r30
4094    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
4095
4096    // iadd r60, r60, r61
4097    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
4098
4099    // ige r42, 0, R60
4100    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4101        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4102        DAG.getConstant(0, INTTY),
4103        R60);
4104
4105    // ior r41, r23, 0x7F800000
4106    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
4107        DAG.getConstant(0x7F800000, INTTY));
4108
4109    // ige r40, r60, 0x000000FF
4110    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4111        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4112        R60,
4113        DAG.getConstant(0x0000000FF, INTTY));
4114
4115    // cmov_logical r40, r50, 0, r40
4116    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4117        DAG.getConstant(0, INTTY),
4118        R40);
4119
4120    // cmov_logical r20, r42, r23, r20
4121    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
4122
4123    // cmov_logical DST, r40, r41, r20
4124    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
4125
4126    // as_float(DST)
4127    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
4128  } else {
4129    // The following sequence of DAG nodes produce the following IL:
4130    // fabs r1, RHS
4131    // lt r2, 0x1.0p+96f, r1
4132    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4133    // mul_ieee r1, RHS, r3
4134    // div_zeroop(infinity) r0, LHS, r1
4135    // mul_ieee DST, r0, r3
4136
4137    // fabs r1, RHS
4138    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
4139    // lt r2, 0x1.0p+96f, r1
4140    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4141        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
4142        DAG.getConstant(0x6f800000, INTTY), r1);
4143    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4144    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
4145        DAG.getConstant(0x2f800000, INTTY),
4146        DAG.getConstant(0x3f800000, INTTY));
4147    // mul_ieee r1, RHS, r3
4148    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
4149    // div_zeroop(infinity) r0, LHS, r1
4150    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
4151    // mul_ieee DST, r0, r3
4152    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
4153  }
4154  return DST;
4155}
4156
SDValue
AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // Double-precision division has no custom expansion here; the original
  // node is handed back unchanged.
  return SDValue(Op.getNode(), 0);
}
4162