// AMDILISelLowering.cpp revision 33e7db9a1dafdcf5c7c745180831403e0485544d
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILSubtarget.h"
19#include "AMDILTargetMachine.h"
20#include "AMDILUtilityFunctions.h"
21#include "llvm/CallingConv.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/PseudoSourceValue.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/DerivedTypes.h"
29#include "llvm/Instructions.h"
30#include "llvm/Intrinsics.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35#define ISDBITCAST  ISD::BITCAST
36#define MVTGLUE     MVT::Glue
37//===----------------------------------------------------------------------===//
38// Calling Convention Implementation
39//===----------------------------------------------------------------------===//
40#include "AMDILGenCallingConv.inc"
41
42//===----------------------------------------------------------------------===//
43// TargetLowering Implementation Help Functions Begin
44//===----------------------------------------------------------------------===//
45  static SDValue
46getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47{
48  DebugLoc DL = Src.getDebugLoc();
49  EVT svt = Src.getValueType().getScalarType();
50  EVT dvt = Dst.getValueType().getScalarType();
51  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52    if (dvt.bitsGT(svt)) {
53      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54    } else if (svt.bitsLT(svt)) {
55      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56          DAG.getConstant(1, MVT::i32));
57    }
58  } else if (svt.isInteger() && dvt.isInteger()) {
59    if (!svt.bitsEq(dvt)) {
60      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61    } else {
62      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63    }
64  } else if (svt.isInteger()) {
65    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66    if (!svt.bitsEq(dvt)) {
67      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71      } else {
72        assert(0 && "We only support 32 and 64bit fp types");
73      }
74    }
75    Src = DAG.getNode(opcode, DL, dvt, Src);
76  } else if (dvt.isInteger()) {
77    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82    } else {
83      assert(0 && "We only support 32 and 64bit fp types");
84    }
85    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86  }
87  return Src;
88}
89// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
90// condition.
91  static AMDILCC::CondCodes
92CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93{
94  switch (CC) {
95    default:
96      {
97        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
98        assert(0 && "Unknown condition code!");
99      }
100    case ISD::SETO:
101      switch(type) {
102        case MVT::f32:
103          return AMDILCC::IL_CC_F_O;
104        case MVT::f64:
105          return AMDILCC::IL_CC_D_O;
106        default:
107          assert(0 && "Opcode combination not generated correctly!");
108          return AMDILCC::COND_ERROR;
109      };
110    case ISD::SETUO:
111      switch(type) {
112        case MVT::f32:
113          return AMDILCC::IL_CC_F_UO;
114        case MVT::f64:
115          return AMDILCC::IL_CC_D_UO;
116        default:
117          assert(0 && "Opcode combination not generated correctly!");
118          return AMDILCC::COND_ERROR;
119      };
120    case ISD::SETGT:
121      switch (type) {
122        case MVT::i1:
123        case MVT::i8:
124        case MVT::i16:
125        case MVT::i32:
126          return AMDILCC::IL_CC_I_GT;
127        case MVT::f32:
128          return AMDILCC::IL_CC_F_GT;
129        case MVT::f64:
130          return AMDILCC::IL_CC_D_GT;
131        case MVT::i64:
132          return AMDILCC::IL_CC_L_GT;
133        default:
134          assert(0 && "Opcode combination not generated correctly!");
135          return AMDILCC::COND_ERROR;
136      };
137    case ISD::SETGE:
138      switch (type) {
139        case MVT::i1:
140        case MVT::i8:
141        case MVT::i16:
142        case MVT::i32:
143          return AMDILCC::IL_CC_I_GE;
144        case MVT::f32:
145          return AMDILCC::IL_CC_F_GE;
146        case MVT::f64:
147          return AMDILCC::IL_CC_D_GE;
148        case MVT::i64:
149          return AMDILCC::IL_CC_L_GE;
150        default:
151          assert(0 && "Opcode combination not generated correctly!");
152          return AMDILCC::COND_ERROR;
153      };
154    case ISD::SETLT:
155      switch (type) {
156        case MVT::i1:
157        case MVT::i8:
158        case MVT::i16:
159        case MVT::i32:
160          return AMDILCC::IL_CC_I_LT;
161        case MVT::f32:
162          return AMDILCC::IL_CC_F_LT;
163        case MVT::f64:
164          return AMDILCC::IL_CC_D_LT;
165        case MVT::i64:
166          return AMDILCC::IL_CC_L_LT;
167        default:
168          assert(0 && "Opcode combination not generated correctly!");
169          return AMDILCC::COND_ERROR;
170      };
171    case ISD::SETLE:
172      switch (type) {
173        case MVT::i1:
174        case MVT::i8:
175        case MVT::i16:
176        case MVT::i32:
177          return AMDILCC::IL_CC_I_LE;
178        case MVT::f32:
179          return AMDILCC::IL_CC_F_LE;
180        case MVT::f64:
181          return AMDILCC::IL_CC_D_LE;
182        case MVT::i64:
183          return AMDILCC::IL_CC_L_LE;
184        default:
185          assert(0 && "Opcode combination not generated correctly!");
186          return AMDILCC::COND_ERROR;
187      };
188    case ISD::SETNE:
189      switch (type) {
190        case MVT::i1:
191        case MVT::i8:
192        case MVT::i16:
193        case MVT::i32:
194          return AMDILCC::IL_CC_I_NE;
195        case MVT::f32:
196          return AMDILCC::IL_CC_F_NE;
197        case MVT::f64:
198          return AMDILCC::IL_CC_D_NE;
199        case MVT::i64:
200          return AMDILCC::IL_CC_L_NE;
201        default:
202          assert(0 && "Opcode combination not generated correctly!");
203          return AMDILCC::COND_ERROR;
204      };
205    case ISD::SETEQ:
206      switch (type) {
207        case MVT::i1:
208        case MVT::i8:
209        case MVT::i16:
210        case MVT::i32:
211          return AMDILCC::IL_CC_I_EQ;
212        case MVT::f32:
213          return AMDILCC::IL_CC_F_EQ;
214        case MVT::f64:
215          return AMDILCC::IL_CC_D_EQ;
216        case MVT::i64:
217          return AMDILCC::IL_CC_L_EQ;
218        default:
219          assert(0 && "Opcode combination not generated correctly!");
220          return AMDILCC::COND_ERROR;
221      };
222    case ISD::SETUGT:
223      switch (type) {
224        case MVT::i1:
225        case MVT::i8:
226        case MVT::i16:
227        case MVT::i32:
228          return AMDILCC::IL_CC_U_GT;
229        case MVT::f32:
230          return AMDILCC::IL_CC_F_UGT;
231        case MVT::f64:
232          return AMDILCC::IL_CC_D_UGT;
233        case MVT::i64:
234          return AMDILCC::IL_CC_UL_GT;
235        default:
236          assert(0 && "Opcode combination not generated correctly!");
237          return AMDILCC::COND_ERROR;
238      };
239    case ISD::SETUGE:
240      switch (type) {
241        case MVT::i1:
242        case MVT::i8:
243        case MVT::i16:
244        case MVT::i32:
245          return AMDILCC::IL_CC_U_GE;
246        case MVT::f32:
247          return AMDILCC::IL_CC_F_UGE;
248        case MVT::f64:
249          return AMDILCC::IL_CC_D_UGE;
250        case MVT::i64:
251          return AMDILCC::IL_CC_UL_GE;
252        default:
253          assert(0 && "Opcode combination not generated correctly!");
254          return AMDILCC::COND_ERROR;
255      };
256    case ISD::SETULT:
257      switch (type) {
258        case MVT::i1:
259        case MVT::i8:
260        case MVT::i16:
261        case MVT::i32:
262          return AMDILCC::IL_CC_U_LT;
263        case MVT::f32:
264          return AMDILCC::IL_CC_F_ULT;
265        case MVT::f64:
266          return AMDILCC::IL_CC_D_ULT;
267        case MVT::i64:
268          return AMDILCC::IL_CC_UL_LT;
269        default:
270          assert(0 && "Opcode combination not generated correctly!");
271          return AMDILCC::COND_ERROR;
272      };
273    case ISD::SETULE:
274      switch (type) {
275        case MVT::i1:
276        case MVT::i8:
277        case MVT::i16:
278        case MVT::i32:
279          return AMDILCC::IL_CC_U_LE;
280        case MVT::f32:
281          return AMDILCC::IL_CC_F_ULE;
282        case MVT::f64:
283          return AMDILCC::IL_CC_D_ULE;
284        case MVT::i64:
285          return AMDILCC::IL_CC_UL_LE;
286        default:
287          assert(0 && "Opcode combination not generated correctly!");
288          return AMDILCC::COND_ERROR;
289      };
290    case ISD::SETUNE:
291      switch (type) {
292        case MVT::i1:
293        case MVT::i8:
294        case MVT::i16:
295        case MVT::i32:
296          return AMDILCC::IL_CC_U_NE;
297        case MVT::f32:
298          return AMDILCC::IL_CC_F_UNE;
299        case MVT::f64:
300          return AMDILCC::IL_CC_D_UNE;
301        case MVT::i64:
302          return AMDILCC::IL_CC_UL_NE;
303        default:
304          assert(0 && "Opcode combination not generated correctly!");
305          return AMDILCC::COND_ERROR;
306      };
307    case ISD::SETUEQ:
308      switch (type) {
309        case MVT::i1:
310        case MVT::i8:
311        case MVT::i16:
312        case MVT::i32:
313          return AMDILCC::IL_CC_U_EQ;
314        case MVT::f32:
315          return AMDILCC::IL_CC_F_UEQ;
316        case MVT::f64:
317          return AMDILCC::IL_CC_D_UEQ;
318        case MVT::i64:
319          return AMDILCC::IL_CC_UL_EQ;
320        default:
321          assert(0 && "Opcode combination not generated correctly!");
322          return AMDILCC::COND_ERROR;
323      };
324    case ISD::SETOGT:
325      switch (type) {
326        case MVT::f32:
327          return AMDILCC::IL_CC_F_OGT;
328        case MVT::f64:
329          return AMDILCC::IL_CC_D_OGT;
330        case MVT::i1:
331        case MVT::i8:
332        case MVT::i16:
333        case MVT::i32:
334        case MVT::i64:
335        default:
336          assert(0 && "Opcode combination not generated correctly!");
337          return AMDILCC::COND_ERROR;
338      };
339    case ISD::SETOGE:
340      switch (type) {
341        case MVT::f32:
342          return AMDILCC::IL_CC_F_OGE;
343        case MVT::f64:
344          return AMDILCC::IL_CC_D_OGE;
345        case MVT::i1:
346        case MVT::i8:
347        case MVT::i16:
348        case MVT::i32:
349        case MVT::i64:
350        default:
351          assert(0 && "Opcode combination not generated correctly!");
352          return AMDILCC::COND_ERROR;
353      };
354    case ISD::SETOLT:
355      switch (type) {
356        case MVT::f32:
357          return AMDILCC::IL_CC_F_OLT;
358        case MVT::f64:
359          return AMDILCC::IL_CC_D_OLT;
360        case MVT::i1:
361        case MVT::i8:
362        case MVT::i16:
363        case MVT::i32:
364        case MVT::i64:
365        default:
366          assert(0 && "Opcode combination not generated correctly!");
367          return AMDILCC::COND_ERROR;
368      };
369    case ISD::SETOLE:
370      switch (type) {
371        case MVT::f32:
372          return AMDILCC::IL_CC_F_OLE;
373        case MVT::f64:
374          return AMDILCC::IL_CC_D_OLE;
375        case MVT::i1:
376        case MVT::i8:
377        case MVT::i16:
378        case MVT::i32:
379        case MVT::i64:
380        default:
381          assert(0 && "Opcode combination not generated correctly!");
382          return AMDILCC::COND_ERROR;
383      };
384    case ISD::SETONE:
385      switch (type) {
386        case MVT::f32:
387          return AMDILCC::IL_CC_F_ONE;
388        case MVT::f64:
389          return AMDILCC::IL_CC_D_ONE;
390        case MVT::i1:
391        case MVT::i8:
392        case MVT::i16:
393        case MVT::i32:
394        case MVT::i64:
395        default:
396          assert(0 && "Opcode combination not generated correctly!");
397          return AMDILCC::COND_ERROR;
398      };
399    case ISD::SETOEQ:
400      switch (type) {
401        case MVT::f32:
402          return AMDILCC::IL_CC_F_OEQ;
403        case MVT::f64:
404          return AMDILCC::IL_CC_D_OEQ;
405        case MVT::i1:
406        case MVT::i8:
407        case MVT::i16:
408        case MVT::i32:
409        case MVT::i64:
410        default:
411          assert(0 && "Opcode combination not generated correctly!");
412          return AMDILCC::COND_ERROR;
413      };
414  };
415}
416
417/// Helper function used by LowerFormalArguments
418static const TargetRegisterClass*
419getRegClassFromType(unsigned int type) {
420  switch (type) {
421  default:
422    assert(0 && "Passed in type does not match any register classes.");
423  case MVT::i8:
424    return &AMDIL::GPRI8RegClass;
425  case MVT::i16:
426    return &AMDIL::GPRI16RegClass;
427  case MVT::i32:
428    return &AMDIL::GPRI32RegClass;
429  case MVT::f32:
430    return &AMDIL::GPRF32RegClass;
431  case MVT::i64:
432    return &AMDIL::GPRI64RegClass;
433  case MVT::f64:
434    return &AMDIL::GPRF64RegClass;
435  case MVT::v4f32:
436    return &AMDIL::GPRV4F32RegClass;
437  case MVT::v4i8:
438    return &AMDIL::GPRV4I8RegClass;
439  case MVT::v4i16:
440    return &AMDIL::GPRV4I16RegClass;
441  case MVT::v4i32:
442    return &AMDIL::GPRV4I32RegClass;
443  case MVT::v2f32:
444    return &AMDIL::GPRV2F32RegClass;
445  case MVT::v2i8:
446    return &AMDIL::GPRV2I8RegClass;
447  case MVT::v2i16:
448    return &AMDIL::GPRV2I16RegClass;
449  case MVT::v2i32:
450    return &AMDIL::GPRV2I32RegClass;
451  case MVT::v2f64:
452    return &AMDIL::GPRV2F64RegClass;
453  case MVT::v2i64:
454    return &AMDIL::GPRV2I64RegClass;
455  }
456}
457
458SDValue
459AMDILTargetLowering::LowerMemArgument(
460    SDValue Chain,
461    CallingConv::ID CallConv,
462    const SmallVectorImpl<ISD::InputArg> &Ins,
463    DebugLoc dl, SelectionDAG &DAG,
464    const CCValAssign &VA,
465    MachineFrameInfo *MFI,
466    unsigned i) const
467{
468  // Create the nodes corresponding to a load from this parameter slot.
469  ISD::ArgFlagsTy Flags = Ins[i].Flags;
470
471  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
472    getTargetMachine().Options.GuaranteedTailCallOpt;
473  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
474
475  // FIXME: For now, all byval parameter objects are marked mutable. This can
476  // be changed with more analysis.
477  // In case of tail call optimization mark all arguments mutable. Since they
478  // could be overwritten by lowering of arguments in case of a tail call.
479  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
480      VA.getLocMemOffset(), isImmutable);
481  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
482
483  if (Flags.isByVal())
484    return FIN;
485  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
486      MachinePointerInfo::getFixedStack(FI),
487      false, false, false, 0);
488}
489//===----------------------------------------------------------------------===//
490// TargetLowering Implementation Help Functions End
491//===----------------------------------------------------------------------===//
492//===----------------------------------------------------------------------===//
493// Instruction generation functions
494//===----------------------------------------------------------------------===//
495MachineOperand
496AMDILTargetLowering::convertToReg(MachineOperand op) const
497{
498  if (op.isReg()) {
499    return op;
500  } else if (op.isImm()) {
501    uint32_t loadReg
502      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
503    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
504      .addImm(op.getImm());
505    op.ChangeToRegister(loadReg, false);
506  } else if (op.isFPImm()) {
507    uint32_t loadReg
508      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
509    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
510      .addFPImm(op.getFPImm());
511    op.ChangeToRegister(loadReg, false);
512  } else if (op.isMBB()) {
513    op.ChangeToRegister(0, false);
514  } else if (op.isFI()) {
515    op.ChangeToRegister(0, false);
516  } else if (op.isCPI()) {
517    op.ChangeToRegister(0, false);
518  } else if (op.isJTI()) {
519    op.ChangeToRegister(0, false);
520  } else if (op.isGlobal()) {
521    op.ChangeToRegister(0, false);
522  } else if (op.isSymbol()) {
523    op.ChangeToRegister(0, false);
524  }/* else if (op.isMetadata()) {
525      op.ChangeToRegister(0, false);
526      }*/
527  return op;
528}
529
530//===----------------------------------------------------------------------===//
531// TargetLowering Class Implementation Begins
532//===----------------------------------------------------------------------===//
533  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
534: TargetLowering(TM, new TargetLoweringObjectFileELF())
535{
536  int types[] =
537  {
538    (int)MVT::i8,
539    (int)MVT::i16,
540    (int)MVT::i32,
541    (int)MVT::f32,
542    (int)MVT::f64,
543    (int)MVT::i64,
544    (int)MVT::v2i8,
545    (int)MVT::v4i8,
546    (int)MVT::v2i16,
547    (int)MVT::v4i16,
548    (int)MVT::v4f32,
549    (int)MVT::v4i32,
550    (int)MVT::v2f32,
551    (int)MVT::v2i32,
552    (int)MVT::v2f64,
553    (int)MVT::v2i64
554  };
555
556  int IntTypes[] =
557  {
558    (int)MVT::i8,
559    (int)MVT::i16,
560    (int)MVT::i32,
561    (int)MVT::i64
562  };
563
564  int FloatTypes[] =
565  {
566    (int)MVT::f32,
567    (int)MVT::f64
568  };
569
570  int VectorTypes[] =
571  {
572    (int)MVT::v2i8,
573    (int)MVT::v4i8,
574    (int)MVT::v2i16,
575    (int)MVT::v4i16,
576    (int)MVT::v4f32,
577    (int)MVT::v4i32,
578    (int)MVT::v2f32,
579    (int)MVT::v2i32,
580    (int)MVT::v2f64,
581    (int)MVT::v2i64
582  };
583  size_t numTypes = sizeof(types) / sizeof(*types);
584  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
585  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
586  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
587
588  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
589      &this->getTargetMachine())->getSubtargetImpl();
590  // These are the current register classes that are
591  // supported
592
593  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
594  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
595
596  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
597    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
598    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
599  }
600  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
601    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
602    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
603    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
604    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
605  }
606  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
607    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
608    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
609    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
610    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
611  }
612  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
613  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
614  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
615  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
616  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
617    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
618    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
619  }
620
621  for (unsigned int x  = 0; x < numTypes; ++x) {
622    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
623
624    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
625    // We cannot sextinreg, expand to shifts
626    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
627    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
628    setOperationAction(ISD::FP_ROUND, VT, Expand);
629    setOperationAction(ISD::SUBE, VT, Expand);
630    setOperationAction(ISD::SUBC, VT, Expand);
631    setOperationAction(ISD::ADDE, VT, Expand);
632    setOperationAction(ISD::ADDC, VT, Expand);
633    setOperationAction(ISD::SETCC, VT, Custom);
634    setOperationAction(ISD::BRCOND, VT, Custom);
635    setOperationAction(ISD::BR_CC, VT, Custom);
636    setOperationAction(ISD::BR_JT, VT, Expand);
637    setOperationAction(ISD::BRIND, VT, Expand);
638    // TODO: Implement custom UREM/SREM routines
639    setOperationAction(ISD::UREM, VT, Expand);
640    setOperationAction(ISD::SREM, VT, Expand);
641    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
642    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
643    setOperationAction(ISDBITCAST, VT, Custom);
644    setOperationAction(ISD::GlobalAddress, VT, Custom);
645    setOperationAction(ISD::JumpTable, VT, Custom);
646    setOperationAction(ISD::ConstantPool, VT, Custom);
647    setOperationAction(ISD::SELECT_CC, VT, Custom);
648    setOperationAction(ISD::SELECT, VT, Custom);
649    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
650    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
651    if (VT != MVT::i64 && VT != MVT::v2i64) {
652      setOperationAction(ISD::SDIV, VT, Custom);
653    }
654    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
655    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
656  }
657  for (unsigned int x = 0; x < numFloatTypes; ++x) {
658    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
659
660    // IL does not have these operations for floating point types
661    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
662    setOperationAction(ISD::FP_ROUND, VT, Custom);
663    setOperationAction(ISD::SETOLT, VT, Expand);
664    setOperationAction(ISD::SETOGE, VT, Expand);
665    setOperationAction(ISD::SETOGT, VT, Expand);
666    setOperationAction(ISD::SETOLE, VT, Expand);
667    setOperationAction(ISD::SETULT, VT, Expand);
668    setOperationAction(ISD::SETUGE, VT, Expand);
669    setOperationAction(ISD::SETUGT, VT, Expand);
670    setOperationAction(ISD::SETULE, VT, Expand);
671  }
672
673  for (unsigned int x = 0; x < numIntTypes; ++x) {
674    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
675
676    // GPU also does not have divrem function for signed or unsigned
677    setOperationAction(ISD::SDIVREM, VT, Expand);
678    setOperationAction(ISD::UDIVREM, VT, Expand);
679    setOperationAction(ISD::FP_ROUND, VT, Expand);
680
681    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
682    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
683    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
684
685    // GPU doesn't have a rotl, rotr, or byteswap instruction
686    setOperationAction(ISD::ROTR, VT, Expand);
687    setOperationAction(ISD::ROTL, VT, Expand);
688    setOperationAction(ISD::BSWAP, VT, Expand);
689
690    // GPU doesn't have any counting operators
691    setOperationAction(ISD::CTPOP, VT, Expand);
692    setOperationAction(ISD::CTTZ, VT, Expand);
693    setOperationAction(ISD::CTLZ, VT, Expand);
694  }
695
696  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
697  {
698    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
699
700    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
701    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
702    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
703    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
704    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
705    setOperationAction(ISD::FP_ROUND, VT, Expand);
706    setOperationAction(ISD::SDIVREM, VT, Expand);
707    setOperationAction(ISD::UDIVREM, VT, Expand);
708    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
709    // setOperationAction(ISD::VSETCC, VT, Expand);
710    setOperationAction(ISD::SETCC, VT, Expand);
711    setOperationAction(ISD::SELECT_CC, VT, Expand);
712    setOperationAction(ISD::SELECT, VT, Expand);
713
714  }
715  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
716  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
717    if (stm->calVersion() < CAL_VERSION_SC_139
718        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
719      setOperationAction(ISD::MUL, MVT::i64, Custom);
720    }
721    setOperationAction(ISD::SUB, MVT::i64, Custom);
722    setOperationAction(ISD::ADD, MVT::i64, Custom);
723    setOperationAction(ISD::MULHU, MVT::i64, Expand);
724    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
725    setOperationAction(ISD::MULHS, MVT::i64, Expand);
726    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
727    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
728    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
729    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
730    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
731    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
732    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
733    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
734    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
735    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
736    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
737    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
738    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
739  }
740  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
741    // we support loading/storing v2f64 but not operations on the type
742    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
743    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
744    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
745    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
746    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
747    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
748    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
749    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
750    // We want to expand vector conversions into their scalar
751    // counterparts.
752    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
753    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
754    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
755    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
756    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
757    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
758    setOperationAction(ISD::FABS, MVT::f64, Expand);
759    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
760  }
761  // TODO: Fix the UDIV24 algorithm so it works for these
762  // types correctly. This needs vector comparisons
763  // for this to work correctly.
764  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
765  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
766  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
767  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
768  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
769  setOperationAction(ISD::SUBC, MVT::Other, Expand);
770  setOperationAction(ISD::ADDE, MVT::Other, Expand);
771  setOperationAction(ISD::ADDC, MVT::Other, Expand);
772  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
773  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
774  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
775  setOperationAction(ISD::BRIND, MVT::Other, Expand);
776  setOperationAction(ISD::SETCC, MVT::Other, Custom);
777  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
778  setOperationAction(ISD::FDIV, MVT::f32, Custom);
779  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
780  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
781
782  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
783  // Use the default implementation.
784  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
785  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
786  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
787  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
788  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
789  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
790  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
791  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
792  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
793
794  setStackPointerRegisterToSaveRestore(AMDIL::SP);
795  setSchedulingPreference(Sched::RegPressure);
796  setPow2DivIsCheap(false);
797  setPrefLoopAlignment(16);
798  setSelectIsExpensive(true);
799  setJumpIsExpensive(true);
800  computeRegisterProperties();
801
802  maxStoresPerMemcpy  = 4096;
803  maxStoresPerMemmove = 4096;
804  maxStoresPerMemset  = 4096;
805
806#undef numTypes
807#undef numIntTypes
808#undef numVectorTypes
809#undef numFloatTypes
810}
811
812const char *
813AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
814{
815  switch (Opcode) {
816    default: return 0;
817    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
818    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
819    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
820    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
821    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
822    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
823    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
824    case AMDILISD::MAD:  return "AMDILISD::MAD";
825    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
826    case AMDILISD::CALL:  return "AMDILISD::CALL";
827    case AMDILISD::RET:   return "AMDILISD::RET";
828    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
829    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
830    case AMDILISD::ADD: return "AMDILISD::ADD";
831    case AMDILISD::UMUL: return "AMDILISD::UMUL";
832    case AMDILISD::AND: return "AMDILISD::AND";
833    case AMDILISD::OR: return "AMDILISD::OR";
834    case AMDILISD::NOT: return "AMDILISD::NOT";
835    case AMDILISD::XOR: return "AMDILISD::XOR";
836    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
837    case AMDILISD::SMAX: return "AMDILISD::SMAX";
838    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
839    case AMDILISD::MOVE: return "AMDILISD::MOVE";
840    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
841    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
842    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
843    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
844    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
845    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
846    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
847    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
848    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
849    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
850    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
851    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
852    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
853    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
854    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
855    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
856    case AMDILISD::CMP: return "AMDILISD::CMP";
857    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
858    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
859    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
860    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
861    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
862    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
863    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
864    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
865    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
866    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
867    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
868    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
869    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
870    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
871    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
872    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
873    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
874    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
875    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
876    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
877    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
878    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
879    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
880    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
881    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
882    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
883    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
884    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
885    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
886    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
887    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
888    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
889    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
890    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
891    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
892    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
893    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
894    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
895    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
896    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
897    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
898    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
899    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
900    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
901    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
902    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
903    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
904    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
905    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
906    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
907    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
908    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
909    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
910    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
911    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
912    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
913    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
914    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
915    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
916    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
917    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
918    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
919    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
920    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
921    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
922    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
923    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
924    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
925    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
926    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
927    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
928    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
929    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
930    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
931    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
932    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
933    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
934    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
935    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
936    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
937    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
938    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
939    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
940    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
941    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
942    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
943    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
944    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
945    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
946    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
947    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
948    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
949    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
950    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
951    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
952    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
953    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
954    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
955    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
956    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
957    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
958    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
959    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
960    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
961    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
962    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
963    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
964    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
965    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
966
967  };
968}
// Describe the memory behavior of AMDIL "touches memory" intrinsics
// (global/local/region atomics and append-buffer ops) so the SelectionDAG
// builder can build a correct MemIntrinsicSDNode for them.  Returns false
// for intrinsics outside the AMDIL range or not handled here; returns true
// after populating Info with the target opcode and memory operand details.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Reject anything outside the AMDIL-specific intrinsic ID range.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  bool bitCastToInt = false; // true only for the float xchg variants below
  unsigned IntNo;            // AMDILISD opcode selected for this intrinsic
  bool isRet = true;         // false for the *_noret variants
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  // NOTE: several groups below rely on intentional case fall-through:
  // a *_noret case sets isRet = false and falls into the IntNo assignment,
  // and the float xchg cases set bitCastToInt and fall into the int cases.
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // dec/inc: hardware atomic dec/inc requires CAL SC 136+; on older
    // runtimes they are emulated with atomic sub/add instead.
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    // max/min: signed (i32) and unsigned (u32) intrinsics map to distinct
    // opcodes, so each case is listed individually here.
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // xchg: the f32 variants mark bitCastToInt and then deliberately fall
    // through to share the opcode of the integer variants.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // NOTE(review): memVT is f32 exactly when bitCastToInt is set, which reads
  // backwards relative to the flag's name ("cast TO int") — confirm whether
  // the memory type here is meant to be the pre-cast (f32) type.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0); // first operand is the address operated on
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true; // atomics must not be reordered or folded away
  // Only the value-returning forms read memory back; every form writes.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
1348// The backend supports 32 and 64 bit floating point immediates
1349bool
1350AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1351{
1352  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1353      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1354    return true;
1355  } else {
1356    return false;
1357  }
1358}
1359
1360bool
1361AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1362{
1363  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1364      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1365    return false;
1366  } else {
1367    return true;
1368  }
1369}
1370
1371
1372// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1373// be zero. Op is expected to be a target specific node. Used by DAG
1374// combiner.
1375
1376void
1377AMDILTargetLowering::computeMaskedBitsForTargetNode(
1378    const SDValue Op,
1379    APInt &KnownZero,
1380    APInt &KnownOne,
1381    const SelectionDAG &DAG,
1382    unsigned Depth) const
1383{
1384  APInt KnownZero2;
1385  APInt KnownOne2;
1386  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1387  switch (Op.getOpcode()) {
1388    default: break;
1389    case AMDILISD::SELECT_CC:
1390             DAG.ComputeMaskedBits(
1391                 Op.getOperand(1),
1392                 KnownZero,
1393                 KnownOne,
1394                 Depth + 1
1395                 );
1396             DAG.ComputeMaskedBits(
1397                 Op.getOperand(0),
1398                 KnownZero2,
1399                 KnownOne2
1400                 );
1401             assert((KnownZero & KnownOne) == 0
1402                 && "Bits known to be one AND zero?");
1403             assert((KnownZero2 & KnownOne2) == 0
1404                 && "Bits known to be one AND zero?");
1405             // Only known if known in both the LHS and RHS
1406             KnownOne &= KnownOne2;
1407             KnownZero &= KnownZero2;
1408             break;
1409  };
1410}
1411
1412// This is the function that determines which calling convention should
1413// be used. Currently there is only one calling convention
1414CCAssignFn*
1415AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1416{
1417  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1418  return CC_AMDIL32;
1419}
1420
1421// LowerCallResult - Lower the result values of an ISD::CALL into the
1422// appropriate copies out of appropriate physical registers.  This assumes that
1423// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1424// being lowered.  The returns a SDNode with the same number of values as the
1425// ISD::CALL.
1426SDValue
1427AMDILTargetLowering::LowerCallResult(
1428    SDValue Chain,
1429    SDValue InFlag,
1430    CallingConv::ID CallConv,
1431    bool isVarArg,
1432    const SmallVectorImpl<ISD::InputArg> &Ins,
1433    DebugLoc dl,
1434    SelectionDAG &DAG,
1435    SmallVectorImpl<SDValue> &InVals) const
1436{
1437  // Assign locations to each value returned by this call
1438  SmallVector<CCValAssign, 16> RVLocs;
1439  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
1440                 getTargetMachine(), RVLocs, *DAG.getContext());
1441  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
1442
1443  // Copy all of the result registers out of their specified physreg.
1444  for (unsigned i = 0; i != RVLocs.size(); ++i) {
1445    EVT CopyVT = RVLocs[i].getValVT();
1446    if (RVLocs[i].isRegLoc()) {
1447      Chain = DAG.getCopyFromReg(
1448          Chain,
1449          dl,
1450          RVLocs[i].getLocReg(),
1451          CopyVT,
1452          InFlag
1453          ).getValue(1);
1454      SDValue Val = Chain.getValue(0);
1455      InFlag = Chain.getValue(2);
1456      InVals.push_back(Val);
1457    }
1458  }
1459
1460  return Chain;
1461
1462}
1463
1464//===----------------------------------------------------------------------===//
1465//                           Other Lowering Hooks
1466//===----------------------------------------------------------------------===//
1467
1468// Recursively assign SDNodeOrdering to any unordered nodes
1469// This is necessary to maintain source ordering of instructions
1470// under -O0 to avoid odd-looking "skipping around" issues.
1471  static const SDValue
1472Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
1473{
1474  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
1475    DAG.AssignOrdering( New.getNode(), order );
1476    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
1477      Ordered( DAG, order, New.getOperand(i) );
1478  }
1479  return New;
1480}
1481
// Dispatch helper for LowerOperation: routes ISD::A to the Lower##A method
// and stamps the replacement nodes with the original node's SDNodeOrdering
// (via Ordered above) so -O0 source ordering is preserved.
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
1485
1486SDValue
1487AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
1488{
1489  switch (Op.getOpcode()) {
1490    default:
1491      Op.getNode()->dump();
1492      assert(0 && "Custom lowering code for this"
1493          "instruction is not implemented yet!");
1494      break;
1495      LOWER(GlobalAddress);
1496      LOWER(JumpTable);
1497      LOWER(ConstantPool);
1498      LOWER(ExternalSymbol);
1499      LOWER(FP_TO_UINT);
1500      LOWER(UINT_TO_FP);
1501      LOWER(MUL);
1502      LOWER(SUB);
1503      LOWER(FDIV);
1504      LOWER(SDIV);
1505      LOWER(SREM);
1506      LOWER(UREM);
1507      LOWER(BUILD_VECTOR);
1508      LOWER(INSERT_VECTOR_ELT);
1509      LOWER(EXTRACT_VECTOR_ELT);
1510      LOWER(EXTRACT_SUBVECTOR);
1511      LOWER(SCALAR_TO_VECTOR);
1512      LOWER(CONCAT_VECTORS);
1513      LOWER(SELECT);
1514      LOWER(SETCC);
1515      LOWER(SIGN_EXTEND_INREG);
1516      LOWER(BITCAST);
1517      LOWER(DYNAMIC_STACKALLOC);
1518      LOWER(BRCOND);
1519      LOWER(BR_CC);
1520      LOWER(FP_ROUND);
1521  }
1522  return Op;
1523}
1524
1525int
1526AMDILTargetLowering::getVarArgsFrameOffset() const
1527{
1528  return VarArgsFrameOffset;
1529}
1530#undef LOWER
1531
1532SDValue
1533AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
1534{
1535  SDValue DST = Op;
1536  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
1537  const GlobalValue *G = GADN->getGlobal();
1538  DebugLoc DL = Op.getDebugLoc();
1539  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
1540  if (!GV) {
1541    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1542  } else {
1543    if (GV->hasInitializer()) {
1544      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
1545      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
1546        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
1547      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
1548        DST = DAG.getConstantFP(CF->getValueAPF(),
1549            Op.getValueType());
1550      } else if (dyn_cast<ConstantAggregateZero>(C)) {
1551        EVT VT = Op.getValueType();
1552        if (VT.isInteger()) {
1553          DST = DAG.getConstant(0, VT);
1554        } else {
1555          DST = DAG.getConstantFP(0, VT);
1556        }
1557      } else {
1558        assert(!"lowering this type of Global Address "
1559            "not implemented yet!");
1560        C->dump();
1561        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1562      }
1563    } else {
1564      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
1565    }
1566  }
1567  return DST;
1568}
1569
1570SDValue
1571AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
1572{
1573  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1574  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
1575  return Result;
1576}
1577SDValue
1578AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
1579{
1580  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
1581  EVT PtrVT = Op.getValueType();
1582  SDValue Result;
1583  if (CP->isMachineConstantPoolEntry()) {
1584    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
1585        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1586  } else {
1587    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
1588        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
1589  }
1590  return Result;
1591}
1592
1593SDValue
1594AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
1595{
1596  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
1597  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
1598  return Result;
1599}
1600
/// LowerFormalArguments - Transform physical registers into virtual
/// registers and generate load operations for arguments placed on the
/// stack. Register-assigned arguments promoted to 32 bits get an
/// Assert[SZ]ext + TRUNCATE fixup; memory-assigned arguments are handled by
/// LowerMemArgument.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  // Walk the assignments CCInfo produced: one CCValAssign per formal arg.
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      // Mark the physical register live-in and copy it into a fresh vreg.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any non-Full LocInfo means the value was widened: truncate back to
      // the declared value type.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-assigned argument: emit the frame load via LowerMemArgument.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
1701/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1702/// by "Src" to address "Dst" with size and alignment information specified by
1703/// the specific parameter attribute. The copy will be passed as a byval
1704/// function parameter.
1705static SDValue
1706CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1707    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1708  assert(0 && "MemCopy does not exist yet");
1709  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1710
1711  return DAG.getMemcpy(Chain,
1712      Src.getDebugLoc(),
1713      Dst, Src, SizeNode, Flags.getByValAlign(),
1714      /*IsVol=*/false, /*AlwaysInline=*/true,
1715      MachinePointerInfo(), MachinePointerInfo());
1716}
1717
1718SDValue
1719AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1720    SDValue StackPtr, SDValue Arg,
1721    DebugLoc dl, SelectionDAG &DAG,
1722    const CCValAssign &VA,
1723    ISD::ArgFlagsTy Flags) const
1724{
1725  unsigned int LocMemOffset = VA.getLocMemOffset();
1726  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1727  PtrOff = DAG.getNode(ISD::ADD,
1728      dl,
1729      getPointerTy(), StackPtr, PtrOff);
1730  if (Flags.isByVal()) {
1731    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1732  } else {
1733    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1734        MachinePointerInfo::getStack(LocMemOffset),
1735        false, false, 0);
1736  }
1737  return PtrOff;
1738}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are not supported; force the flag off for callers.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Total stack space the outgoing arguments occupy.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Open the call sequence (reserves NumBytes of stack).
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register argument: remember the copy for after the store chain.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Tie all argument stores together so they are ordered before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their physregs, glueing the copies
    // together so they stay adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  // Emit the actual call node.
  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
1917
/// genCLZuN - Emit DAG nodes that count the leading zeros of the low 'bits'
/// bits of a 32-bit scalar or vector value, using a float-exponent trick:
/// OR in 0x3f800000, bitcast to float, add -1.0f, then read the exponent
/// field of the result. Returns 'bits' when the input is zero.
SDValue
AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
    uint32_t bits) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  EVT FPTY;
  // Matching f32 type (element-wise for vectors) for the bitcasts below.
  if (INTTY.isVector()) {
    FPTY = EVT(MVT::getVectorVT(MVT::f32,
          INTTY.getVectorNumElements()));
  } else {
    FPTY = EVT(MVT::f32);
  }
  /* static inline uint
     __clz_Nbit(uint x)
     {
     int xor = 0x3f800000U | x;
     float tp = as_float(xor);
     float t = tp + -1.0f;
     uint tint = as_uint(t);
     int cmp = (x != 0);
     uint tsrc = tint >> 23;
     uint tmask = tsrc & 0xffU;
     uint cst = (103 + N)U - tmask;
     return cmp ? cst : N;
     }
     */
  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
      && "genCLZu16 only works on 32bit types");
  // uint x = Op
  SDValue x = Op;
  // xornode = 0x3f800000 | x
  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
      DAG.getConstant(0x3f800000, INTTY), x);
  // float tp = as_float(xornode)
  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
  // float t = tp + -1.0f
  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
      DAG.getConstantFP(-1.0f, FPTY));
  // uint tint = as_uint(t)
  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
  // int cmp = (x != 0)
  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
      DAG.getConstant(0, INTTY));
  // uint tsrc = tint >> 23  (extract the float exponent field)
  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
      DAG.getConstant(23, INTTY));
  // uint tmask = tsrc & 0xFF
  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
      DAG.getConstant(0xFFU, INTTY));
  // uint cst = (103 + bits) - tmask
  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
      DAG.getConstant((103U + bits), INTTY), tmask);
  // return cmp ? cst : N  (zero input has no set bit: answer is 'bits')
  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
      DAG.getConstant(bits, INTTY));
  return cst;
}
1977
/// genCLZu32 - Emit DAG nodes that count the leading zeros of a 32-bit
/// scalar or vector value. HD5XXX and newer use the IFFB_HI instruction
/// (mapping its negative "no bit found" result to 32); HD4XXX builds the
/// result from two 16-bit genCLZuN halves.
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    //__clz_32bit(uint u)
    //{
    // int z = __amdil_ffb_hi(u) ;
    // return z < 0 ? 32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    //  static inline uint
    //__clz_32bit(uint x)
    //{
    //    uint zh = __clz_16bit(x >> 16);
    //    uint zl = __clz_16bit(x & 0xffffU);
    //   return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U  (high half is all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
/// genCLZu64 - Emit DAG nodes that count the leading zeros of a 64-bit
/// scalar or vector value, returned in the matching 32-bit type. HD5XXX+
/// composes two genCLZu32 calls over the 64-bit halves; HD4XXX composes
/// three 23-bit genCLZuN pieces. Several inline comments below were
/// corrected to match the code they annotate.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  // Matching i32 type (element-wise for vectors) for the halves.
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = (uint)(x & 0xFFFFFFFF)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = (uint)(x >> 32)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32  (high word is all zeros)
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    //  static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (implemented as zh + (unsigned)-5)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
/// genf64toi64 - Emit DAG nodes converting an f64 (or vector of f64) value
/// to i64. When includeSign is true the conversion is signed (via
/// fabs + conditional negate), otherwise unsigned. Devices newer than
/// HD6XXX split the conversion into two 32-bit FP_TO_UINT halves; older
/// devices expand it manually from the IEEE-754 bit pattern (see the
/// pseudo-code comment in the else branch).
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // Matching i64/i32 types (element-wise for vectors).
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the IEEE-754 single
    // bit patterns of 0x1.0p-32 / -0x1.0p+32 passed as integers to
    // getConstantFP — confirm this matches the expected constant encoding.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Negate and select based on whether the original input was negative.
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
    // Convert d in to 32-bit components
    long x = as_long(d);
    xhi = LCOMPHI(x);
    xlo = LCOMPLO(x);

    // Generate 'normalized' mantissa
    mhi = xhi | 0x00100000; // hidden bit
    mhi <<= 11;
    temp = xlo >> (32 - 11);
    mhi |= temp
    mlo = xlo << 11;

    // Compute shift right count from exponent
    e = (xhi >> (52-32)) & 0x7ff;
    sr = 1023 + 63 - e;
    srge64 = sr >= 64;
    srge32 = sr >= 32;

    // Compute result for 0 <= sr < 32
    rhi0 = mhi >> (sr &31);
    rlo0 = mlo >> (sr &31);
    temp = mhi << (32 - sr);
    temp |= rlo0;
    rlo0 = sr ? temp : rlo0;

    // Compute result for 32 <= sr
    rhi1 = 0;
    rlo1 = srge64 ? 0 : rhi0;

    // Pick between the 2 results
    rhi = srge32 ? rhi1 : rhi0;
    rlo = srge32 ? rlo1 : rlo0;

    // Optional saturate on overflow
    srlt0 = sr < 0;
    rhi = srlt0 ? MAXVALUE : rhi;
    rlo = srlt0 ? MAXVALUE : rlo;

    // Create long
    res = LCREATE( rlo, rhi );

    // Deal with sign bit (ignoring whether result is signed or unsigned value)
    if (includeSign) {
    sign = ((signed int) xhi) >> 31; fill with sign bit
    sign = LCREATE( sign, sign );
    res += sign;
    res ^= sign;
    }

    return res;
    }
    */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (OR in the implicit hidden bit, then
    // left-align it across the two 32-bit words).
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: two's-complement via add-then-xor with the
    // sign-filled word pair.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// Software lowering of an f64 -> i32/u32 conversion (scalar or per-lane
// vector) as a DAG of integer bit operations, for devices without a native
// double-to-int instruction.  'includeSign' selects the signed (i32)
// variant; false gives the unsigned (u32) conversion.
// The algorithm mirrors the pseudo-code comment below: split the double
// into 32-bit halves, rebuild the normalized mantissa in the high word,
// derive a right-shift count from the exponent, then shift.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // Working i32/i64 types match the input's vector width.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
  // Convert d in to 32-bit components
  long x = as_long(d);
  xhi = LCOMPHI(x);
  xlo = LCOMPLO(x);

  // Generate 'normalized' mantissa
  mhi = xhi | 0x00100000; // hidden bit
  mhi <<= 11;
  temp = xlo >> (32 - 11);
  mhi |= temp

  // Compute shift right count from exponent
  e = (xhi >> (52-32)) & 0x7ff;
  sr = 1023 + 31 - e;
  srge32 = sr >= 32;

  // Compute result for 0 <= sr < 32
  res = mhi >> (sr &31);
  res = srge32 ? 0 : res;

  // Optional saturate on overflow
  srlt0 = sr < 0;
  res = srlt0 ? MAXVALUE : res;

  // Deal with sign bit (ignoring whether result is signed or unsigned value)
  if (includeSign) {
  sign = ((signed int) xhi) >> 31; fill with sign bit
  res += sign;
  res ^= sign;
  }

  return res;
  }
  */
  // NOTE(review): the pseudo-code's "saturate on overflow" step (sr < 0)
  // is not emitted by the DAG sequence below — confirm this is intended.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT ); // 11 = bits of f64 above the mantissa in the high word

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: OR in the hidden bit, shift the
  // mantissa's top bits to the top of the word, and pull in the bits
  // that live in the low word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  // srge32 is an all-ones/all-zeros mask: true when the value shifts
  // entirely out of 32 bits (result 0).
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: sign is 0 or -1 (arithmetic fill), and
  // (res + sign) ^ sign negates res exactly when the input was negative.
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
2422
2423SDValue
2424AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
2425{
2426  SDValue DST;
2427  SDValue RHS = Op.getOperand(0);
2428  EVT RHSVT = RHS.getValueType();
2429  MVT RST = RHSVT.getScalarType().getSimpleVT();
2430  EVT LHSVT = Op.getValueType();
2431  MVT LST = LHSVT.getScalarType().getSimpleVT();
2432  DebugLoc DL = Op.getDebugLoc();
2433  const AMDILTargetMachine*
2434    amdtm = reinterpret_cast<const AMDILTargetMachine*>
2435    (&this->getTargetMachine());
2436  const AMDILSubtarget*
2437    stm = static_cast<const AMDILSubtarget*>(
2438        amdtm->getSubtargetImpl());
2439  if (RST == MVT::f64 && RHSVT.isVector()
2440      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
2441    // We dont support vector 64bit floating point convertions.
2442    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
2443      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2444          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2445      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
2446      if (!x) {
2447        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2448      } else {
2449        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
2450            DST, op, DAG.getTargetConstant(x, MVT::i32));
2451      }
2452
2453    }
2454  } else {
2455    if (RST == MVT::f64
2456        && LST == MVT::i32) {
2457      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2458        DST = SDValue(Op.getNode(), 0);
2459      } else {
2460        DST = genf64toi32(RHS, DAG, false);
2461      }
2462    } else if (RST == MVT::f64
2463        && LST == MVT::i64) {
2464      DST = genf64toi64(RHS, DAG, false);
2465    } else if (RST == MVT::f64
2466        && (LST == MVT::i8 || LST == MVT::i16)) {
2467      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2468        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
2469      } else {
2470        SDValue ToInt = genf64toi32(RHS, DAG, false);
2471        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
2472      }
2473
2474    } else {
2475      DST = SDValue(Op.getNode(), 0);
2476    }
2477  }
2478  return DST;
2479}
// Software lowering of a u32 -> f64 conversion (scalar or per-lane
// vector).  Newer CAL runtimes use the classic magic-constant trick
// (pack the u32 under the exponent of 2^52, subtract 2^52); older ones
// build the double manually from a count-leading-zeros normalization.
// A u32 always fits exactly in f64's 52-bit mantissa, so no rounding
// step is needed in either path.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  // Working i32/i64 types match the input's vector width.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    // 0x43300000 is the high word (bit pattern) of the double 2^52.
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    // CMOVLOG(x, exp, x): keep exp when x is non-zero, else 0 (== x).
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac: split the 31-bit fraction across the two
    // 32-bit halves of the double (11 bits of shift = f64 layout).
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
2546SDValue
2547AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
2548    SelectionDAG &DAG) const
2549{
2550  EVT RHSVT = RHS.getValueType();
2551  DebugLoc DL = RHS.getDebugLoc();
2552  EVT INTVT;
2553  EVT LONGVT;
2554  bool isVec = RHSVT.isVector();
2555  if (isVec) {
2556    INTVT = EVT(MVT::getVectorVT(MVT::i32,
2557          RHSVT.getVectorNumElements()));
2558  } else {
2559    INTVT = EVT(MVT::i32);
2560  }
2561  LONGVT = RHSVT;
2562  SDValue x = RHS;
2563  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2564      &this->getTargetMachine())->getSubtargetImpl();
2565  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2566    // double dhi = (double)(as_uint2(x).y);
2567    // double dlo = (double)(as_uint2(x).x);
2568    // return mad(dhi, 0x1.0p+32, dlo)
2569    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
2570    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
2571    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
2572    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
2573    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
2574        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
2575  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
2576    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
2577    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
2578    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
2579    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
2580    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
2581    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
2582    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
2583    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
2584    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
2585    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
2586        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
2587    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
2588    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
2589
2590  } else {
2591    SDValue clz = genCLZu64(x, DAG);
2592    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
2593    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
2594
2595    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
2596    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
2597        DAG.getConstant( (1023+63), INTVT), clz );
2598    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
2599    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2600        mash, exp, mash );  // exp = exp, or 0 if input was 0
2601
2602    // Normalize frac
2603    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
2604        clz, DAG.getConstant( 31, INTVT ) );
2605    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
2606        DAG.getConstant( 32, INTVT ), clz31 );
2607    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
2608    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
2609    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
2610    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
2611    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2612    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
2613    SDValue rlo2 = DAG.getConstant( 0, INTVT );
2614    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
2615        clz, DAG.getConstant( 32, INTVT ) );
2616    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2617        clz32, rhi2, rhi1 );
2618    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
2619        clz32, rlo2, rlo1 );
2620
2621    // Eliminate hidden bit
2622    rhi = DAG.getNode( ISD::AND, DL, INTVT,
2623        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
2624
2625    // Save bits needed to round properly
2626    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
2627        rlo, DAG.getConstant( 0x7ff, INTVT ) );
2628
2629    // Pack exponent and frac
2630    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
2631        rlo, DAG.getConstant( 11, INTVT ) );
2632    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
2633        rhi, DAG.getConstant( (32 - 11), INTVT ) );
2634    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
2635    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
2636        rhi, DAG.getConstant( 11, INTVT ) );
2637    exp = DAG.getNode( ISD::SHL, DL, INTVT,
2638        exp, DAG.getConstant( 20, INTVT ) );
2639    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
2640
2641    // Compute rounding bit
2642    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
2643        rlo, DAG.getConstant( 1, INTVT ) );
2644    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
2645        round, DAG.getConstant( 0x3ff, INTVT ) );
2646    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
2647        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
2648        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
2649    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
2650    round = DAG.getNode( ISD::SRL, DL, INTVT,
2651        round, DAG.getConstant( 10, INTVT ) );
2652    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
2653
2654    // Add rounding bit
2655    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
2656        round, DAG.getConstant( 0, INTVT ) );
2657    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
2658    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
2659    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
2660  }
2661}
2662SDValue
2663AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
2664{
2665  SDValue RHS = Op.getOperand(0);
2666  EVT RHSVT = RHS.getValueType();
2667  MVT RST = RHSVT.getScalarType().getSimpleVT();
2668  EVT LHSVT = Op.getValueType();
2669  MVT LST = LHSVT.getScalarType().getSimpleVT();
2670  DebugLoc DL = Op.getDebugLoc();
2671  SDValue DST;
2672  EVT INTVT;
2673  EVT LONGVT;
2674  const AMDILTargetMachine*
2675    amdtm = reinterpret_cast<const AMDILTargetMachine*>
2676    (&this->getTargetMachine());
2677  const AMDILSubtarget*
2678    stm = static_cast<const AMDILSubtarget*>(
2679        amdtm->getSubtargetImpl());
2680  if (LST == MVT::f64 && LHSVT.isVector()
2681      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
2682    // We dont support vector 64bit floating point convertions.
2683    DST = Op;
2684    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
2685      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
2686          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
2687      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
2688      if (!x) {
2689        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
2690      } else {
2691        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
2692            op, DAG.getTargetConstant(x, MVT::i32));
2693      }
2694
2695    }
2696  } else {
2697
2698    if (RST == MVT::i32
2699        && LST == MVT::f64) {
2700      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
2701        DST = SDValue(Op.getNode(), 0);
2702      } else {
2703        DST = genu32tof64(RHS, LHSVT, DAG);
2704      }
2705    } else if (RST == MVT::i64
2706        && LST == MVT::f64) {
2707      DST = genu64tof64(RHS, LHSVT, DAG);
2708    } else {
2709      DST = SDValue(Op.getNode(), 0);
2710    }
2711  }
2712  return DST;
2713}
2714
// Lower SUB for 64-bit integer types (i64 / v2i64) by splitting each
// operand into 32-bit halves: subtract the halves independently, then
// propagate the borrow from the low half into the high half.  Other
// types are passed through unchanged.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
      amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // cmp is the borrow mask: -1 (all ones) per lane where LHSLO < RHSLO,
    // 0 otherwise, so adding it to INTHI subtracts the borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: CMP is done per scalar lane, then the two lane
      // results are reassembled into a v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
2778SDValue
2779AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
2780{
2781  EVT OVT = Op.getValueType();
2782  SDValue DST;
2783  if (OVT.getScalarType() == MVT::f64) {
2784    DST = LowerFDIV64(Op, DAG);
2785  } else if (OVT.getScalarType() == MVT::f32) {
2786    DST = LowerFDIV32(Op, DAG);
2787  } else {
2788    DST = SDValue(Op.getNode(), 0);
2789  }
2790  return DST;
2791}
2792
2793SDValue
2794AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
2795{
2796  EVT OVT = Op.getValueType();
2797  SDValue DST;
2798  if (OVT.getScalarType() == MVT::i64) {
2799    DST = LowerSDIV64(Op, DAG);
2800  } else if (OVT.getScalarType() == MVT::i32) {
2801    DST = LowerSDIV32(Op, DAG);
2802  } else if (OVT.getScalarType() == MVT::i16
2803      || OVT.getScalarType() == MVT::i8) {
2804    DST = LowerSDIV24(Op, DAG);
2805  } else {
2806    DST = SDValue(Op.getNode(), 0);
2807  }
2808  return DST;
2809}
2810
2811SDValue
2812AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
2813{
2814  EVT OVT = Op.getValueType();
2815  SDValue DST;
2816  if (OVT.getScalarType() == MVT::i64) {
2817    DST = LowerSREM64(Op, DAG);
2818  } else if (OVT.getScalarType() == MVT::i32) {
2819    DST = LowerSREM32(Op, DAG);
2820  } else if (OVT.getScalarType() == MVT::i16) {
2821    DST = LowerSREM16(Op, DAG);
2822  } else if (OVT.getScalarType() == MVT::i8) {
2823    DST = LowerSREM8(Op, DAG);
2824  } else {
2825    DST = SDValue(Op.getNode(), 0);
2826  }
2827  return DST;
2828}
2829
2830SDValue
2831AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
2832{
2833  EVT OVT = Op.getValueType();
2834  SDValue DST;
2835  if (OVT.getScalarType() == MVT::i64) {
2836    DST = LowerUREM64(Op, DAG);
2837  } else if (OVT.getScalarType() == MVT::i32) {
2838    DST = LowerUREM32(Op, DAG);
2839  } else if (OVT.getScalarType() == MVT::i16) {
2840    DST = LowerUREM16(Op, DAG);
2841  } else if (OVT.getScalarType() == MVT::i8) {
2842    DST = LowerUREM8(Op, DAG);
2843  } else {
2844    DST = SDValue(Op.getNode(), 0);
2845  }
2846  return DST;
2847}
2848
// Lower MUL for 64-bit integer types (i64 / v2i64) as the classic
// schoolbook expansion on 32-bit halves:
//   lo(result) = lo(a) * lo(b)                              (low 32 bits)
//   hi(result) = hi(a)*lo(b) + lo(a)*hi(b) + mulhi(lo(a), lo(b))
// The hi(a)*hi(b) term is dropped because it only affects bits >= 64.
// Other types are passed through unchanged.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    // Split both operands into 32-bit low/high halves.
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0 -- carry out of the low-half product
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Recombine the 32-bit halves into the 64-bit result.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower BUILD_VECTOR: broadcast operand 0 with VBUILD, then insert each
// remaining non-undef operand.  The switch intentionally falls through
// from case 4 -> 3 -> 2 so higher lanes are inserted first.
// NOTE(review): the insert indices 7/6/5 are element index + 4 —
// presumably an AMDIL-specific swizzle encoding consumed by the
// INSERT_VECTOR_ELT lowering; confirm against that handler.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Seed the whole vector with lane 0's value.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // FALLTHROUGH: also insert lanes 2 and 1.
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // FALLTHROUGH: also insert lane 1.
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
2977
// Lower INSERT_VECTOR_ELT to AMDIL's VINSERT, which takes two packed
// byte-mask immediates selecting the destination lane.  A constant
// index becomes a single VINSERT; a runtime index becomes a chain of
// per-lane VINSERTs selected by CMOVLOG on (index == lane).
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  // Inserting into a scalar is the identity on the "vector" operand.
  if (!VT.isVector()) {
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Value to insert: operand 1, unless it is undef, in which case the
  // existing vector value is reused.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static lane: mask2 clears the destination byte of the identity
    // swizzle 0x04030201, mask3 selects the inserted value's byte.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic lane: build the lane-0 insert (swizzleNum == 0), then for
    // each other lane build its insert and select it when the runtime
    // index matches that lane.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      // Broadcast the scalar compare result so CMOVLOG selects whole lanes.
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
3033
// Lower EXTRACT_VECTOR_ELT to AMDIL's VEXTRACT, whose lane immediate is
// 1-based (hence the +1 on the constant path).  A runtime index becomes
// a chain of per-lane extracts selected by CMOVLOG on (index == lane).
SDValue
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t swizzleNum = 0;
  DebugLoc DL = Op.getDebugLoc();
  SDValue Res;
  // Extracting from a scalar is the identity.
  if (!Op.getOperand(0).getValueType().isVector()) {
    Res = Op.getOperand(0);
    return Res;
  }
  if (CSDN) {
    // Static vector extraction
    swizzleNum = CSDN->getZExtValue() + 1; // VEXTRACT lanes are 1-based
    Res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT,
        Op.getOperand(0),
        DAG.getTargetConstant(swizzleNum, MVT::i32));
  } else {
    // Dynamic extraction: start from lane 1 (element 0) and select each
    // other lane when the runtime index matches.
    // NOTE(review): the compare below tests the raw (0-based) index Op1
    // against the 1-based lane number x — this looks off by one relative
    // to the constant path's +1; confirm intended index convention.
    SDValue Op1 = Op.getOperand(1);
    uint32_t vecSize = 4;
    SDValue Op0 = Op.getOperand(0);
    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT, Op0,
        DAG.getTargetConstant(1, MVT::i32));
    if (Op0.getValueType().isVector()) {
      vecSize = Op0.getValueType().getVectorNumElements();
    }
    for (uint32_t x = 2; x <= vecSize; ++x) {
      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
          DL, VT, Op0,
          DAG.getTargetConstant(x, MVT::i32));
      SDValue c = DAG.getNode(AMDILISD::CMP,
          DL, Op1.getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op1, DAG.getConstant(x, MVT::i32));
      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
          VT, c, t, res);

    }
    Res = res;
  }
  return Res;
}
3080
3081SDValue
3082AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
3083    SelectionDAG &DAG) const
3084{
3085  uint32_t vecSize = Op.getValueType().getVectorNumElements();
3086  SDValue src = Op.getOperand(0);
3087  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3088  uint64_t offset = 0;
3089  EVT vecType = Op.getValueType().getVectorElementType();
3090  DebugLoc DL = Op.getDebugLoc();
3091  SDValue Result;
3092  if (CSDN) {
3093    offset = CSDN->getZExtValue();
3094    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3095        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
3096    Result = DAG.getNode(AMDILISD::VBUILD, DL,
3097        Op.getValueType(), Result);
3098    for (uint32_t x = 1; x < vecSize; ++x) {
3099      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3100          src, DAG.getConstant(offset + x, MVT::i32));
3101      if (elt.getOpcode() != ISD::UNDEF) {
3102        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3103            Op.getValueType(), Result, elt,
3104            DAG.getConstant(x, MVT::i32));
3105      }
3106    }
3107  } else {
3108    SDValue idx = Op.getOperand(1);
3109    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3110        DL, vecType, src, idx);
3111    Result = DAG.getNode(AMDILISD::VBUILD, DL,
3112        Op.getValueType(), Result);
3113    for (uint32_t x = 1; x < vecSize; ++x) {
3114      idx = DAG.getNode(ISD::ADD, DL, vecType,
3115          idx, DAG.getConstant(1, MVT::i32));
3116      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
3117          src, idx);
3118      if (elt.getOpcode() != ISD::UNDEF) {
3119        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3120            Op.getValueType(), Result, elt, idx);
3121      }
3122    }
3123  }
3124  return Result;
3125}
3126SDValue
3127AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
3128    SelectionDAG &DAG) const
3129{
3130  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
3131      Op.getDebugLoc(),
3132      Op.getValueType(),
3133      Op.getOperand(0));
3134  return Res;
3135}
3136SDValue
3137AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
3138{
3139  SDValue Cond = Op.getOperand(0);
3140  SDValue LHS = Op.getOperand(1);
3141  SDValue RHS = Op.getOperand(2);
3142  DebugLoc DL = Op.getDebugLoc();
3143  Cond = getConversionNode(DAG, Cond, Op, true);
3144  Cond = DAG.getNode(AMDILISD::CMOVLOG,
3145      DL,
3146      Op.getValueType(), Cond, LHS, RHS);
3147  return Cond;
3148}
3149SDValue
3150AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
3151{
3152  SDValue Cond;
3153  SDValue LHS = Op.getOperand(0);
3154  SDValue RHS = Op.getOperand(1);
3155  SDValue CC  = Op.getOperand(2);
3156  DebugLoc DL = Op.getDebugLoc();
3157  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
3158  unsigned int AMDILCC = CondCCodeToCC(
3159      SetCCOpcode,
3160      LHS.getValueType().getSimpleVT().SimpleTy);
3161  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
3162  Cond = DAG.getNode(
3163      ISD::SELECT_CC,
3164      Op.getDebugLoc(),
3165      LHS.getValueType(),
3166      LHS, RHS,
3167      DAG.getConstant(-1, MVT::i32),
3168      DAG.getConstant(0, MVT::i32),
3169      CC);
3170  Cond = getConversionNode(DAG, Cond, Op, true);
3171  Cond = DAG.getNode(
3172      ISD::AND,
3173      DL,
3174      Cond.getValueType(),
3175      DAG.getConstant(1, Cond.getValueType()),
3176      Cond);
3177  return Cond;
3178}
3179
// Lower ISD::SIGN_EXTEND_INREG: sign-extend the low BVT bits of each
// element of operand 0 in place, using a left shift followed by an
// arithmetic right shift by the same amount.  Sub-32-bit values are
// temporarily widened to 32 bits so the shift pair operates on a type
// the backend handles, then truncated back.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  // Operand 1 carries the type whose width we sign-extend from.
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // Non-simple types are treated as 1 bit wide here (forces the widening
  // path below).
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    // Recompute the shift amount relative to the widened 32-bit type.
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
3211EVT
3212AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
3213{
3214  int iSize = (size * numEle);
3215  int vEle = (iSize >> ((size == 64) ? 6 : 5));
3216  if (!vEle) {
3217    vEle = 1;
3218  }
3219  if (size == 64) {
3220    if (vEle == 1) {
3221      return EVT(MVT::i64);
3222    } else {
3223      return EVT(MVT::getVectorVT(MVT::i64, vEle));
3224    }
3225  } else {
3226    if (vEle == 1) {
3227      return EVT(MVT::i32);
3228    } else {
3229      return EVT(MVT::getVectorVT(MVT::i32, vEle));
3230    }
3231  }
3232}
3233
3234SDValue
3235AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
3236{
3237  SDValue Src = Op.getOperand(0);
3238  SDValue Dst = Op;
3239  SDValue Res;
3240  DebugLoc DL = Op.getDebugLoc();
3241  EVT SrcVT = Src.getValueType();
3242  EVT DstVT = Dst.getValueType();
3243  // Lets bitcast the floating point types to an
3244  // equivalent integer type before converting to vectors.
3245  if (SrcVT.getScalarType().isFloatingPoint()) {
3246    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
3247          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
3248          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
3249        Src);
3250    SrcVT = Src.getValueType();
3251  }
3252  uint32_t ScalarSrcSize = SrcVT.getScalarType()
3253    .getSimpleVT().getSizeInBits();
3254  uint32_t ScalarDstSize = DstVT.getScalarType()
3255    .getSimpleVT().getSizeInBits();
3256  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
3257  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
3258  bool isVec = SrcVT.isVector();
3259  if (DstVT.getScalarType().isInteger() &&
3260      (SrcVT.getScalarType().isInteger()
3261       || SrcVT.getScalarType().isFloatingPoint())) {
3262    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
3263        || (ScalarSrcSize == 64
3264          && DstNumEle == 4
3265          && ScalarDstSize == 16)) {
3266      // This is the problematic case when bitcasting i64 <-> <4 x i16>
3267      // This approach is a little different as we cannot generate a
3268      // <4 x i64> vector
3269      // as that is illegal in our backend and we are already past
3270      // the DAG legalizer.
3271      // So, in this case, we will do the following conversion.
3272      // Case 1:
3273      // %dst = <4 x i16> %src bitconvert i64 ==>
3274      // %tmp = <4 x i16> %src convert <4 x i32>
3275      // %tmp = <4 x i32> %tmp and 0xFFFF
3276      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
3277      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
3278      // %dst = <2 x i32> %tmp bitcast i64
3279      // case 2:
3280      // %dst = i64 %src bitconvert <4 x i16> ==>
3281      // %tmp = i64 %src bitcast <2 x i32>
3282      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
3283      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
3284      // %tmp = <4 x i32> %tmp and 0xFFFF
3285      // %dst = <4 x i16> %tmp bitcast <4 x i32>
3286      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
3287          DAG.getConstant(0xFFFF, MVT::i32));
3288      SDValue const16 = DAG.getConstant(16, MVT::i32);
3289      if (ScalarDstSize == 64) {
3290        // case 1
3291        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
3292        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
3293        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3294            Op, DAG.getConstant(0, MVT::i32));
3295        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3296            Op, DAG.getConstant(1, MVT::i32));
3297        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
3298        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3299            Op, DAG.getConstant(2, MVT::i32));
3300        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
3301            Op, DAG.getConstant(3, MVT::i32));
3302        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
3303        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
3304        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
3305        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
3306        return Res;
3307      } else {
3308        // case 2
3309        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
3310        SDValue lor16
3311          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
3312        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
3313        SDValue hir16
3314          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
3315        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
3316            MVT::v4i32, lo);
3317        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3318            getPointerTy(), DAG.getConstant(1, MVT::i32));
3319        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3320            resVec, lor16, idxVal);
3321        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3322            getPointerTy(), DAG.getConstant(2, MVT::i32));
3323        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3324            resVec, hi, idxVal);
3325        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
3326            getPointerTy(), DAG.getConstant(3, MVT::i32));
3327        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
3328            resVec, hir16, idxVal);
3329        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
3330        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
3331        return Res;
3332      }
3333    } else {
3334      // There are four cases we need to worry about for bitcasts
3335      // where the size of all
3336      // source, intermediates and result is <= 128 bits, unlike
3337      // the above case
3338      // 1) Sub32bit bitcast 32bitAlign
3339      // %dst = <4 x i8> bitcast i32
3340      // (also <[2|4] x i16> to <[2|4] x i32>)
3341      // 2) 32bitAlign bitcast Sub32bit
3342      // %dst = i32 bitcast <4 x i8>
3343      // 3) Sub32bit bitcast LargerSub32bit
3344      // %dst = <2 x i8> bitcast i16
3345      // (also <4 x i8> to <2 x i16>)
3346      // 4) Sub32bit bitcast SmallerSub32bit
3347      // %dst = i16 bitcast <2 x i8>
3348      // (also <2 x i16> to <4 x i8>)
3349      // This also only handles types that are powers of two
3350      if ((ScalarDstSize & (ScalarDstSize - 1))
3351          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
3352      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
3353        // case 1:
3354        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
3355#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
3356        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
3357#else
3358        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3359            DAG.getUNDEF(IntTy.getScalarType()));
3360        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3361          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3362              getPointerTy(), DAG.getConstant(x, MVT::i32));
3363          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3364              SrcVT.getScalarType(), Src,
3365              DAG.getConstant(x, MVT::i32));
3366          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
3367          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
3368              res, temp, idx);
3369        }
3370#endif
3371        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3372            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
3373        SDValue *newEle = new SDValue[SrcNumEle];
3374        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
3375        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3376          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3377              IntTy.getScalarType(), res,
3378              DAG.getConstant(x, MVT::i32));
3379        }
3380        uint32_t Ratio = SrcNumEle / DstNumEle;
3381        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3382          if (x % Ratio) {
3383            newEle[x] = DAG.getNode(ISD::SHL, DL,
3384                IntTy.getScalarType(), newEle[x],
3385                DAG.getConstant(ScalarSrcSize * (x % Ratio),
3386                  MVT::i32));
3387          }
3388        }
3389        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
3390          newEle[x] = DAG.getNode(ISD::OR, DL,
3391              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
3392        }
3393        if (ScalarSrcSize == 8) {
3394          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
3395            newEle[x] = DAG.getNode(ISD::OR, DL,
3396                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
3397          }
3398          if (DstNumEle == 1) {
3399            Dst = newEle[0];
3400          } else {
3401            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3402                newEle[0]);
3403            for (uint32_t x = 1; x < DstNumEle; ++x) {
3404              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3405                  getPointerTy(), DAG.getConstant(x, MVT::i32));
3406              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3407                  DstVT, Dst, newEle[x * 4], idx);
3408            }
3409          }
3410        } else {
3411          if (DstNumEle == 1) {
3412            Dst = newEle[0];
3413          } else {
3414            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
3415                newEle[0]);
3416            for (uint32_t x = 1; x < DstNumEle; ++x) {
3417              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3418                  getPointerTy(), DAG.getConstant(x, MVT::i32));
3419              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
3420                  DstVT, Dst, newEle[x * 2], idx);
3421            }
3422          }
3423        }
3424        delete [] newEle;
3425        return Dst;
3426      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
3427        // case 2:
3428        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
3429        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
3430            DAG.getUNDEF(IntTy.getScalarType()));
3431        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
3432        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3433          for (uint32_t y = 0; y < mult; ++y) {
3434            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3435                getPointerTy(),
3436                DAG.getConstant(x * mult + y, MVT::i32));
3437            SDValue t;
3438            if (SrcNumEle > 1) {
3439              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3440                  DL, SrcVT.getScalarType(), Src,
3441                  DAG.getConstant(x, MVT::i32));
3442            } else {
3443              t = Src;
3444            }
3445            if (y != 0) {
3446              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
3447                  t, DAG.getConstant(y * ScalarDstSize,
3448                    MVT::i32));
3449            }
3450            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
3451                DL, IntTy, vec, t, idx);
3452          }
3453        }
3454        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
3455        return Dst;
3456      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
3457        // case 3:
3458        SDValue *numEle = new SDValue[SrcNumEle];
3459        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3460          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3461              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
3462          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
3463          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
3464              DAG.getConstant(0xFF, MVT::i16));
3465        }
3466        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
3467          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
3468              DAG.getConstant(8, MVT::i16));
3469          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
3470              numEle[x-1], numEle[x]);
3471        }
3472        if (DstNumEle > 1) {
3473          // If we are not a scalar i16, the only other case is a
3474          // v2i16 since we can't have v8i8 at this point, v4i16
3475          // cannot be generated
3476          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
3477              numEle[0]);
3478          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3479              getPointerTy(), DAG.getConstant(1, MVT::i32));
3480          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
3481              Dst, numEle[2], idx);
3482        } else {
3483          Dst = numEle[0];
3484        }
3485        delete [] numEle;
3486        return Dst;
3487      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
3488        // case 4:
3489        SDValue *numEle = new SDValue[DstNumEle];
3490        for (uint32_t x = 0; x < SrcNumEle; ++x) {
3491          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3492              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
3493          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
3494              numEle[x * 2], DAG.getConstant(8, MVT::i16));
3495        }
3496        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
3497        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
3498        for (uint32_t x = 1; x < DstNumEle; ++x) {
3499          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
3500              getPointerTy(), DAG.getConstant(x, MVT::i32));
3501          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
3502              Dst, numEle[x], idx);
3503        }
3504        delete [] numEle;
3505        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
3506        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
3507        return Res;
3508      }
3509    }
3510  }
3511  Res = DAG.getNode(AMDILISD::BITCONV,
3512      Dst.getDebugLoc(),
3513      Dst.getValueType(), Src);
3514  return Res;
3515}
3516
3517SDValue
3518AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
3519    SelectionDAG &DAG) const
3520{
3521  SDValue Chain = Op.getOperand(0);
3522  SDValue Size = Op.getOperand(1);
3523  unsigned int SPReg = AMDIL::SP;
3524  DebugLoc DL = Op.getDebugLoc();
3525  SDValue SP = DAG.getCopyFromReg(Chain,
3526      DL,
3527      SPReg, MVT::i32);
3528  SDValue NewSP = DAG.getNode(ISD::ADD,
3529      DL,
3530      MVT::i32, SP, Size);
3531  Chain = DAG.getCopyToReg(SP.getValue(1),
3532      DL,
3533      SPReg, NewSP);
3534  SDValue Ops[2] = {NewSP, Chain};
3535  Chain = DAG.getMergeValues(Ops, 2 ,DL);
3536  return Chain;
3537}
3538SDValue
3539AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
3540{
3541  SDValue Chain = Op.getOperand(0);
3542  SDValue Cond  = Op.getOperand(1);
3543  SDValue Jump  = Op.getOperand(2);
3544  SDValue Result;
3545  Result = DAG.getNode(
3546      AMDILISD::BRANCH_COND,
3547      Op.getDebugLoc(),
3548      Op.getValueType(),
3549      Chain, Jump, Cond);
3550  return Result;
3551}
3552
3553SDValue
3554AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
3555{
3556  SDValue Chain = Op.getOperand(0);
3557  SDValue CC = Op.getOperand(1);
3558  SDValue LHS   = Op.getOperand(2);
3559  SDValue RHS   = Op.getOperand(3);
3560  SDValue JumpT  = Op.getOperand(4);
3561  SDValue CmpValue;
3562  SDValue Result;
3563  CmpValue = DAG.getNode(
3564      ISD::SELECT_CC,
3565      Op.getDebugLoc(),
3566      LHS.getValueType(),
3567      LHS, RHS,
3568      DAG.getConstant(-1, MVT::i32),
3569      DAG.getConstant(0, MVT::i32),
3570      CC);
3571  Result = DAG.getNode(
3572      AMDILISD::BRANCH_COND,
3573      CmpValue.getDebugLoc(),
3574      MVT::Other, Chain,
3575      JumpT, CmpValue);
3576  return Result;
3577}
3578
3579SDValue
3580AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
3581{
3582  SDValue Result = DAG.getNode(
3583      AMDILISD::DP_TO_FP,
3584      Op.getDebugLoc(),
3585      Op.getValueType(),
3586      Op.getOperand(0),
3587      Op.getOperand(1));
3588  return Result;
3589}
3590
3591SDValue
3592AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
3593{
3594  SDValue Result = DAG.getNode(
3595      AMDILISD::VCONCAT,
3596      Op.getDebugLoc(),
3597      Op.getValueType(),
3598      Op.getOperand(0),
3599      Op.getOperand(1));
3600  return Result;
3601}
// LowerRET - Lower an ISD::RET node.
// Assigns each outgoing value to a register per RetCC_AMDIL32, marks
// those registers live-out, copies the values into them (glued together
// so the copies stay adjacent), and emits the target RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  // RetOps holds: chain, bytes-to-pop constant, then optionally the glue
  // of the last register copy.
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    // (each copy consumes the previous copy's glue result)
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
3667
// Function alignment override; 0 requests no extra alignment beyond the
// default.
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}
3673
3674void
3675AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
3676    MachineBasicBlock::iterator &BBI,
3677    DebugLoc *DL, const TargetInstrInfo *TII) const
3678{
3679  mBB = BB;
3680  mBBI = BBI;
3681  mDL = DL;
3682  mTII = TII;
3683}
3684uint32_t
3685AMDILTargetLowering::genVReg(uint32_t regType) const
3686{
3687  return mBB->getParent()->getRegInfo().createVirtualRegister(
3688      getTargetMachine().getRegisterInfo()->getRegClass(regType));
3689}
3690
// Build 'opcode dst' at the cached insertion point (see setPrivateData).
MachineInstrBuilder
AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
{
  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
}
3696
3697MachineInstrBuilder
3698AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3699    uint32_t src1) const
3700{
3701  return generateMachineInst(opcode, dst).addReg(src1);
3702}
3703
3704MachineInstrBuilder
3705AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3706    uint32_t src1, uint32_t src2) const
3707{
3708  return generateMachineInst(opcode, dst, src1).addReg(src2);
3709}
3710
3711MachineInstrBuilder
3712AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
3713    uint32_t src1, uint32_t src2, uint32_t src3) const
3714{
3715  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
3716}
3717
3718
3719SDValue
3720AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
3721{
3722  DebugLoc DL = Op.getDebugLoc();
3723  EVT OVT = Op.getValueType();
3724  SDValue LHS = Op.getOperand(0);
3725  SDValue RHS = Op.getOperand(1);
3726  MVT INTTY;
3727  MVT FLTTY;
3728  if (!OVT.isVector()) {
3729    INTTY = MVT::i32;
3730    FLTTY = MVT::f32;
3731  } else if (OVT.getVectorNumElements() == 2) {
3732    INTTY = MVT::v2i32;
3733    FLTTY = MVT::v2f32;
3734  } else if (OVT.getVectorNumElements() == 4) {
3735    INTTY = MVT::v4i32;
3736    FLTTY = MVT::v4f32;
3737  }
3738  unsigned bitsize = OVT.getScalarType().getSizeInBits();
3739  // char|short jq = ia ^ ib;
3740  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
3741
3742  // jq = jq >> (bitsize - 2)
3743  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
3744
3745  // jq = jq | 0x1
3746  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
3747
3748  // jq = (int)jq
3749  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
3750
3751  // int ia = (int)LHS;
3752  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
3753
3754  // int ib, (int)RHS;
3755  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
3756
3757  // float fa = (float)ia;
3758  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3759
3760  // float fb = (float)ib;
3761  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3762
3763  // float fq = native_divide(fa, fb);
3764  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3765
3766  // fq = trunc(fq);
3767  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3768
3769  // float fqneg = -fq;
3770  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
3771
3772  // float fr = mad(fqneg, fb, fa);
3773  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
3774
3775  // int iq = (int)fq;
3776  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3777
3778  // fr = fabs(fr);
3779  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
3780
3781  // fb = fabs(fb);
3782  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
3783
3784  // int cv = fr >= fb;
3785  SDValue cv;
3786  if (INTTY == MVT::i32) {
3787    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3788  } else {
3789    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
3790  }
3791  // jq = (cv ? jq : 0);
3792  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
3793      DAG.getConstant(0, OVT));
3794  // dst = iq + jq;
3795  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
3796  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
3797  return iq;
3798}
3799
3800SDValue
3801AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
3802{
3803  DebugLoc DL = Op.getDebugLoc();
3804  EVT OVT = Op.getValueType();
3805  SDValue LHS = Op.getOperand(0);
3806  SDValue RHS = Op.getOperand(1);
3807  // The LowerSDIV32 function generates equivalent to the following IL.
3808  // mov r0, LHS
3809  // mov r1, RHS
3810  // ilt r10, r0, 0
3811  // ilt r11, r1, 0
3812  // iadd r0, r0, r10
3813  // iadd r1, r1, r11
3814  // ixor r0, r0, r10
3815  // ixor r1, r1, r11
3816  // udiv r0, r0, r1
3817  // ixor r10, r10, r11
3818  // iadd r0, r0, r10
3819  // ixor DST, r0, r10
3820
3821  // mov r0, LHS
3822  SDValue r0 = LHS;
3823
3824  // mov r1, RHS
3825  SDValue r1 = RHS;
3826
3827  // ilt r10, r0, 0
3828  SDValue r10 = DAG.getSelectCC(DL,
3829      r0, DAG.getConstant(0, OVT),
3830      DAG.getConstant(-1, MVT::i32),
3831      DAG.getConstant(0, MVT::i32),
3832      ISD::SETLT);
3833
3834  // ilt r11, r1, 0
3835  SDValue r11 = DAG.getSelectCC(DL,
3836      r1, DAG.getConstant(0, OVT),
3837      DAG.getConstant(-1, MVT::i32),
3838      DAG.getConstant(0, MVT::i32),
3839      ISD::SETLT);
3840
3841  // iadd r0, r0, r10
3842  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3843
3844  // iadd r1, r1, r11
3845  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
3846
3847  // ixor r0, r0, r10
3848  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3849
3850  // ixor r1, r1, r11
3851  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
3852
3853  // udiv r0, r0, r1
3854  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
3855
3856  // ixor r10, r10, r11
3857  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
3858
3859  // iadd r0, r0, r10
3860  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
3861
3862  // ixor DST, r0, r10
3863  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
3864  return DST;
3865}
3866
3867SDValue
3868AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
3869{
3870  return SDValue(Op.getNode(), 0);
3871}
3872
3873SDValue
3874AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
3875{
3876  DebugLoc DL = Op.getDebugLoc();
3877  EVT OVT = Op.getValueType();
3878  SDValue LHS = Op.getOperand(0);
3879  SDValue RHS = Op.getOperand(1);
3880  MVT INTTY;
3881  MVT FLTTY;
3882  if (!OVT.isVector()) {
3883    INTTY = MVT::i32;
3884    FLTTY = MVT::f32;
3885  } else if (OVT.getVectorNumElements() == 2) {
3886    INTTY = MVT::v2i32;
3887    FLTTY = MVT::v2f32;
3888  } else if (OVT.getVectorNumElements() == 4) {
3889    INTTY = MVT::v4i32;
3890    FLTTY = MVT::v4f32;
3891  }
3892
3893  // The LowerUDIV24 function implements the following CL.
3894  // int ia = (int)LHS
3895  // float fa = (float)ia
3896  // int ib = (int)RHS
3897  // float fb = (float)ib
3898  // float fq = native_divide(fa, fb)
3899  // fq = trunc(fq)
3900  // float t = mad(fq, fb, fb)
3901  // int iq = (int)fq - (t <= fa)
3902  // return (type)iq
3903
3904  // int ia = (int)LHS
3905  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
3906
3907  // float fa = (float)ia
3908  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
3909
3910  // int ib = (int)RHS
3911  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
3912
3913  // float fb = (float)ib
3914  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
3915
3916  // float fq = native_divide(fa, fb)
3917  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
3918
3919  // fq = trunc(fq)
3920  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
3921
3922  // float t = mad(fq, fb, fb)
3923  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
3924
3925  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
3926  SDValue iq;
3927  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
3928  if (INTTY == MVT::i32) {
3929    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3930  } else {
3931    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
3932  }
3933  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
3934
3935
3936  // return (type)iq
3937  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
3938  return iq;
3939
3940}
3941
3942SDValue
3943AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
3944{
3945  DebugLoc DL = Op.getDebugLoc();
3946  EVT OVT = Op.getValueType();
3947  MVT INTTY = MVT::i32;
3948  if (OVT == MVT::v2i8) {
3949    INTTY = MVT::v2i32;
3950  } else if (OVT == MVT::v4i8) {
3951    INTTY = MVT::v4i32;
3952  }
3953  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3954  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3955  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3956  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3957  return LHS;
3958}
3959
3960SDValue
3961AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
3962{
3963  DebugLoc DL = Op.getDebugLoc();
3964  EVT OVT = Op.getValueType();
3965  MVT INTTY = MVT::i32;
3966  if (OVT == MVT::v2i16) {
3967    INTTY = MVT::v2i32;
3968  } else if (OVT == MVT::v4i16) {
3969    INTTY = MVT::v4i32;
3970  }
3971  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
3972  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
3973  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
3974  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
3975  return LHS;
3976}
3977
3978SDValue
3979AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
3980{
3981  DebugLoc DL = Op.getDebugLoc();
3982  EVT OVT = Op.getValueType();
3983  SDValue LHS = Op.getOperand(0);
3984  SDValue RHS = Op.getOperand(1);
3985  // The LowerSREM32 function generates equivalent to the following IL.
3986  // mov r0, LHS
3987  // mov r1, RHS
3988  // ilt r10, r0, 0
3989  // ilt r11, r1, 0
3990  // iadd r0, r0, r10
3991  // iadd r1, r1, r11
3992  // ixor r0, r0, r10
3993  // ixor r1, r1, r11
3994  // udiv r20, r0, r1
3995  // umul r20, r20, r1
3996  // sub r0, r0, r20
3997  // iadd r0, r0, r10
3998  // ixor DST, r0, r10
3999
4000  // mov r0, LHS
4001  SDValue r0 = LHS;
4002
4003  // mov r1, RHS
4004  SDValue r1 = RHS;
4005
4006  // ilt r10, r0, 0
4007  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4008      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4009      r0, DAG.getConstant(0, OVT));
4010
4011  // ilt r11, r1, 0
4012  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4013      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4014      r1, DAG.getConstant(0, OVT));
4015
4016  // iadd r0, r0, r10
4017  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4018
4019  // iadd r1, r1, r11
4020  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4021
4022  // ixor r0, r0, r10
4023  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4024
4025  // ixor r1, r1, r11
4026  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4027
4028  // udiv r20, r0, r1
4029  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
4030
4031  // umul r20, r20, r1
4032  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
4033
4034  // sub r0, r0, r20
4035  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
4036
4037  // iadd r0, r0, r10
4038  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4039
4040  // ixor DST, r0, r10
4041  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4042  return DST;
4043}
4044
4045SDValue
4046AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
4047{
4048  return SDValue(Op.getNode(), 0);
4049}
4050
4051SDValue
4052AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
4053{
4054  DebugLoc DL = Op.getDebugLoc();
4055  EVT OVT = Op.getValueType();
4056  MVT INTTY = MVT::i32;
4057  if (OVT == MVT::v2i8) {
4058    INTTY = MVT::v2i32;
4059  } else if (OVT == MVT::v4i8) {
4060    INTTY = MVT::v4i32;
4061  }
4062  SDValue LHS = Op.getOperand(0);
4063  SDValue RHS = Op.getOperand(1);
4064  // The LowerUREM8 function generates equivalent to the following IL.
4065  // mov r0, as_u32(LHS)
4066  // mov r1, as_u32(RHS)
4067  // and r10, r0, 0xFF
4068  // and r11, r1, 0xFF
4069  // cmov_logical r3, r11, r11, 0x1
4070  // udiv r3, r10, r3
4071  // cmov_logical r3, r11, r3, 0
4072  // umul r3, r3, r11
4073  // sub r3, r10, r3
4074  // and as_u8(DST), r3, 0xFF
4075
4076  // mov r0, as_u32(LHS)
4077  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4078
4079  // mov r1, as_u32(RHS)
4080  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4081
4082  // and r10, r0, 0xFF
4083  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
4084      DAG.getConstant(0xFF, INTTY));
4085
4086  // and r11, r1, 0xFF
4087  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
4088      DAG.getConstant(0xFF, INTTY));
4089
4090  // cmov_logical r3, r11, r11, 0x1
4091  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
4092      DAG.getConstant(0x01, INTTY));
4093
4094  // udiv r3, r10, r3
4095  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
4096
4097  // cmov_logical r3, r11, r3, 0
4098  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
4099      DAG.getConstant(0, INTTY));
4100
4101  // umul r3, r3, r11
4102  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
4103
4104  // sub r3, r10, r3
4105  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
4106
4107  // and as_u8(DST), r3, 0xFF
4108  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
4109      DAG.getConstant(0xFF, INTTY));
4110  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
4111  return DST;
4112}
4113
4114SDValue
4115AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
4116{
4117  DebugLoc DL = Op.getDebugLoc();
4118  EVT OVT = Op.getValueType();
4119  MVT INTTY = MVT::i32;
4120  if (OVT == MVT::v2i16) {
4121    INTTY = MVT::v2i32;
4122  } else if (OVT == MVT::v4i16) {
4123    INTTY = MVT::v4i32;
4124  }
4125  SDValue LHS = Op.getOperand(0);
4126  SDValue RHS = Op.getOperand(1);
4127  // The LowerUREM16 function generatest equivalent to the following IL.
4128  // mov r0, LHS
4129  // mov r1, RHS
4130  // DIV = LowerUDIV16(LHS, RHS)
4131  // and r10, r0, 0xFFFF
4132  // and r11, r1, 0xFFFF
4133  // cmov_logical r3, r11, r11, 0x1
4134  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4135  // and r3, r3, 0xFFFF
4136  // cmov_logical r3, r11, r3, 0
4137  // umul r3, r3, r11
4138  // sub r3, r10, r3
4139  // and DST, r3, 0xFFFF
4140
4141  // mov r0, LHS
4142  SDValue r0 = LHS;
4143
4144  // mov r1, RHS
4145  SDValue r1 = RHS;
4146
4147  // and r10, r0, 0xFFFF
4148  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
4149      DAG.getConstant(0xFFFF, OVT));
4150
4151  // and r11, r1, 0xFFFF
4152  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
4153      DAG.getConstant(0xFFFF, OVT));
4154
4155  // cmov_logical r3, r11, r11, 0x1
4156  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
4157      DAG.getConstant(0x01, OVT));
4158
4159  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
4160  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
4161  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
4162  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
4163  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
4164  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
4165
4166  // and r3, r3, 0xFFFF
4167  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
4168      DAG.getConstant(0xFFFF, OVT));
4169
4170  // cmov_logical r3, r11, r3, 0
4171  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
4172      DAG.getConstant(0, OVT));
4173  // umul r3, r3, r11
4174  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
4175
4176  // sub r3, r10, r3
4177  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
4178
4179  // and DST, r3, 0xFFFF
4180  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
4181      DAG.getConstant(0xFFFF, OVT));
4182  return DST;
4183}
4184
4185SDValue
4186AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
4187{
4188  DebugLoc DL = Op.getDebugLoc();
4189  EVT OVT = Op.getValueType();
4190  SDValue LHS = Op.getOperand(0);
4191  SDValue RHS = Op.getOperand(1);
4192  // The LowerUREM32 function generates equivalent to the following IL.
4193  // udiv r20, LHS, RHS
4194  // umul r20, r20, RHS
4195  // sub DST, LHS, r20
4196
4197  // udiv r20, LHS, RHS
4198  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
4199
4200  // umul r20, r20, RHS
4201  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
4202
4203  // sub DST, LHS, r20
4204  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
4205  return DST;
4206}
4207
4208SDValue
4209AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
4210{
4211  return SDValue(Op.getNode(), 0);
4212}
4213
4214
4215SDValue
4216AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
4217{
4218  DebugLoc DL = Op.getDebugLoc();
4219  EVT OVT = Op.getValueType();
4220  MVT INTTY = MVT::i32;
4221  if (OVT == MVT::v2f32) {
4222    INTTY = MVT::v2i32;
4223  } else if (OVT == MVT::v4f32) {
4224    INTTY = MVT::v4i32;
4225  }
4226  SDValue LHS = Op.getOperand(0);
4227  SDValue RHS = Op.getOperand(1);
4228  SDValue DST;
4229  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
4230      &this->getTargetMachine())->getSubtargetImpl();
4231  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
4232    // TODO: This doesn't work for vector types yet
4233    // The LowerFDIV32 function generates equivalent to the following
4234    // IL:
4235    // mov r20, as_int(LHS)
4236    // mov r21, as_int(RHS)
4237    // and r30, r20, 0x7f800000
4238    // and r31, r20, 0x807FFFFF
4239    // and r32, r21, 0x7f800000
4240    // and r33, r21, 0x807FFFFF
4241    // ieq r40, r30, 0x7F800000
4242    // ieq r41, r31, 0x7F800000
4243    // ieq r42, r32, 0
4244    // ieq r43, r33, 0
4245    // and r50, r20, 0x80000000
4246    // and r51, r21, 0x80000000
4247    // ior r32, r32, 0x3f800000
4248    // ior r33, r33, 0x3f800000
4249    // cmov_logical r32, r42, r50, r32
4250    // cmov_logical r33, r43, r51, r33
4251    // cmov_logical r32, r40, r20, r32
4252    // cmov_logical r33, r41, r21, r33
4253    // ior r50, r40, r41
4254    // ior r51, r42, r43
4255    // ior r50, r50, r51
4256    // inegate r52, r31
4257    // iadd r30, r30, r52
4258    // cmov_logical r30, r50, 0, r30
4259    // div_zeroop(infinity) r21, 1.0, r33
4260    // mul_ieee r20, r32, r21
4261    // and r22, r20, 0x7FFFFFFF
4262    // and r23, r20, 0x80000000
4263    // ishr r60, r22, 0x00000017
4264    // ishr r61, r30, 0x00000017
4265    // iadd r20, r20, r30
4266    // iadd r21, r22, r30
4267    // iadd r60, r60, r61
4268    // ige r42, 0, R60
4269    // ior r41, r23, 0x7F800000
4270    // ige r40, r60, 0x000000FF
4271    // cmov_logical r40, r50, 0, r40
4272    // cmov_logical r20, r42, r23, r20
4273    // cmov_logical DST, r40, r41, r20
4274    // as_float(DST)
4275
4276    // mov r20, as_int(LHS)
4277    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
4278
4279    // mov r21, as_int(RHS)
4280    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
4281
4282    // and r30, r20, 0x7f800000
4283    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4284        DAG.getConstant(0x7F800000, INTTY));
4285
4286    // and r31, r21, 0x7f800000
4287    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4288        DAG.getConstant(0x7f800000, INTTY));
4289
4290    // and r32, r20, 0x807FFFFF
4291    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4292        DAG.getConstant(0x807FFFFF, INTTY));
4293
4294    // and r33, r21, 0x807FFFFF
4295    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4296        DAG.getConstant(0x807FFFFF, INTTY));
4297
4298    // ieq r40, r30, 0x7F800000
4299    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4300        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4301        R30, DAG.getConstant(0x7F800000, INTTY));
4302
4303    // ieq r41, r31, 0x7F800000
4304    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4305        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4306        R31, DAG.getConstant(0x7F800000, INTTY));
4307
4308    // ieq r42, r30, 0
4309    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4310        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4311        R30, DAG.getConstant(0, INTTY));
4312
4313    // ieq r43, r31, 0
4314    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4315        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
4316        R31, DAG.getConstant(0, INTTY));
4317
4318    // and r50, r20, 0x80000000
4319    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4320        DAG.getConstant(0x80000000, INTTY));
4321
4322    // and r51, r21, 0x80000000
4323    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
4324        DAG.getConstant(0x80000000, INTTY));
4325
4326    // ior r32, r32, 0x3f800000
4327    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
4328        DAG.getConstant(0x3F800000, INTTY));
4329
4330    // ior r33, r33, 0x3f800000
4331    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
4332        DAG.getConstant(0x3F800000, INTTY));
4333
4334    // cmov_logical r32, r42, r50, r32
4335    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
4336
4337    // cmov_logical r33, r43, r51, r33
4338    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
4339
4340    // cmov_logical r32, r40, r20, r32
4341    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
4342
4343    // cmov_logical r33, r41, r21, r33
4344    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
4345
4346    // ior r50, r40, r41
4347    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
4348
4349    // ior r51, r42, r43
4350    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
4351
4352    // ior r50, r50, r51
4353    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
4354
4355    // inegate r52, r31
4356    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
4357
4358    // iadd r30, r30, r52
4359    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
4360
4361    // cmov_logical r30, r50, 0, r30
4362    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4363        DAG.getConstant(0, INTTY), R30);
4364
4365    // div_zeroop(infinity) r21, 1.0, as_float(r33)
4366    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4367    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4368        DAG.getConstantFP(1.0f, OVT), R33);
4369
4370    // mul_ieee as_int(r20), as_float(r32), r21
4371    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4372    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4373    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4374
4375    // div_zeroop(infinity) r21, 1.0, as_float(r33)
4376    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
4377    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
4378        DAG.getConstantFP(1.0f, OVT), R33);
4379
4380    // mul_ieee as_int(r20), as_float(r32), r21
4381    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
4382    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
4383    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
4384
4385    // and r22, r20, 0x7FFFFFFF
4386    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4387        DAG.getConstant(0x7FFFFFFF, INTTY));
4388
4389    // and r23, r20, 0x80000000
4390    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
4391        DAG.getConstant(0x80000000, INTTY));
4392
4393    // ishr r60, r22, 0x00000017
4394    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
4395        DAG.getConstant(0x00000017, INTTY));
4396
4397    // ishr r61, r30, 0x00000017
4398    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
4399        DAG.getConstant(0x00000017, INTTY));
4400
4401    // iadd r20, r20, r30
4402    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
4403
4404    // iadd r21, r22, r30
4405    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
4406
4407    // iadd r60, r60, r61
4408    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
4409
4410    // ige r42, 0, R60
4411    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4412        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4413        DAG.getConstant(0, INTTY),
4414        R60);
4415
4416    // ior r41, r23, 0x7F800000
4417    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
4418        DAG.getConstant(0x7F800000, INTTY));
4419
4420    // ige r40, r60, 0x000000FF
4421    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
4422        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
4423        R60,
4424        DAG.getConstant(0x0000000FF, INTTY));
4425
4426    // cmov_logical r40, r50, 0, r40
4427    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
4428        DAG.getConstant(0, INTTY),
4429        R40);
4430
4431    // cmov_logical r20, r42, r23, r20
4432    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
4433
4434    // cmov_logical DST, r40, r41, r20
4435    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
4436
4437    // as_float(DST)
4438    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
4439  } else {
4440    // The following sequence of DAG nodes produce the following IL:
4441    // fabs r1, RHS
4442    // lt r2, 0x1.0p+96f, r1
4443    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4444    // mul_ieee r1, RHS, r3
4445    // div_zeroop(infinity) r0, LHS, r1
4446    // mul_ieee DST, r0, r3
4447
4448    // fabs r1, RHS
4449    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
4450    // lt r2, 0x1.0p+96f, r1
4451    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4452        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
4453        DAG.getConstant(0x6f800000, INTTY), r1);
4454    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
4455    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
4456        DAG.getConstant(0x2f800000, INTTY),
4457        DAG.getConstant(0x3f800000, INTTY));
4458    // mul_ieee r1, RHS, r3
4459    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
4460    // div_zeroop(infinity) r0, LHS, r1
4461    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
4462    // mul_ieee DST, r0, r3
4463    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
4464  }
4465  return DST;
4466}
4467
4468SDValue
4469AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
4470{
4471  return SDValue(Op.getNode(), 0);
4472}
4473