AMDILISelLowering.cpp revision 5aaaa6a426258dc714c7346bec062795998f9986
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILSubtarget.h"
19#include "AMDILTargetMachine.h"
20#include "AMDILUtilityFunctions.h"
21#include "llvm/CallingConv.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/PseudoSourceValue.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/DerivedTypes.h"
29#include "llvm/Instructions.h"
30#include "llvm/Intrinsics.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetOptions.h"
33
34using namespace llvm;
35#define ISDBITCAST  ISD::BITCAST
36#define MVTGLUE     MVT::Glue
37//===----------------------------------------------------------------------===//
38// Calling Convention Implementation
39//===----------------------------------------------------------------------===//
40#include "AMDILGenCallingConv.inc"
41
42//===----------------------------------------------------------------------===//
43// TargetLowering Implementation Help Functions Begin
44//===----------------------------------------------------------------------===//
45  static SDValue
46getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
47{
48  DebugLoc DL = Src.getDebugLoc();
49  EVT svt = Src.getValueType().getScalarType();
50  EVT dvt = Dst.getValueType().getScalarType();
51  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
52    if (dvt.bitsGT(svt)) {
53      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
54    } else if (svt.bitsLT(svt)) {
55      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
56          DAG.getConstant(1, MVT::i32));
57    }
58  } else if (svt.isInteger() && dvt.isInteger()) {
59    if (!svt.bitsEq(dvt)) {
60      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
61    } else {
62      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
63    }
64  } else if (svt.isInteger()) {
65    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
66    if (!svt.bitsEq(dvt)) {
67      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
68        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
69      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
70        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
71      } else {
72        assert(0 && "We only support 32 and 64bit fp types");
73      }
74    }
75    Src = DAG.getNode(opcode, DL, dvt, Src);
76  } else if (dvt.isInteger()) {
77    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
78    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
79      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
80    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
81      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
82    } else {
83      assert(0 && "We only support 32 and 64bit fp types");
84    }
85    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
86  }
87  return Src;
88}
89// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
90// condition.
91  static AMDILCC::CondCodes
92CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
93{
94  switch (CC) {
95    default:
96      {
97        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
98        assert(0 && "Unknown condition code!");
99      }
100    case ISD::SETO:
101      switch(type) {
102        case MVT::f32:
103          return AMDILCC::IL_CC_F_O;
104        case MVT::f64:
105          return AMDILCC::IL_CC_D_O;
106        default:
107          assert(0 && "Opcode combination not generated correctly!");
108          return AMDILCC::COND_ERROR;
109      };
110    case ISD::SETUO:
111      switch(type) {
112        case MVT::f32:
113          return AMDILCC::IL_CC_F_UO;
114        case MVT::f64:
115          return AMDILCC::IL_CC_D_UO;
116        default:
117          assert(0 && "Opcode combination not generated correctly!");
118          return AMDILCC::COND_ERROR;
119      };
120    case ISD::SETGT:
121      switch (type) {
122        case MVT::i1:
123        case MVT::i8:
124        case MVT::i16:
125        case MVT::i32:
126          return AMDILCC::IL_CC_I_GT;
127        case MVT::f32:
128          return AMDILCC::IL_CC_F_GT;
129        case MVT::f64:
130          return AMDILCC::IL_CC_D_GT;
131        case MVT::i64:
132          return AMDILCC::IL_CC_L_GT;
133        default:
134          assert(0 && "Opcode combination not generated correctly!");
135          return AMDILCC::COND_ERROR;
136      };
137    case ISD::SETGE:
138      switch (type) {
139        case MVT::i1:
140        case MVT::i8:
141        case MVT::i16:
142        case MVT::i32:
143          return AMDILCC::IL_CC_I_GE;
144        case MVT::f32:
145          return AMDILCC::IL_CC_F_GE;
146        case MVT::f64:
147          return AMDILCC::IL_CC_D_GE;
148        case MVT::i64:
149          return AMDILCC::IL_CC_L_GE;
150        default:
151          assert(0 && "Opcode combination not generated correctly!");
152          return AMDILCC::COND_ERROR;
153      };
154    case ISD::SETLT:
155      switch (type) {
156        case MVT::i1:
157        case MVT::i8:
158        case MVT::i16:
159        case MVT::i32:
160          return AMDILCC::IL_CC_I_LT;
161        case MVT::f32:
162          return AMDILCC::IL_CC_F_LT;
163        case MVT::f64:
164          return AMDILCC::IL_CC_D_LT;
165        case MVT::i64:
166          return AMDILCC::IL_CC_L_LT;
167        default:
168          assert(0 && "Opcode combination not generated correctly!");
169          return AMDILCC::COND_ERROR;
170      };
171    case ISD::SETLE:
172      switch (type) {
173        case MVT::i1:
174        case MVT::i8:
175        case MVT::i16:
176        case MVT::i32:
177          return AMDILCC::IL_CC_I_LE;
178        case MVT::f32:
179          return AMDILCC::IL_CC_F_LE;
180        case MVT::f64:
181          return AMDILCC::IL_CC_D_LE;
182        case MVT::i64:
183          return AMDILCC::IL_CC_L_LE;
184        default:
185          assert(0 && "Opcode combination not generated correctly!");
186          return AMDILCC::COND_ERROR;
187      };
188    case ISD::SETNE:
189      switch (type) {
190        case MVT::i1:
191        case MVT::i8:
192        case MVT::i16:
193        case MVT::i32:
194          return AMDILCC::IL_CC_I_NE;
195        case MVT::f32:
196          return AMDILCC::IL_CC_F_NE;
197        case MVT::f64:
198          return AMDILCC::IL_CC_D_NE;
199        case MVT::i64:
200          return AMDILCC::IL_CC_L_NE;
201        default:
202          assert(0 && "Opcode combination not generated correctly!");
203          return AMDILCC::COND_ERROR;
204      };
205    case ISD::SETEQ:
206      switch (type) {
207        case MVT::i1:
208        case MVT::i8:
209        case MVT::i16:
210        case MVT::i32:
211          return AMDILCC::IL_CC_I_EQ;
212        case MVT::f32:
213          return AMDILCC::IL_CC_F_EQ;
214        case MVT::f64:
215          return AMDILCC::IL_CC_D_EQ;
216        case MVT::i64:
217          return AMDILCC::IL_CC_L_EQ;
218        default:
219          assert(0 && "Opcode combination not generated correctly!");
220          return AMDILCC::COND_ERROR;
221      };
222    case ISD::SETUGT:
223      switch (type) {
224        case MVT::i1:
225        case MVT::i8:
226        case MVT::i16:
227        case MVT::i32:
228          return AMDILCC::IL_CC_U_GT;
229        case MVT::f32:
230          return AMDILCC::IL_CC_F_UGT;
231        case MVT::f64:
232          return AMDILCC::IL_CC_D_UGT;
233        case MVT::i64:
234          return AMDILCC::IL_CC_UL_GT;
235        default:
236          assert(0 && "Opcode combination not generated correctly!");
237          return AMDILCC::COND_ERROR;
238      };
239    case ISD::SETUGE:
240      switch (type) {
241        case MVT::i1:
242        case MVT::i8:
243        case MVT::i16:
244        case MVT::i32:
245          return AMDILCC::IL_CC_U_GE;
246        case MVT::f32:
247          return AMDILCC::IL_CC_F_UGE;
248        case MVT::f64:
249          return AMDILCC::IL_CC_D_UGE;
250        case MVT::i64:
251          return AMDILCC::IL_CC_UL_GE;
252        default:
253          assert(0 && "Opcode combination not generated correctly!");
254          return AMDILCC::COND_ERROR;
255      };
256    case ISD::SETULT:
257      switch (type) {
258        case MVT::i1:
259        case MVT::i8:
260        case MVT::i16:
261        case MVT::i32:
262          return AMDILCC::IL_CC_U_LT;
263        case MVT::f32:
264          return AMDILCC::IL_CC_F_ULT;
265        case MVT::f64:
266          return AMDILCC::IL_CC_D_ULT;
267        case MVT::i64:
268          return AMDILCC::IL_CC_UL_LT;
269        default:
270          assert(0 && "Opcode combination not generated correctly!");
271          return AMDILCC::COND_ERROR;
272      };
273    case ISD::SETULE:
274      switch (type) {
275        case MVT::i1:
276        case MVT::i8:
277        case MVT::i16:
278        case MVT::i32:
279          return AMDILCC::IL_CC_U_LE;
280        case MVT::f32:
281          return AMDILCC::IL_CC_F_ULE;
282        case MVT::f64:
283          return AMDILCC::IL_CC_D_ULE;
284        case MVT::i64:
285          return AMDILCC::IL_CC_UL_LE;
286        default:
287          assert(0 && "Opcode combination not generated correctly!");
288          return AMDILCC::COND_ERROR;
289      };
290    case ISD::SETUNE:
291      switch (type) {
292        case MVT::i1:
293        case MVT::i8:
294        case MVT::i16:
295        case MVT::i32:
296          return AMDILCC::IL_CC_U_NE;
297        case MVT::f32:
298          return AMDILCC::IL_CC_F_UNE;
299        case MVT::f64:
300          return AMDILCC::IL_CC_D_UNE;
301        case MVT::i64:
302          return AMDILCC::IL_CC_UL_NE;
303        default:
304          assert(0 && "Opcode combination not generated correctly!");
305          return AMDILCC::COND_ERROR;
306      };
307    case ISD::SETUEQ:
308      switch (type) {
309        case MVT::i1:
310        case MVT::i8:
311        case MVT::i16:
312        case MVT::i32:
313          return AMDILCC::IL_CC_U_EQ;
314        case MVT::f32:
315          return AMDILCC::IL_CC_F_UEQ;
316        case MVT::f64:
317          return AMDILCC::IL_CC_D_UEQ;
318        case MVT::i64:
319          return AMDILCC::IL_CC_UL_EQ;
320        default:
321          assert(0 && "Opcode combination not generated correctly!");
322          return AMDILCC::COND_ERROR;
323      };
324    case ISD::SETOGT:
325      switch (type) {
326        case MVT::f32:
327          return AMDILCC::IL_CC_F_OGT;
328        case MVT::f64:
329          return AMDILCC::IL_CC_D_OGT;
330        case MVT::i1:
331        case MVT::i8:
332        case MVT::i16:
333        case MVT::i32:
334        case MVT::i64:
335        default:
336          assert(0 && "Opcode combination not generated correctly!");
337          return AMDILCC::COND_ERROR;
338      };
339    case ISD::SETOGE:
340      switch (type) {
341        case MVT::f32:
342          return AMDILCC::IL_CC_F_OGE;
343        case MVT::f64:
344          return AMDILCC::IL_CC_D_OGE;
345        case MVT::i1:
346        case MVT::i8:
347        case MVT::i16:
348        case MVT::i32:
349        case MVT::i64:
350        default:
351          assert(0 && "Opcode combination not generated correctly!");
352          return AMDILCC::COND_ERROR;
353      };
354    case ISD::SETOLT:
355      switch (type) {
356        case MVT::f32:
357          return AMDILCC::IL_CC_F_OLT;
358        case MVT::f64:
359          return AMDILCC::IL_CC_D_OLT;
360        case MVT::i1:
361        case MVT::i8:
362        case MVT::i16:
363        case MVT::i32:
364        case MVT::i64:
365        default:
366          assert(0 && "Opcode combination not generated correctly!");
367          return AMDILCC::COND_ERROR;
368      };
369    case ISD::SETOLE:
370      switch (type) {
371        case MVT::f32:
372          return AMDILCC::IL_CC_F_OLE;
373        case MVT::f64:
374          return AMDILCC::IL_CC_D_OLE;
375        case MVT::i1:
376        case MVT::i8:
377        case MVT::i16:
378        case MVT::i32:
379        case MVT::i64:
380        default:
381          assert(0 && "Opcode combination not generated correctly!");
382          return AMDILCC::COND_ERROR;
383      };
384    case ISD::SETONE:
385      switch (type) {
386        case MVT::f32:
387          return AMDILCC::IL_CC_F_ONE;
388        case MVT::f64:
389          return AMDILCC::IL_CC_D_ONE;
390        case MVT::i1:
391        case MVT::i8:
392        case MVT::i16:
393        case MVT::i32:
394        case MVT::i64:
395        default:
396          assert(0 && "Opcode combination not generated correctly!");
397          return AMDILCC::COND_ERROR;
398      };
399    case ISD::SETOEQ:
400      switch (type) {
401        case MVT::f32:
402          return AMDILCC::IL_CC_F_OEQ;
403        case MVT::f64:
404          return AMDILCC::IL_CC_D_OEQ;
405        case MVT::i1:
406        case MVT::i8:
407        case MVT::i16:
408        case MVT::i32:
409        case MVT::i64:
410        default:
411          assert(0 && "Opcode combination not generated correctly!");
412          return AMDILCC::COND_ERROR;
413      };
414  };
415}
416
417  static unsigned int
418translateToOpcode(uint64_t CCCode, unsigned int regClass)
419{
420  switch (CCCode) {
421    case AMDILCC::IL_CC_D_EQ:
422    case AMDILCC::IL_CC_D_OEQ:
423      if (regClass == AMDIL::GPRV2F64RegClassID) {
424        return (unsigned int)AMDIL::DEQ_v2f64;
425      } else {
426        return (unsigned int)AMDIL::DEQ;
427      }
428    case AMDILCC::IL_CC_D_LE:
429    case AMDILCC::IL_CC_D_OLE:
430    case AMDILCC::IL_CC_D_ULE:
431    case AMDILCC::IL_CC_D_GE:
432    case AMDILCC::IL_CC_D_OGE:
433    case AMDILCC::IL_CC_D_UGE:
434      return (unsigned int)AMDIL::DGE;
435    case AMDILCC::IL_CC_D_LT:
436    case AMDILCC::IL_CC_D_OLT:
437    case AMDILCC::IL_CC_D_ULT:
438    case AMDILCC::IL_CC_D_GT:
439    case AMDILCC::IL_CC_D_OGT:
440    case AMDILCC::IL_CC_D_UGT:
441      return (unsigned int)AMDIL::DLT;
442    case AMDILCC::IL_CC_D_NE:
443    case AMDILCC::IL_CC_D_UNE:
444      return (unsigned int)AMDIL::DNE;
445    case AMDILCC::IL_CC_F_EQ:
446    case AMDILCC::IL_CC_F_OEQ:
447      return (unsigned int)AMDIL::FEQ;
448    case AMDILCC::IL_CC_F_LE:
449    case AMDILCC::IL_CC_F_ULE:
450    case AMDILCC::IL_CC_F_OLE:
451    case AMDILCC::IL_CC_F_GE:
452    case AMDILCC::IL_CC_F_UGE:
453    case AMDILCC::IL_CC_F_OGE:
454      return (unsigned int)AMDIL::FGE;
455    case AMDILCC::IL_CC_F_LT:
456    case AMDILCC::IL_CC_F_OLT:
457    case AMDILCC::IL_CC_F_ULT:
458    case AMDILCC::IL_CC_F_GT:
459    case AMDILCC::IL_CC_F_OGT:
460    case AMDILCC::IL_CC_F_UGT:
461      if (regClass == AMDIL::GPRV2F32RegClassID) {
462        return (unsigned int)AMDIL::FLT_v2f32;
463      } else if (regClass == AMDIL::GPRV4F32RegClassID) {
464        return (unsigned int)AMDIL::FLT_v4f32;
465      } else {
466        return (unsigned int)AMDIL::FLT;
467      }
468    case AMDILCC::IL_CC_F_NE:
469    case AMDILCC::IL_CC_F_UNE:
470      return (unsigned int)AMDIL::FNE;
471    case AMDILCC::IL_CC_I_EQ:
472    case AMDILCC::IL_CC_U_EQ:
473      if (regClass == AMDIL::GPRI32RegClassID
474          || regClass == AMDIL::GPRI8RegClassID
475          || regClass == AMDIL::GPRI16RegClassID) {
476        return (unsigned int)AMDIL::IEQ;
477      } else if (regClass == AMDIL::GPRV2I32RegClassID
478          || regClass == AMDIL::GPRV2I8RegClassID
479          || regClass == AMDIL::GPRV2I16RegClassID) {
480        return (unsigned int)AMDIL::IEQ_v2i32;
481      } else if (regClass == AMDIL::GPRV4I32RegClassID
482          || regClass == AMDIL::GPRV4I8RegClassID
483          || regClass == AMDIL::GPRV4I16RegClassID) {
484        return (unsigned int)AMDIL::IEQ_v4i32;
485      } else {
486        assert(!"Unknown reg class!");
487      }
488    case AMDILCC::IL_CC_L_EQ:
489    case AMDILCC::IL_CC_UL_EQ:
490      return (unsigned int)AMDIL::LEQ;
491    case AMDILCC::IL_CC_I_GE:
492    case AMDILCC::IL_CC_I_LE:
493      if (regClass == AMDIL::GPRI32RegClassID
494          || regClass == AMDIL::GPRI8RegClassID
495          || regClass == AMDIL::GPRI16RegClassID) {
496        return (unsigned int)AMDIL::IGE;
497      } else if (regClass == AMDIL::GPRV2I32RegClassID
498          || regClass == AMDIL::GPRI8RegClassID
499          || regClass == AMDIL::GPRI16RegClassID) {
500        return (unsigned int)AMDIL::IGE_v2i32;
501      } else if (regClass == AMDIL::GPRV4I32RegClassID
502          || regClass == AMDIL::GPRI8RegClassID
503          || regClass == AMDIL::GPRI16RegClassID) {
504        return (unsigned int)AMDIL::IGE_v4i32;
505      } else {
506        assert(!"Unknown reg class!");
507      }
508    case AMDILCC::IL_CC_I_LT:
509    case AMDILCC::IL_CC_I_GT:
510      if (regClass == AMDIL::GPRI32RegClassID
511          || regClass == AMDIL::GPRI8RegClassID
512          || regClass == AMDIL::GPRI16RegClassID) {
513        return (unsigned int)AMDIL::ILT;
514      } else if (regClass == AMDIL::GPRV2I32RegClassID
515          || regClass == AMDIL::GPRI8RegClassID
516          || regClass == AMDIL::GPRI16RegClassID) {
517        return (unsigned int)AMDIL::ILT_v2i32;
518      } else if (regClass == AMDIL::GPRV4I32RegClassID
519          || regClass == AMDIL::GPRI8RegClassID
520          || regClass == AMDIL::GPRI16RegClassID) {
521        return (unsigned int)AMDIL::ILT_v4i32;
522      } else {
523        assert(!"Unknown reg class!");
524      }
525    case AMDILCC::IL_CC_L_GE:
526      return (unsigned int)AMDIL::LGE;
527    case AMDILCC::IL_CC_L_LE:
528      return (unsigned int)AMDIL::LLE;
529    case AMDILCC::IL_CC_L_LT:
530      return (unsigned int)AMDIL::LLT;
531    case AMDILCC::IL_CC_L_GT:
532      return (unsigned int)AMDIL::LGT;
533    case AMDILCC::IL_CC_I_NE:
534    case AMDILCC::IL_CC_U_NE:
535      if (regClass == AMDIL::GPRI32RegClassID
536          || regClass == AMDIL::GPRI8RegClassID
537          || regClass == AMDIL::GPRI16RegClassID) {
538        return (unsigned int)AMDIL::INE;
539      } else if (regClass == AMDIL::GPRV2I32RegClassID
540          || regClass == AMDIL::GPRI8RegClassID
541          || regClass == AMDIL::GPRI16RegClassID) {
542        return (unsigned int)AMDIL::INE_v2i32;
543      } else if (regClass == AMDIL::GPRV4I32RegClassID
544          || regClass == AMDIL::GPRI8RegClassID
545          || regClass == AMDIL::GPRI16RegClassID) {
546        return (unsigned int)AMDIL::INE_v4i32;
547      } else {
548        assert(!"Unknown reg class!");
549      }
550    case AMDILCC::IL_CC_U_GE:
551    case AMDILCC::IL_CC_U_LE:
552      if (regClass == AMDIL::GPRI32RegClassID
553          || regClass == AMDIL::GPRI8RegClassID
554          || regClass == AMDIL::GPRI16RegClassID) {
555        return (unsigned int)AMDIL::UGE;
556      } else if (regClass == AMDIL::GPRV2I32RegClassID
557          || regClass == AMDIL::GPRI8RegClassID
558          || regClass == AMDIL::GPRI16RegClassID) {
559        return (unsigned int)AMDIL::UGE_v2i32;
560      } else if (regClass == AMDIL::GPRV4I32RegClassID
561          || regClass == AMDIL::GPRI8RegClassID
562          || regClass == AMDIL::GPRI16RegClassID) {
563        return (unsigned int)AMDIL::UGE_v4i32;
564      } else {
565        assert(!"Unknown reg class!");
566      }
567    case AMDILCC::IL_CC_L_NE:
568    case AMDILCC::IL_CC_UL_NE:
569      return (unsigned int)AMDIL::LNE;
570    case AMDILCC::IL_CC_UL_GE:
571      return (unsigned int)AMDIL::ULGE;
572    case AMDILCC::IL_CC_UL_LE:
573      return (unsigned int)AMDIL::ULLE;
574    case AMDILCC::IL_CC_U_LT:
575      if (regClass == AMDIL::GPRI32RegClassID
576          || regClass == AMDIL::GPRI8RegClassID
577          || regClass == AMDIL::GPRI16RegClassID) {
578        return (unsigned int)AMDIL::ULT;
579      } else if (regClass == AMDIL::GPRV2I32RegClassID
580          || regClass == AMDIL::GPRI8RegClassID
581          || regClass == AMDIL::GPRI16RegClassID) {
582        return (unsigned int)AMDIL::ULT_v2i32;
583      } else if (regClass == AMDIL::GPRV4I32RegClassID
584          || regClass == AMDIL::GPRI8RegClassID
585          || regClass == AMDIL::GPRI16RegClassID) {
586        return (unsigned int)AMDIL::ULT_v4i32;
587      } else {
588        assert(!"Unknown reg class!");
589      }
590    case AMDILCC::IL_CC_U_GT:
591      if (regClass == AMDIL::GPRI32RegClassID
592          || regClass == AMDIL::GPRI8RegClassID
593          || regClass == AMDIL::GPRI16RegClassID) {
594        return (unsigned int)AMDIL::UGT;
595      } else if (regClass == AMDIL::GPRV2I32RegClassID
596          || regClass == AMDIL::GPRI8RegClassID
597          || regClass == AMDIL::GPRI16RegClassID) {
598        return (unsigned int)AMDIL::UGT_v2i32;
599      } else if (regClass == AMDIL::GPRV4I32RegClassID
600          || regClass == AMDIL::GPRI8RegClassID
601          || regClass == AMDIL::GPRI16RegClassID) {
602        return (unsigned int)AMDIL::UGT_v4i32;
603      } else {
604        assert(!"Unknown reg class!");
605      }
606    case AMDILCC::IL_CC_UL_LT:
607      return (unsigned int)AMDIL::ULLT;
608    case AMDILCC::IL_CC_UL_GT:
609      return (unsigned int)AMDIL::ULGT;
610    case AMDILCC::IL_CC_F_UEQ:
611    case AMDILCC::IL_CC_D_UEQ:
612    case AMDILCC::IL_CC_F_ONE:
613    case AMDILCC::IL_CC_D_ONE:
614    case AMDILCC::IL_CC_F_O:
615    case AMDILCC::IL_CC_F_UO:
616    case AMDILCC::IL_CC_D_O:
617    case AMDILCC::IL_CC_D_UO:
618      // we don't care
619      return 0;
620
621  }
622  errs()<<"Opcode: "<<CCCode<<"\n";
623  assert(0 && "Unknown opcode retrieved");
624  return 0;
625}
626
/// Helper function used by LowerFormalArguments
///
/// Maps an MVT::SimpleValueType value to the AMDIL register class that
/// holds values of that type.  Covers the scalar and 2/4-element vector
/// forms of i8/i16/i32/i64/f32/f64.
static const TargetRegisterClass*
getRegClassFromType(unsigned int type) {
  switch (type) {
  default:
    assert(0 && "Passed in type does not match any register classes.");
    // NOTE(review): no return here — with assertions compiled out,
    // unknown types fall through and get GPRI8RegClass.
  case MVT::i8:
    return &AMDIL::GPRI8RegClass;
  case MVT::i16:
    return &AMDIL::GPRI16RegClass;
  case MVT::i32:
    return &AMDIL::GPRI32RegClass;
  case MVT::f32:
    return &AMDIL::GPRF32RegClass;
  case MVT::i64:
    return &AMDIL::GPRI64RegClass;
  case MVT::f64:
    return &AMDIL::GPRF64RegClass;
  case MVT::v4f32:
    return &AMDIL::GPRV4F32RegClass;
  case MVT::v4i8:
    return &AMDIL::GPRV4I8RegClass;
  case MVT::v4i16:
    return &AMDIL::GPRV4I16RegClass;
  case MVT::v4i32:
    return &AMDIL::GPRV4I32RegClass;
  case MVT::v2f32:
    return &AMDIL::GPRV2F32RegClass;
  case MVT::v2i8:
    return &AMDIL::GPRV2I8RegClass;
  case MVT::v2i16:
    return &AMDIL::GPRV2I16RegClass;
  case MVT::v2i32:
    return &AMDIL::GPRV2I32RegClass;
  case MVT::v2f64:
    return &AMDIL::GPRV2F64RegClass;
  case MVT::v2i64:
    return &AMDIL::GPRV2I64RegClass;
  }
}
667
668SDValue
669AMDILTargetLowering::LowerMemArgument(
670    SDValue Chain,
671    CallingConv::ID CallConv,
672    const SmallVectorImpl<ISD::InputArg> &Ins,
673    DebugLoc dl, SelectionDAG &DAG,
674    const CCValAssign &VA,
675    MachineFrameInfo *MFI,
676    unsigned i) const
677{
678  // Create the nodes corresponding to a load from this parameter slot.
679  ISD::ArgFlagsTy Flags = Ins[i].Flags;
680
681  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
682    getTargetMachine().Options.GuaranteedTailCallOpt;
683  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
684
685  // FIXME: For now, all byval parameter objects are marked mutable. This can
686  // be changed with more analysis.
687  // In case of tail call optimization mark all arguments mutable. Since they
688  // could be overwritten by lowering of arguments in case of a tail call.
689  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
690      VA.getLocMemOffset(), isImmutable);
691  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
692
693  if (Flags.isByVal())
694    return FIN;
695  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
696      MachinePointerInfo::getFixedStack(FI),
697      false, false, false, 0);
698}
699//===----------------------------------------------------------------------===//
700// TargetLowering Implementation Help Functions End
701//===----------------------------------------------------------------------===//
702//===----------------------------------------------------------------------===//
703// Instruction generation functions
704//===----------------------------------------------------------------------===//
705uint32_t
706AMDILTargetLowering::addExtensionInstructions(
707    uint32_t reg, bool signedShift,
708    unsigned int simpleVT) const
709{
710  int shiftSize = 0;
711  uint32_t LShift, RShift;
712  switch(simpleVT)
713  {
714    default:
715      return reg;
716    case AMDIL::GPRI8RegClassID:
717      shiftSize = 24;
718      LShift = AMDIL::SHL_i8;
719      if (signedShift) {
720        RShift = AMDIL::SHR_i8;
721      } else {
722        RShift = AMDIL::USHR_i8;
723      }
724      break;
725    case AMDIL::GPRV2I8RegClassID:
726      shiftSize = 24;
727      LShift = AMDIL::SHL_v2i8;
728      if (signedShift) {
729        RShift = AMDIL::SHR_v2i8;
730      } else {
731        RShift = AMDIL::USHR_v2i8;
732      }
733      break;
734    case AMDIL::GPRV4I8RegClassID:
735      shiftSize = 24;
736      LShift = AMDIL::SHL_v4i8;
737      if (signedShift) {
738        RShift = AMDIL::SHR_v4i8;
739      } else {
740        RShift = AMDIL::USHR_v4i8;
741      }
742      break;
743    case AMDIL::GPRI16RegClassID:
744      shiftSize = 16;
745      LShift = AMDIL::SHL_i16;
746      if (signedShift) {
747        RShift = AMDIL::SHR_i16;
748      } else {
749        RShift = AMDIL::USHR_i16;
750      }
751      break;
752    case AMDIL::GPRV2I16RegClassID:
753      shiftSize = 16;
754      LShift = AMDIL::SHL_v2i16;
755      if (signedShift) {
756        RShift = AMDIL::SHR_v2i16;
757      } else {
758        RShift = AMDIL::USHR_v2i16;
759      }
760      break;
761    case AMDIL::GPRV4I16RegClassID:
762      shiftSize = 16;
763      LShift = AMDIL::SHL_v4i16;
764      if (signedShift) {
765        RShift = AMDIL::SHR_v4i16;
766      } else {
767        RShift = AMDIL::USHR_v4i16;
768      }
769      break;
770  };
771  uint32_t LoadReg = genVReg(simpleVT);
772  uint32_t tmp1 = genVReg(simpleVT);
773  uint32_t tmp2 = genVReg(simpleVT);
774  generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
775  generateMachineInst(LShift, tmp1, reg, LoadReg);
776  generateMachineInst(RShift, tmp2, tmp1, LoadReg);
777  return tmp2;
778}
779
780MachineOperand
781AMDILTargetLowering::convertToReg(MachineOperand op) const
782{
783  if (op.isReg()) {
784    return op;
785  } else if (op.isImm()) {
786    uint32_t loadReg
787      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
788    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
789      .addImm(op.getImm());
790    op.ChangeToRegister(loadReg, false);
791  } else if (op.isFPImm()) {
792    uint32_t loadReg
793      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
794    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
795      .addFPImm(op.getFPImm());
796    op.ChangeToRegister(loadReg, false);
797  } else if (op.isMBB()) {
798    op.ChangeToRegister(0, false);
799  } else if (op.isFI()) {
800    op.ChangeToRegister(0, false);
801  } else if (op.isCPI()) {
802    op.ChangeToRegister(0, false);
803  } else if (op.isJTI()) {
804    op.ChangeToRegister(0, false);
805  } else if (op.isGlobal()) {
806    op.ChangeToRegister(0, false);
807  } else if (op.isSymbol()) {
808    op.ChangeToRegister(0, false);
809  }/* else if (op.isMetadata()) {
810      op.ChangeToRegister(0, false);
811      }*/
812  return op;
813}
814
815void
816AMDILTargetLowering::generateCMPInstr(
817    MachineInstr *MI,
818    MachineBasicBlock *BB,
819    const TargetInstrInfo& TII)
820const
821{
822  MachineOperand DST = MI->getOperand(0);
823  MachineOperand CC = MI->getOperand(1);
824  MachineOperand LHS = MI->getOperand(2);
825  MachineOperand RHS = MI->getOperand(3);
826  int64_t ccCode = CC.getImm();
827  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
828  unsigned int opCode = translateToOpcode(ccCode, simpleVT);
829  DebugLoc DL = MI->getDebugLoc();
830  MachineBasicBlock::iterator BBI = MI;
831  setPrivateData(BB, BBI, &DL, &TII);
832  if (!LHS.isReg()) {
833    LHS = convertToReg(LHS);
834  }
835  if (!RHS.isReg()) {
836    RHS = convertToReg(RHS);
837  }
838  switch (ccCode) {
839    case AMDILCC::IL_CC_I_EQ:
840    case AMDILCC::IL_CC_I_NE:
841    case AMDILCC::IL_CC_I_GE:
842    case AMDILCC::IL_CC_I_LT:
843      {
844        uint32_t lhsreg = addExtensionInstructions(
845            LHS.getReg(), true, simpleVT);
846        uint32_t rhsreg = addExtensionInstructions(
847            RHS.getReg(), true, simpleVT);
848        generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
849      }
850      break;
851    case AMDILCC::IL_CC_U_EQ:
852    case AMDILCC::IL_CC_U_NE:
853    case AMDILCC::IL_CC_U_GE:
854    case AMDILCC::IL_CC_U_LT:
855    case AMDILCC::IL_CC_D_EQ:
856    case AMDILCC::IL_CC_F_EQ:
857    case AMDILCC::IL_CC_F_OEQ:
858    case AMDILCC::IL_CC_D_OEQ:
859    case AMDILCC::IL_CC_D_NE:
860    case AMDILCC::IL_CC_F_NE:
861    case AMDILCC::IL_CC_F_UNE:
862    case AMDILCC::IL_CC_D_UNE:
863    case AMDILCC::IL_CC_D_GE:
864    case AMDILCC::IL_CC_F_GE:
865    case AMDILCC::IL_CC_D_OGE:
866    case AMDILCC::IL_CC_F_OGE:
867    case AMDILCC::IL_CC_D_LT:
868    case AMDILCC::IL_CC_F_LT:
869    case AMDILCC::IL_CC_F_OLT:
870    case AMDILCC::IL_CC_D_OLT:
871      generateMachineInst(opCode, DST.getReg(),
872          LHS.getReg(), RHS.getReg());
873      break;
874    case AMDILCC::IL_CC_I_GT:
875    case AMDILCC::IL_CC_I_LE:
876      {
877        uint32_t lhsreg = addExtensionInstructions(
878            LHS.getReg(), true, simpleVT);
879        uint32_t rhsreg = addExtensionInstructions(
880            RHS.getReg(), true, simpleVT);
881        generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
882      }
883      break;
884    case AMDILCC::IL_CC_U_GT:
885    case AMDILCC::IL_CC_U_LE:
886    case AMDILCC::IL_CC_F_GT:
887    case AMDILCC::IL_CC_D_GT:
888    case AMDILCC::IL_CC_F_OGT:
889    case AMDILCC::IL_CC_D_OGT:
890    case AMDILCC::IL_CC_F_LE:
891    case AMDILCC::IL_CC_D_LE:
892    case AMDILCC::IL_CC_D_OLE:
893    case AMDILCC::IL_CC_F_OLE:
894      generateMachineInst(opCode, DST.getReg(),
895          RHS.getReg(), LHS.getReg());
896      break;
897    case AMDILCC::IL_CC_F_UGT:
898    case AMDILCC::IL_CC_F_ULE:
899      {
900        uint32_t VReg[4] = {
901          genVReg(simpleVT), genVReg(simpleVT),
902          genVReg(simpleVT), genVReg(simpleVT)
903        };
904        generateMachineInst(opCode, VReg[0],
905            RHS.getReg(), LHS.getReg());
906        generateMachineInst(AMDIL::FNE, VReg[1],
907            RHS.getReg(), RHS.getReg());
908        generateMachineInst(AMDIL::FNE, VReg[2],
909            LHS.getReg(), LHS.getReg());
910        generateMachineInst(AMDIL::BINARY_OR_f32,
911            VReg[3], VReg[0], VReg[1]);
912        generateMachineInst(AMDIL::BINARY_OR_f32,
913            DST.getReg(), VReg[2], VReg[3]);
914      }
915      break;
916    case AMDILCC::IL_CC_F_ULT:
917    case AMDILCC::IL_CC_F_UGE:
918      {
919        uint32_t VReg[4] = {
920          genVReg(simpleVT), genVReg(simpleVT),
921          genVReg(simpleVT), genVReg(simpleVT)
922        };
923        generateMachineInst(opCode, VReg[0],
924            LHS.getReg(), RHS.getReg());
925        generateMachineInst(AMDIL::FNE, VReg[1],
926            RHS.getReg(), RHS.getReg());
927        generateMachineInst(AMDIL::FNE, VReg[2],
928            LHS.getReg(), LHS.getReg());
929        generateMachineInst(AMDIL::BINARY_OR_f32,
930            VReg[3], VReg[0], VReg[1]);
931        generateMachineInst(AMDIL::BINARY_OR_f32,
932            DST.getReg(), VReg[2], VReg[3]);
933      }
934      break;
935    case AMDILCC::IL_CC_D_UGT:
936    case AMDILCC::IL_CC_D_ULE:
937      {
938        uint32_t regID = AMDIL::GPRF64RegClassID;
939        uint32_t VReg[4] = {
940          genVReg(regID), genVReg(regID),
941          genVReg(regID), genVReg(regID)
942        };
943        // The result of a double comparison is a 32bit result
944        generateMachineInst(opCode, VReg[0],
945            RHS.getReg(), LHS.getReg());
946        generateMachineInst(AMDIL::DNE, VReg[1],
947            RHS.getReg(), RHS.getReg());
948        generateMachineInst(AMDIL::DNE, VReg[2],
949            LHS.getReg(), LHS.getReg());
950        generateMachineInst(AMDIL::BINARY_OR_f32,
951            VReg[3], VReg[0], VReg[1]);
952        generateMachineInst(AMDIL::BINARY_OR_f32,
953            DST.getReg(), VReg[2], VReg[3]);
954      }
955      break;
956    case AMDILCC::IL_CC_D_UGE:
957    case AMDILCC::IL_CC_D_ULT:
958      {
959        uint32_t regID = AMDIL::GPRF64RegClassID;
960        uint32_t VReg[4] = {
961          genVReg(regID), genVReg(regID),
962          genVReg(regID), genVReg(regID)
963        };
964        // The result of a double comparison is a 32bit result
965        generateMachineInst(opCode, VReg[0],
966            LHS.getReg(), RHS.getReg());
967        generateMachineInst(AMDIL::DNE, VReg[1],
968            RHS.getReg(), RHS.getReg());
969        generateMachineInst(AMDIL::DNE, VReg[2],
970            LHS.getReg(), LHS.getReg());
971        generateMachineInst(AMDIL::BINARY_OR_f32,
972            VReg[3], VReg[0], VReg[1]);
973        generateMachineInst(AMDIL::BINARY_OR_f32,
974            DST.getReg(), VReg[2], VReg[3]);
975      }
976      break;
977    case AMDILCC::IL_CC_F_UEQ:
978      {
979        uint32_t VReg[4] = {
980          genVReg(simpleVT), genVReg(simpleVT),
981          genVReg(simpleVT), genVReg(simpleVT)
982        };
983        generateMachineInst(AMDIL::FEQ, VReg[0],
984            LHS.getReg(), RHS.getReg());
985        generateMachineInst(AMDIL::FNE, VReg[1],
986            LHS.getReg(), LHS.getReg());
987        generateMachineInst(AMDIL::FNE, VReg[2],
988            RHS.getReg(), RHS.getReg());
989        generateMachineInst(AMDIL::BINARY_OR_f32,
990            VReg[3], VReg[0], VReg[1]);
991        generateMachineInst(AMDIL::BINARY_OR_f32,
992            DST.getReg(), VReg[2], VReg[3]);
993      }
994      break;
995    case AMDILCC::IL_CC_F_ONE:
996      {
997        uint32_t VReg[4] = {
998          genVReg(simpleVT), genVReg(simpleVT),
999          genVReg(simpleVT), genVReg(simpleVT)
1000        };
1001        generateMachineInst(AMDIL::FNE, VReg[0],
1002            LHS.getReg(), RHS.getReg());
1003        generateMachineInst(AMDIL::FEQ, VReg[1],
1004            LHS.getReg(), LHS.getReg());
1005        generateMachineInst(AMDIL::FEQ, VReg[2],
1006            RHS.getReg(), RHS.getReg());
1007        generateMachineInst(AMDIL::BINARY_AND_f32,
1008            VReg[3], VReg[0], VReg[1]);
1009        generateMachineInst(AMDIL::BINARY_AND_f32,
1010            DST.getReg(), VReg[2], VReg[3]);
1011      }
1012      break;
1013    case AMDILCC::IL_CC_D_UEQ:
1014      {
1015        uint32_t regID = AMDIL::GPRF64RegClassID;
1016        uint32_t VReg[4] = {
1017          genVReg(regID), genVReg(regID),
1018          genVReg(regID), genVReg(regID)
1019        };
1020        // The result of a double comparison is a 32bit result
1021        generateMachineInst(AMDIL::DEQ, VReg[0],
1022            LHS.getReg(), RHS.getReg());
1023        generateMachineInst(AMDIL::DNE, VReg[1],
1024            LHS.getReg(), LHS.getReg());
1025        generateMachineInst(AMDIL::DNE, VReg[2],
1026            RHS.getReg(), RHS.getReg());
1027        generateMachineInst(AMDIL::BINARY_OR_f32,
1028            VReg[3], VReg[0], VReg[1]);
1029        generateMachineInst(AMDIL::BINARY_OR_f32,
1030            DST.getReg(), VReg[2], VReg[3]);
1031
1032      }
1033      break;
1034    case AMDILCC::IL_CC_D_ONE:
1035      {
1036        uint32_t regID = AMDIL::GPRF64RegClassID;
1037        uint32_t VReg[4] = {
1038          genVReg(regID), genVReg(regID),
1039          genVReg(regID), genVReg(regID)
1040        };
1041        // The result of a double comparison is a 32bit result
1042        generateMachineInst(AMDIL::DNE, VReg[0],
1043            LHS.getReg(), RHS.getReg());
1044        generateMachineInst(AMDIL::DEQ, VReg[1],
1045            LHS.getReg(), LHS.getReg());
1046        generateMachineInst(AMDIL::DEQ, VReg[2],
1047            RHS.getReg(), RHS.getReg());
1048        generateMachineInst(AMDIL::BINARY_AND_f32,
1049            VReg[3], VReg[0], VReg[1]);
1050        generateMachineInst(AMDIL::BINARY_AND_f32,
1051            DST.getReg(), VReg[2], VReg[3]);
1052
1053      }
1054      break;
1055    case AMDILCC::IL_CC_F_O:
1056      {
1057        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1058        generateMachineInst(AMDIL::FEQ, VReg[0],
1059            RHS.getReg(), RHS.getReg());
1060        generateMachineInst(AMDIL::FEQ, VReg[1],
1061            LHS.getReg(), LHS.getReg());
1062        generateMachineInst(AMDIL::BINARY_AND_f32,
1063            DST.getReg(), VReg[0], VReg[1]);
1064      }
1065      break;
1066    case AMDILCC::IL_CC_D_O:
1067      {
1068        uint32_t regID = AMDIL::GPRF64RegClassID;
1069        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1070        // The result of a double comparison is a 32bit result
1071        generateMachineInst(AMDIL::DEQ, VReg[0],
1072            RHS.getReg(), RHS.getReg());
1073        generateMachineInst(AMDIL::DEQ, VReg[1],
1074            LHS.getReg(), LHS.getReg());
1075        generateMachineInst(AMDIL::BINARY_AND_f32,
1076            DST.getReg(), VReg[0], VReg[1]);
1077      }
1078      break;
1079    case AMDILCC::IL_CC_F_UO:
1080      {
1081        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1082        generateMachineInst(AMDIL::FNE, VReg[0],
1083            RHS.getReg(), RHS.getReg());
1084        generateMachineInst(AMDIL::FNE, VReg[1],
1085            LHS.getReg(), LHS.getReg());
1086        generateMachineInst(AMDIL::BINARY_OR_f32,
1087            DST.getReg(), VReg[0], VReg[1]);
1088      }
1089      break;
1090    case AMDILCC::IL_CC_D_UO:
1091      {
1092        uint32_t regID = AMDIL::GPRF64RegClassID;
1093        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1094        // The result of a double comparison is a 32bit result
1095        generateMachineInst(AMDIL::DNE, VReg[0],
1096            RHS.getReg(), RHS.getReg());
1097        generateMachineInst(AMDIL::DNE, VReg[1],
1098            LHS.getReg(), LHS.getReg());
1099        generateMachineInst(AMDIL::BINARY_OR_f32,
1100            DST.getReg(), VReg[0], VReg[1]);
1101      }
1102      break;
1103    case AMDILCC::IL_CC_L_LE:
1104    case AMDILCC::IL_CC_L_GE:
1105    case AMDILCC::IL_CC_L_EQ:
1106    case AMDILCC::IL_CC_L_NE:
1107    case AMDILCC::IL_CC_L_LT:
1108    case AMDILCC::IL_CC_L_GT:
1109    case AMDILCC::IL_CC_UL_LE:
1110    case AMDILCC::IL_CC_UL_GE:
1111    case AMDILCC::IL_CC_UL_EQ:
1112    case AMDILCC::IL_CC_UL_NE:
1113    case AMDILCC::IL_CC_UL_LT:
1114    case AMDILCC::IL_CC_UL_GT:
1115      {
1116        const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1117            &this->getTargetMachine())->getSubtargetImpl();
1118        if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
1119          generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
1120        } else {
1121          generateLongRelational(MI, opCode);
1122        }
1123      }
1124      break;
1125    case AMDILCC::COND_ERROR:
1126      assert(0 && "Invalid CC code");
1127      break;
1128  };
1129}
1130
1131//===----------------------------------------------------------------------===//
1132// TargetLowering Class Implementation Begins
1133//===----------------------------------------------------------------------===//
1134  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
1135: TargetLowering(TM, new TargetLoweringObjectFileELF())
1136{
1137  int types[] =
1138  {
1139    (int)MVT::i8,
1140    (int)MVT::i16,
1141    (int)MVT::i32,
1142    (int)MVT::f32,
1143    (int)MVT::f64,
1144    (int)MVT::i64,
1145    (int)MVT::v2i8,
1146    (int)MVT::v4i8,
1147    (int)MVT::v2i16,
1148    (int)MVT::v4i16,
1149    (int)MVT::v4f32,
1150    (int)MVT::v4i32,
1151    (int)MVT::v2f32,
1152    (int)MVT::v2i32,
1153    (int)MVT::v2f64,
1154    (int)MVT::v2i64
1155  };
1156
1157  int IntTypes[] =
1158  {
1159    (int)MVT::i8,
1160    (int)MVT::i16,
1161    (int)MVT::i32,
1162    (int)MVT::i64
1163  };
1164
1165  int FloatTypes[] =
1166  {
1167    (int)MVT::f32,
1168    (int)MVT::f64
1169  };
1170
1171  int VectorTypes[] =
1172  {
1173    (int)MVT::v2i8,
1174    (int)MVT::v4i8,
1175    (int)MVT::v2i16,
1176    (int)MVT::v4i16,
1177    (int)MVT::v4f32,
1178    (int)MVT::v4i32,
1179    (int)MVT::v2f32,
1180    (int)MVT::v2i32,
1181    (int)MVT::v2f64,
1182    (int)MVT::v2i64
1183  };
1184  size_t numTypes = sizeof(types) / sizeof(*types);
1185  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
1186  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
1187  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
1188
1189  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1190      &this->getTargetMachine())->getSubtargetImpl();
1191  // These are the current register classes that are
1192  // supported
1193
1194  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
1195  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
1196
1197  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1198    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
1199    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
1200  }
1201  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
1202    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
1203    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
1204    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
1205    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
1206  }
1207  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
1208    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
1209    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
1210    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
1211    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
1212  }
1213  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
1214  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
1215  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
1216  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
1217  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1218    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
1219    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
1220  }
1221
1222  for (unsigned int x  = 0; x < numTypes; ++x) {
1223    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
1224
1225    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1226    // We cannot sextinreg, expand to shifts
1227    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1228    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1229    setOperationAction(ISD::FP_ROUND, VT, Expand);
1230    setOperationAction(ISD::OR, VT, Custom);
1231    setOperationAction(ISD::SUBE, VT, Expand);
1232    setOperationAction(ISD::SUBC, VT, Expand);
1233    setOperationAction(ISD::ADD, VT, Custom);
1234    setOperationAction(ISD::ADDE, VT, Expand);
1235    setOperationAction(ISD::ADDC, VT, Expand);
1236    setOperationAction(ISD::SETCC, VT, Custom);
1237    setOperationAction(ISD::BRCOND, VT, Custom);
1238    setOperationAction(ISD::BR_CC, VT, Custom);
1239    setOperationAction(ISD::BR_JT, VT, Expand);
1240    setOperationAction(ISD::BRIND, VT, Expand);
1241    // TODO: Implement custom UREM/SREM routines
1242    setOperationAction(ISD::UREM, VT, Expand);
1243    setOperationAction(ISD::SREM, VT, Expand);
1244    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1245    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1246    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1247    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1248    setOperationAction(ISDBITCAST, VT, Custom);
1249    setOperationAction(ISD::GlobalAddress, VT, Custom);
1250    setOperationAction(ISD::JumpTable, VT, Custom);
1251    setOperationAction(ISD::ConstantPool, VT, Custom);
1252    setOperationAction(ISD::SELECT_CC, VT, Custom);
1253    setOperationAction(ISD::SELECT, VT, Custom);
1254    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1255    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1256    if (VT != MVT::i64 && VT != MVT::v2i64) {
1257      setOperationAction(ISD::SDIV, VT, Custom);
1258      setOperationAction(ISD::UDIV, VT, Custom);
1259    }
1260    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1261    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1262  }
1263  for (unsigned int x = 0; x < numFloatTypes; ++x) {
1264    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
1265
1266    // IL does not have these operations for floating point types
1267    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
1268    setOperationAction(ISD::FP_ROUND, VT, Custom);
1269    setOperationAction(ISD::SETOLT, VT, Expand);
1270    setOperationAction(ISD::SETOGE, VT, Expand);
1271    setOperationAction(ISD::SETOGT, VT, Expand);
1272    setOperationAction(ISD::SETOLE, VT, Expand);
1273    setOperationAction(ISD::SETULT, VT, Expand);
1274    setOperationAction(ISD::SETUGE, VT, Expand);
1275    setOperationAction(ISD::SETUGT, VT, Expand);
1276    setOperationAction(ISD::SETULE, VT, Expand);
1277  }
1278
1279  for (unsigned int x = 0; x < numIntTypes; ++x) {
1280    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
1281
1282    // GPU also does not have divrem function for signed or unsigned
1283    setOperationAction(ISD::SDIVREM, VT, Expand);
1284    setOperationAction(ISD::UDIVREM, VT, Expand);
1285    setOperationAction(ISD::FP_ROUND, VT, Expand);
1286
1287    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1288    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1289    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1290
1291    // GPU doesn't have a rotl, rotr, or byteswap instruction
1292    setOperationAction(ISD::ROTR, VT, Expand);
1293    setOperationAction(ISD::ROTL, VT, Expand);
1294    setOperationAction(ISD::BSWAP, VT, Expand);
1295
1296    // GPU doesn't have any counting operators
1297    setOperationAction(ISD::CTPOP, VT, Expand);
1298    setOperationAction(ISD::CTTZ, VT, Expand);
1299    setOperationAction(ISD::CTLZ, VT, Expand);
1300  }
1301
1302  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
1303  {
1304    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
1305
1306    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1307    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1308    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1309    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
1310    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1311    setOperationAction(ISD::FP_ROUND, VT, Expand);
1312    setOperationAction(ISD::SDIVREM, VT, Expand);
1313    setOperationAction(ISD::UDIVREM, VT, Expand);
1314    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1315    // setOperationAction(ISD::VSETCC, VT, Expand);
1316    setOperationAction(ISD::SETCC, VT, Expand);
1317    setOperationAction(ISD::SELECT_CC, VT, Expand);
1318    setOperationAction(ISD::SELECT, VT, Expand);
1319
1320  }
1321  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
1322  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1323    if (stm->calVersion() < CAL_VERSION_SC_139
1324        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
1325      setOperationAction(ISD::MUL, MVT::i64, Custom);
1326    }
1327    setOperationAction(ISD::SUB, MVT::i64, Custom);
1328    setOperationAction(ISD::ADD, MVT::i64, Custom);
1329    setOperationAction(ISD::MULHU, MVT::i64, Expand);
1330    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
1331    setOperationAction(ISD::MULHS, MVT::i64, Expand);
1332    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
1333    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1334    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1335    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1336    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
1337    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
1338    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
1339    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
1340    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
1341    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
1342    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
1343    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
1344    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
1345    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
1346    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
1347    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
1348  }
1349  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1350    // we support loading/storing v2f64 but not operations on the type
1351    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
1352    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
1353    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
1354    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
1355    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
1356    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
1357    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
1358    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
1359    // We want to expand vector conversions into their scalar
1360    // counterparts.
1361    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
1362    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
1363    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
1364    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
1365    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
1366    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
1367    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
1368    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
1369    setOperationAction(ISD::FABS, MVT::f64, Expand);
1370    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
1371  }
1372  // TODO: Fix the UDIV24 algorithm so it works for these
1373  // types correctly. This needs vector comparisons
1374  // for this to work correctly.
1375  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
1376  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
1377  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
1378  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
1379  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
1380  setOperationAction(ISD::SUBC, MVT::Other, Expand);
1381  setOperationAction(ISD::ADDE, MVT::Other, Expand);
1382  setOperationAction(ISD::ADDC, MVT::Other, Expand);
1383  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1384  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
1385  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1386  setOperationAction(ISD::BRIND, MVT::Other, Expand);
1387  setOperationAction(ISD::SETCC, MVT::Other, Custom);
1388  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
1389  setOperationAction(ISD::FDIV, MVT::f32, Custom);
1390  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
1391  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
1392
1393  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
1394  // Use the default implementation.
1395  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
1396  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
1397  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
1398  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
1399  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
1400  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
1401  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
1402  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
1403  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
1404
1405  setStackPointerRegisterToSaveRestore(AMDIL::SP);
1406  setSchedulingPreference(Sched::RegPressure);
1407  setPow2DivIsCheap(false);
1408  setPrefLoopAlignment(16);
1409  setSelectIsExpensive(true);
1410  setJumpIsExpensive(true);
1411  computeRegisterProperties();
1412
1413  maxStoresPerMemcpy  = 4096;
1414  maxStoresPerMemmove = 4096;
1415  maxStoresPerMemset  = 4096;
1416
1417#undef numTypes
1418#undef numIntTypes
1419#undef numVectorTypes
1420#undef numFloatTypes
1421}
1422
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  // Map an AMDIL-specific DAG node opcode to its mnemonic for debug dumps.
  // Returns 0 (null) for any opcode this target does not define, which is
  // the contract TargetLowering expects for unknown target nodes.
  // NOTE(review): ATOM_L_XOR_NORET has no case below even though the other
  // local-atomic NORET variants do — presumably an omission; verify against
  // the AMDILISD enum before relying on its name being printed.
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
    case AMDILISD::MAD:  return "AMDILISD::MAD";
    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
    case AMDILISD::CALL:  return "AMDILISD::CALL";
    case AMDILISD::RET:   return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    // Vector construction / element access nodes.
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    // 64-bit long/double build and component-extraction nodes.
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    // Comparison and control-flow nodes.
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Global-memory atomics (ATOM_G_*), with and without a returned value.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    // Local-memory atomics (ATOM_L_*).
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    // Region-memory atomics (ATOM_R_*), including the masked-or (MSKOR) form.
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    // Append-buffer and image intrinsic nodes.
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
// getTgtMemIntrinsic - Describe an AMDIL atomic/append intrinsic as a
// target memory intrinsic so the SelectionDAG builder models its memory
// behavior.  Returns false for anything outside the AMDIL intrinsic
// range or not handled by the switch below; on success fills in Info
// (opcode, memory VT, pointer operand, read/write flags) and returns
// true.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Only intrinsics inside the AMDIL-specific numbering range qualify.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  // Set only by the float exchange (_xchg_*f32) variants, whose operand
  // is handled via a bitcast to integer.
  bool bitCastToInt = false;
  unsigned IntNo;
  // Cleared for the *_noret variants, which discard the old value.
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // Pre-SC_136 CAL compilers lack native atomic dec/inc; emulate dec
    // with sub (and below, inc with add).
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    // min/max split into signed (i32) and unsigned (u32) opcodes, so the
    // signed/unsigned cases cannot share a label like add/and/or do.
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // The *f32 exchange cases set the bitcast flag and then deliberately
    // fall through to the matching integer cases.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
             // fall through
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  Info.opc = IntNo;
  // NOTE(review): f32 when bitCastToInt is set looks inverted relative
  // to the flag's name (the operand is bitcast *to* int) — confirm the
  // intended memVT against the intrinsic lowering.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  // Operand 0 of every intrinsic handled above is the pointer operand.
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  // Atomics with a returned old value read memory; *_noret forms only
  // write.
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
1959// The backend supports 32 and 64 bit floating point immediates
1960bool
1961AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1962{
1963  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1964      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1965    return true;
1966  } else {
1967    return false;
1968  }
1969}
1970
1971bool
1972AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1973{
1974  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1975      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1976    return false;
1977  } else {
1978    return true;
1979  }
1980}
1981
1982
1983// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1984// be zero. Op is expected to be a target specific node. Used by DAG
1985// combiner.
1986
1987void
1988AMDILTargetLowering::computeMaskedBitsForTargetNode(
1989    const SDValue Op,
1990    APInt &KnownZero,
1991    APInt &KnownOne,
1992    const SelectionDAG &DAG,
1993    unsigned Depth) const
1994{
1995  APInt KnownZero2;
1996  APInt KnownOne2;
1997  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1998  switch (Op.getOpcode()) {
1999    default: break;
2000    case AMDILISD::SELECT_CC:
2001             DAG.ComputeMaskedBits(
2002                 Op.getOperand(1),
2003                 KnownZero,
2004                 KnownOne,
2005                 Depth + 1
2006                 );
2007             DAG.ComputeMaskedBits(
2008                 Op.getOperand(0),
2009                 KnownZero2,
2010                 KnownOne2
2011                 );
2012             assert((KnownZero & KnownOne) == 0
2013                 && "Bits known to be one AND zero?");
2014             assert((KnownZero2 & KnownOne2) == 0
2015                 && "Bits known to be one AND zero?");
2016             // Only known if known in both the LHS and RHS
2017             KnownOne &= KnownOne2;
2018             KnownZero &= KnownZero2;
2019             break;
2020  };
2021}
2022
2023// This is the function that determines which calling convention should
2024// be used. Currently there is only one calling convention
2025CCAssignFn*
2026AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
2027{
2028  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
2029  return CC_AMDIL32;
2030}
2031
2032// LowerCallResult - Lower the result values of an ISD::CALL into the
2033// appropriate copies out of appropriate physical registers.  This assumes that
2034// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
2035// being lowered.  The returns a SDNode with the same number of values as the
2036// ISD::CALL.
2037SDValue
2038AMDILTargetLowering::LowerCallResult(
2039    SDValue Chain,
2040    SDValue InFlag,
2041    CallingConv::ID CallConv,
2042    bool isVarArg,
2043    const SmallVectorImpl<ISD::InputArg> &Ins,
2044    DebugLoc dl,
2045    SelectionDAG &DAG,
2046    SmallVectorImpl<SDValue> &InVals) const
2047{
2048  // Assign locations to each value returned by this call
2049  SmallVector<CCValAssign, 16> RVLocs;
2050  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2051                 getTargetMachine(), RVLocs, *DAG.getContext());
2052  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
2053
2054  // Copy all of the result registers out of their specified physreg.
2055  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2056    EVT CopyVT = RVLocs[i].getValVT();
2057    if (RVLocs[i].isRegLoc()) {
2058      Chain = DAG.getCopyFromReg(
2059          Chain,
2060          dl,
2061          RVLocs[i].getLocReg(),
2062          CopyVT,
2063          InFlag
2064          ).getValue(1);
2065      SDValue Val = Chain.getValue(0);
2066      InFlag = Chain.getValue(2);
2067      InVals.push_back(Val);
2068    }
2069  }
2070
2071  return Chain;
2072
2073}
2074
2075//===----------------------------------------------------------------------===//
2076//                           Other Lowering Hooks
2077//===----------------------------------------------------------------------===//
2078
// EmitInstrWithCustomInserter - Expand pseudo-instructions that were
// marked usesCustomInserter.  Only the CMP pseudo (in all of its typed
// variants) is handled; any other opcode is returned untouched.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes expands to a 'case' label for every typed
    // variant of AMDIL::CMP, so the statements below handle all of them.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    // The pseudo has been replaced by real compare instructions; drop it.
    MI->eraseFromParent();
    break;
    default:
    break;
  }
  return BB;
}
2094
2095// Recursively assign SDNodeOrdering to any unordered nodes
2096// This is necessary to maintain source ordering of instructions
2097// under -O0 to avoid odd-looking "skipping around" issues.
2098  static const SDValue
2099Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
2100{
2101  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
2102    DAG.AssignOrdering( New.getNode(), order );
2103    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
2104      Ordered( DAG, order, New.getOperand(i) );
2105  }
2106  return New;
2107}
2108
// LOWER(A) dispatches ISD::A to the matching Lower##A hook and then
// copies the original node's SDNode ordering onto the replacement (via
// Ordered) so -O0 output keeps source instruction order.
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

// LowerOperation - Entry point for all custom-lowered operations.
// Each LOWER(...) below expands to a 'case' label plus a return, so the
// list after the default branch is reachable despite its placement.
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      // Unhandled opcode: dump the node for diagnosis and abort in
      // asserting builds.
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
      break;
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(FP_TO_SINT);
      LOWER(FP_TO_UINT);
      LOWER(SINT_TO_FP);
      LOWER(UINT_TO_FP);
      LOWER(ADD);
      LOWER(MUL);
      LOWER(SUB);
      LOWER(FDIV);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(UDIV);
      LOWER(UREM);
      LOWER(BUILD_VECTOR);
      LOWER(INSERT_VECTOR_ELT);
      LOWER(EXTRACT_VECTOR_ELT);
      LOWER(EXTRACT_SUBVECTOR);
      LOWER(SCALAR_TO_VECTOR);
      LOWER(CONCAT_VECTORS);
      LOWER(AND);
      LOWER(OR);
      LOWER(SELECT);
      LOWER(SELECT_CC);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(BITCAST);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
      LOWER(FP_ROUND);
  }
  // Only reached via the default branch in non-asserting builds.
  return Op;
}
2158
2159int
2160AMDILTargetLowering::getVarArgsFrameOffset() const
2161{
2162  return VarArgsFrameOffset;
2163}
2164#undef LOWER
2165
2166SDValue
2167AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
2168{
2169  SDValue DST = Op;
2170  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
2171  const GlobalValue *G = GADN->getGlobal();
2172  DebugLoc DL = Op.getDebugLoc();
2173  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
2174  if (!GV) {
2175    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2176  } else {
2177    if (GV->hasInitializer()) {
2178      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
2179      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
2180        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
2181      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
2182        DST = DAG.getConstantFP(CF->getValueAPF(),
2183            Op.getValueType());
2184      } else if (dyn_cast<ConstantAggregateZero>(C)) {
2185        EVT VT = Op.getValueType();
2186        if (VT.isInteger()) {
2187          DST = DAG.getConstant(0, VT);
2188        } else {
2189          DST = DAG.getConstantFP(0, VT);
2190        }
2191      } else {
2192        assert(!"lowering this type of Global Address "
2193            "not implemented yet!");
2194        C->dump();
2195        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2196      }
2197    } else {
2198      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2199    }
2200  }
2201  return DST;
2202}
2203
2204SDValue
2205AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
2206{
2207  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2208  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
2209  return Result;
2210}
2211SDValue
2212AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
2213{
2214  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2215  EVT PtrVT = Op.getValueType();
2216  SDValue Result;
2217  if (CP->isMachineConstantPoolEntry()) {
2218    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2219        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2220  } else {
2221    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2222        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2223  }
2224  return Result;
2225}
2226
2227SDValue
2228AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
2229{
2230  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2231  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
2232  return Result;
2233}
2234
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments places on the stack.
/// TODO: isVarArg, hasStructRet
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: mark the physreg live-in and copy it into a
      // fresh virtual register of the matching class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any non-Full location means the value was widened in the
      // register; truncate back to the declared argument type.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: emit the load via the shared helper.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
2335/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2336/// by "Src" to address "Dst" with size and alignment information specified by
2337/// the specific parameter attribute. The copy will be passed as a byval
2338/// function parameter.
2339static SDValue
2340CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2341    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
2342  assert(0 && "MemCopy does not exist yet");
2343  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2344
2345  return DAG.getMemcpy(Chain,
2346      Src.getDebugLoc(),
2347      Dst, Src, SizeNode, Flags.getByValAlign(),
2348      /*IsVol=*/false, /*AlwaysInline=*/true,
2349      MachinePointerInfo(), MachinePointerInfo());
2350}
2351
2352SDValue
2353AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
2354    SDValue StackPtr, SDValue Arg,
2355    DebugLoc dl, SelectionDAG &DAG,
2356    const CCValAssign &VA,
2357    ISD::ArgFlagsTy Flags) const
2358{
2359  unsigned int LocMemOffset = VA.getLocMemOffset();
2360  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2361  PtrOff = DAG.getNode(ISD::ADD,
2362      dl,
2363      getPointerTy(), StackPtr, PtrOff);
2364  if (Flags.isByVal()) {
2365    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
2366  } else {
2367    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
2368        MachinePointerInfo::getStack(LocMemOffset),
2369        false, false, 0);
2370  }
2371  return PtrOff;
2372}
/// LowerCall - function arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled for this target; the later
  // isTailCall branches are therefore dead and only assert.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Total stack space the outgoing arguments need.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to the location's type if the calling convention
    // assigned it a wider location (sign-, zero-, or any-extend).
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register-assigned arguments are copied into physregs just before
      // the call (see the CopyToReg loop below).
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Merge all the argument stores into a single token so they may execute
  // in any order before the call itself.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  // Copy register arguments into their physical registers, glueing each
  // copy to the next (and ultimately to the call) via InFlag.
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and glue output.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  // Glue the call to the last CopyToReg, if any registers were passed.
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
2551static void checkMADType(
2552    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
2553{
2554  bool globalLoadStore = false;
2555  is24bitMAD = false;
2556  is32bitMAD = false;
2557  return;
2558  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
2559      "this to work correctly!");
2560  if (Op.getNode()->use_empty()) {
2561    return;
2562  }
2563  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
2564      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
2565    SDNode *ptr = *nBegin;
2566    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
2567    // If we are not a LSBaseSDNode then we don't do this
2568    // optimization.
2569    // If we are a LSBaseSDNode, but the op is not the offset
2570    // or base pointer, then we don't do this optimization
2571    // (i.e. we are the value being stored)
2572    if (!lsNode ||
2573        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
2574      return;
2575    }
2576    const PointerType *PT =
2577      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
2578    unsigned as = PT->getAddressSpace();
2579    switch(as) {
2580      default:
2581        globalLoadStore = true;
2582      case AMDILAS::PRIVATE_ADDRESS:
2583        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
2584          globalLoadStore = true;
2585        }
2586        break;
2587      case AMDILAS::CONSTANT_ADDRESS:
2588        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
2589          globalLoadStore = true;
2590        }
2591        break;
2592      case AMDILAS::LOCAL_ADDRESS:
2593        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
2594          globalLoadStore = true;
2595        }
2596        break;
2597      case AMDILAS::REGION_ADDRESS:
2598        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
2599          globalLoadStore = true;
2600        }
2601        break;
2602    }
2603  }
2604  if (globalLoadStore) {
2605    is32bitMAD = true;
2606  } else {
2607    is24bitMAD = true;
2608  }
2609}
2610
2611SDValue
2612AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
2613{
2614  SDValue LHS = Op.getOperand(0);
2615  SDValue RHS = Op.getOperand(1);
2616  DebugLoc DL = Op.getDebugLoc();
2617  EVT OVT = Op.getValueType();
2618  SDValue DST;
2619  const AMDILSubtarget *stm = &this->getTargetMachine()
2620    .getSubtarget<AMDILSubtarget>();
2621  bool isVec = OVT.isVector();
2622  if (OVT.getScalarType() == MVT::i64) {
2623    MVT INTTY = MVT::i32;
2624    if (OVT == MVT::v2i64) {
2625      INTTY = MVT::v2i32;
2626    }
2627    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
2628        && INTTY == MVT::i32) {
2629      DST = DAG.getNode(AMDILISD::ADD,
2630          DL,
2631          OVT,
2632          LHS, RHS);
2633    } else {
2634      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
2635      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2636      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
2637      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
2638      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
2639      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
2640      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
2641      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
2642      SDValue cmp;
2643      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2644          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
2645          INTLO, RHSLO);
2646      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
2647      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
2648      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
2649          INTLO, INTHI);
2650    }
2651  } else {
2652    if (LHS.getOpcode() == ISD::FrameIndex ||
2653        RHS.getOpcode() == ISD::FrameIndex) {
2654      DST = DAG.getNode(AMDILISD::ADDADDR,
2655          DL,
2656          OVT,
2657          LHS, RHS);
2658    } else {
2659      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2660          && LHS.getNumOperands()
2661          && RHS.getNumOperands()) {
2662        bool is24bitMAD = false;
2663        bool is32bitMAD = false;
2664        const ConstantSDNode *LHSConstOpCode =
2665          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
2666        const ConstantSDNode *RHSConstOpCode =
2667          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
2668        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
2669            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
2670            || LHS.getOpcode() == ISD::MUL
2671            || RHS.getOpcode() == ISD::MUL) {
2672          SDValue Op1, Op2, Op3;
2673          // FIXME: Fix this so that it works for unsigned 24bit ops.
2674          if (LHS.getOpcode() == ISD::MUL) {
2675            Op1 = LHS.getOperand(0);
2676            Op2 = LHS.getOperand(1);
2677            Op3 = RHS;
2678          } else if (RHS.getOpcode() == ISD::MUL) {
2679            Op1 = RHS.getOperand(0);
2680            Op2 = RHS.getOperand(1);
2681            Op3 = LHS;
2682          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
2683            Op1 = LHS.getOperand(0);
2684            Op2 = DAG.getConstant(
2685                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
2686            Op3 = RHS;
2687          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
2688            Op1 = RHS.getOperand(0);
2689            Op2 = DAG.getConstant(
2690                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
2691            Op3 = LHS;
2692          }
2693          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
2694          // We can possibly do a MAD transform!
2695          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
2696            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
2697            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2698            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2699                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
2700                Op1, Op2, Op3);
2701          } else if(is32bitMAD) {
2702            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2703            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2704                DL, Tys, DAG.getEntryNode(),
2705                DAG.getConstant(
2706                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
2707                Op1, Op2, Op3);
2708          }
2709        }
2710      }
2711      DST = DAG.getNode(AMDILISD::ADD,
2712          DL,
2713          OVT,
2714          LHS, RHS);
2715    }
2716  }
2717  return DST;
2718}
2719SDValue
2720AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2721    uint32_t bits) const
2722{
2723  DebugLoc DL = Op.getDebugLoc();
2724  EVT INTTY = Op.getValueType();
2725  EVT FPTY;
2726  if (INTTY.isVector()) {
2727    FPTY = EVT(MVT::getVectorVT(MVT::f32,
2728          INTTY.getVectorNumElements()));
2729  } else {
2730    FPTY = EVT(MVT::f32);
2731  }
2732  /* static inline uint
2733     __clz_Nbit(uint x)
2734     {
2735     int xor = 0x3f800000U | x;
2736     float tp = as_float(xor);
2737     float t = tp + -1.0f;
2738     uint tint = as_uint(t);
2739     int cmp = (x != 0);
2740     uint tsrc = tint >> 23;
2741     uint tmask = tsrc & 0xffU;
2742     uint cst = (103 + N)U - tmask;
2743     return cmp ? cst : N;
2744     }
2745     */
2746  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
2747      && "genCLZu16 only works on 32bit types");
2748  // uint x = Op
2749  SDValue x = Op;
2750  // xornode = 0x3f800000 | x
2751  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2752      DAG.getConstant(0x3f800000, INTTY), x);
2753  // float tp = as_float(xornode)
2754  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2755  // float t = tp + -1.0f
2756  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2757      DAG.getConstantFP(-1.0f, FPTY));
2758  // uint tint = as_uint(t)
2759  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2760  // int cmp = (x != 0)
2761  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2762      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2763      DAG.getConstant(0, INTTY));
2764  // uint tsrc = tint >> 23
2765  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2766      DAG.getConstant(23, INTTY));
2767  // uint tmask = tsrc & 0xFF
2768  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2769      DAG.getConstant(0xFFU, INTTY));
2770  // uint cst = (103 + bits) - tmask
2771  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2772      DAG.getConstant((103U + bits), INTTY), tmask);
2773  // return cmp ? cst : N
2774  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2775      DAG.getConstant(bits, INTTY));
2776  return cst;
2777}
2778
2779SDValue
2780AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2781{
2782  SDValue DST = SDValue();
2783  DebugLoc DL = Op.getDebugLoc();
2784  EVT INTTY = Op.getValueType();
2785  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2786      &this->getTargetMachine())->getSubtargetImpl();
2787  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2788    //__clz_32bit(uint u)
2789    //{
2790    // int z = __amdil_ffb_hi(u) ;
2791    // return z < 0 ? 32 : z;
2792    // }
2793    // uint u = op
2794    SDValue u = Op;
2795    // int z = __amdil_ffb_hi(u)
2796    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2797    // int cmp = z < 0
2798    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2799        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2800        z, DAG.getConstant(0, INTTY));
2801    // return cmp ? 32 : z
2802    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2803        DAG.getConstant(32, INTTY), z);
2804  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2805    //  static inline uint
2806    //__clz_32bit(uint x)
2807    //{
2808    //    uint zh = __clz_16bit(x >> 16);
2809    //    uint zl = __clz_16bit(x & 0xffffU);
2810    //   return zh == 16U ? 16U + zl : zh;
2811    //}
2812    // uint x = Op
2813    SDValue x = Op;
2814    // uint xs16 = x >> 16
2815    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2816        DAG.getConstant(16, INTTY));
2817    // uint zh = __clz_16bit(xs16)
2818    SDValue zh = genCLZuN(xs16, DAG, 16);
2819    // uint xa16 = x & 0xFFFF
2820    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2821        DAG.getConstant(0xFFFFU, INTTY));
2822    // uint zl = __clz_16bit(xa16)
2823    SDValue zl = genCLZuN(xa16, DAG, 16);
2824    // uint cmp = zh == 16U
2825    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2826        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2827        zh, DAG.getConstant(16U, INTTY));
2828    // uint zl16 = zl + 16
2829    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2830        DAG.getConstant(16, INTTY), zl);
2831    // return cmp ? zl16 : zh
2832    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2833        cmp, zl16, zh);
2834  } else {
2835    assert(0 && "Attempting to generate a CLZ function with an"
2836        " unknown graphics card");
2837  }
2838  return DST;
2839}
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  // 32-bit integer type with the same element count as the i64 input.
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = x & 0xFFFFFFFF
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = x >> 32
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX: genCLZuN handles at most 23 bits (float-mantissa trick), so
    // split the 64-bit value into three 23-bit-or-less slices.
    //  static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (the top slice is only 64-46 = 18 bits wide)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  -- top 18 bits all zero
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = (zhm5zm == 41)  -- top 41 bits all zero
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
/// genf64toi64 - Lower an f64 -> i64 (includeSign) or f64 -> u64 conversion.
/// On post-HD6XXX devices the conversion is done with two 32-bit
/// FP_TO_UINT steps; on older devices the mantissa/exponent are extracted
/// and shifted manually.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // i64 and i32 types with the same element count as the f64 input.
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000/0xcf800000 are the IEEE-754 single-precision
    // bit patterns of 0x1.0p-32f/-0x1.0p+32f, but getConstantFP takes a
    // double *value*, so these literals are passed as the numbers
    // 796917760.0 / 3481272320.0 — confirm this is intended.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Select the negated result when the original input was negative
      // (i.e. RHS != fabs(RHS)).
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    // Pre-HD7XXX software conversion; reference implementation follows.
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
    // Convert d in to 32-bit components
    long x = as_long(d);
    xhi = LCOMPHI(x);
    xlo = LCOMPLO(x);

    // Generate 'normalized' mantissa
    mhi = xhi | 0x00100000; // hidden bit
    mhi <<= 11;
    temp = xlo >> (32 - 11);
    mhi |= temp
    mlo = xlo << 11;

    // Compute shift right count from exponent
    e = (xhi >> (52-32)) & 0x7ff;
    sr = 1023 + 63 - e;
    srge64 = sr >= 64;
    srge32 = sr >= 32;

    // Compute result for 0 <= sr < 32
    rhi0 = mhi >> (sr &31);
    rlo0 = mlo >> (sr &31);
    temp = mhi << (32 - sr);
    temp |= rlo0;
    rlo0 = sr ? temp : rlo0;

    // Compute result for 32 <= sr
    rhi1 = 0;
    rlo1 = srge64 ? 0 : rhi0;

    // Pick between the 2 results
    rhi = srge32 ? rhi1 : rhi0;
    rlo = srge32 ? rlo1 : rlo0;

    // Optional saturate on overflow
    srlt0 = sr < 0;
    rhi = srlt0 ? MAXVALUE : rhi;
    rlo = srlt0 ? MAXVALUE : rlo;

    // Create long
    res = LCREATE( rlo, rhi );

    // Deal with sign bit (ignoring whether result is signed or unsigned value)
    if (includeSign) {
    sign = ((signed int) xhi) >> 31; fill with sign bit
    sign = LCREATE( sign, sign );
    res += sign;
    res ^= sign;
    }

    return res;
    }
    */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (restore the implicit leading 1 bit,
    // then left-align the 52-bit mantissa in the 64-bit pair).
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: res = (res + sign) ^ sign negates the magnitude
    // when the sign word is all ones (two's-complement via add/xor).
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// genf64toi32 - Emit a software expansion of an f64 -> i32 conversion
// (signed when includeSign is true, unsigned otherwise) built entirely from
// 32-bit integer operations, for devices without a native f64->i32
// instruction.  Works per-lane on vector inputs.  The reference algorithm
// is the C pseudo-code in the comment below.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    // Working integer types mirror the input's lane count.
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
  // Convert d in to 32-bit components
  long x = as_long(d);
  xhi = LCOMPHI(x);
  xlo = LCOMPLO(x);

  // Generate 'normalized' mantissa
  mhi = xhi | 0x00100000; // hidden bit
  mhi <<= 11;
  temp = xlo >> (32 - 11);
  mhi |= temp

  // Compute shift right count from exponent
  e = (xhi >> (52-32)) & 0x7ff;
  sr = 1023 + 31 - e;
  srge32 = sr >= 32;

  // Compute result for 0 <= sr < 32
  res = mhi >> (sr &31);
  res = srge32 ? 0 : res;

  // Optional saturate on overflow
  srlt0 = sr < 0;
  res = srlt0 ? MAXVALUE : res;

  // Deal with sign bit (ignoring whether result is signed or unsigned value)
  if (includeSign) {
  sign = ((signed int) xhi) >> 31; fill with sign bit
  res += sign;
  res ^= sign;
  }

  return res;
  }
  */
  // NOTE(review): the "optional saturate on overflow" step from the
  // pseudo-code above is NOT implemented in the DAG sequence below -
  // confirm callers do not depend on saturating behavior.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa
  // 0x00100000 sets the implicit (hidden) leading 1 of the mantissa.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  // srge32: all-ones mask when sr >= 32 (result underflows to 0).
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit
  if (includeSign) {
    // sign is 0 or -1; (res + sign) ^ sign negates res when sign == -1.
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
3223SDValue
3224AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
3225{
3226  SDValue RHS = Op.getOperand(0);
3227  EVT RHSVT = RHS.getValueType();
3228  MVT RST = RHSVT.getScalarType().getSimpleVT();
3229  EVT LHSVT = Op.getValueType();
3230  MVT LST = LHSVT.getScalarType().getSimpleVT();
3231  DebugLoc DL = Op.getDebugLoc();
3232  SDValue DST;
3233  const AMDILTargetMachine*
3234    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3235    (&this->getTargetMachine());
3236  const AMDILSubtarget*
3237    stm = static_cast<const AMDILSubtarget*>(
3238        amdtm->getSubtargetImpl());
3239  if (RST == MVT::f64 && RHSVT.isVector()
3240      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3241    // We dont support vector 64bit floating point convertions.
3242    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3243      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3244          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3245      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3246      if (!x) {
3247        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3248      } else {
3249        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3250            DST, op, DAG.getTargetConstant(x, MVT::i32));
3251      }
3252    }
3253  } else {
3254    if (RST == MVT::f64
3255        && LST == MVT::i32) {
3256      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3257        DST = SDValue(Op.getNode(), 0);
3258      } else {
3259        DST = genf64toi32(RHS, DAG, true);
3260      }
3261    } else if (RST == MVT::f64
3262        && LST == MVT::i64) {
3263      DST = genf64toi64(RHS, DAG, true);
3264    } else if (RST == MVT::f64
3265        && (LST == MVT::i8 || LST == MVT::i16)) {
3266      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3267        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3268      } else {
3269        SDValue ToInt = genf64toi32(RHS, DAG, true);
3270        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3271      }
3272
3273    } else {
3274      DST = SDValue(Op.getNode(), 0);
3275    }
3276  }
3277  return DST;
3278}
3279
3280SDValue
3281AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
3282{
3283  SDValue DST;
3284  SDValue RHS = Op.getOperand(0);
3285  EVT RHSVT = RHS.getValueType();
3286  MVT RST = RHSVT.getScalarType().getSimpleVT();
3287  EVT LHSVT = Op.getValueType();
3288  MVT LST = LHSVT.getScalarType().getSimpleVT();
3289  DebugLoc DL = Op.getDebugLoc();
3290  const AMDILTargetMachine*
3291    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3292    (&this->getTargetMachine());
3293  const AMDILSubtarget*
3294    stm = static_cast<const AMDILSubtarget*>(
3295        amdtm->getSubtargetImpl());
3296  if (RST == MVT::f64 && RHSVT.isVector()
3297      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3298    // We dont support vector 64bit floating point convertions.
3299    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3300      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3301          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3302      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3303      if (!x) {
3304        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3305      } else {
3306        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3307            DST, op, DAG.getTargetConstant(x, MVT::i32));
3308      }
3309
3310    }
3311  } else {
3312    if (RST == MVT::f64
3313        && LST == MVT::i32) {
3314      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3315        DST = SDValue(Op.getNode(), 0);
3316      } else {
3317        DST = genf64toi32(RHS, DAG, false);
3318      }
3319    } else if (RST == MVT::f64
3320        && LST == MVT::i64) {
3321      DST = genf64toi64(RHS, DAG, false);
3322    } else if (RST == MVT::f64
3323        && (LST == MVT::i8 || LST == MVT::i16)) {
3324      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3325        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3326      } else {
3327        SDValue ToInt = genf64toi32(RHS, DAG, false);
3328        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3329      }
3330
3331    } else {
3332      DST = SDValue(Op.getNode(), 0);
3333    }
3334  }
3335  return DST;
3336}
// genu32tof64 - Emit a software expansion of a u32 -> f64 conversion.
// With a recent enough CAL compiler the cheap "pack into the low half of
// 0x1.0p+52 and subtract the bias" trick is used; otherwise the double is
// assembled manually from a count-leading-zeros normalization.  Works
// per-lane on vector inputs.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  if (isVec) {
    // Working integer types mirror the input's lane count.
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = static_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    // CMOVLOG picks 'exp' when x != 0, otherwise x itself (i.e. 0).
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
3403SDValue
3404AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
3405    SelectionDAG &DAG) const
3406{
3407  EVT RHSVT = RHS.getValueType();
3408  DebugLoc DL = RHS.getDebugLoc();
3409  EVT INTVT;
3410  EVT LONGVT;
3411  bool isVec = RHSVT.isVector();
3412  if (isVec) {
3413    INTVT = EVT(MVT::getVectorVT(MVT::i32,
3414          RHSVT.getVectorNumElements()));
3415  } else {
3416    INTVT = EVT(MVT::i32);
3417  }
3418  LONGVT = RHSVT;
3419  SDValue x = RHS;
3420  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
3421      &this->getTargetMachine())->getSubtargetImpl();
3422  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3423    // double dhi = (double)(as_uint2(x).y);
3424    // double dlo = (double)(as_uint2(x).x);
3425    // return mad(dhi, 0x1.0p+32, dlo)
3426    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
3427    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
3428    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
3429    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
3430    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
3431        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
3432  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
3433    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
3434    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
3435    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
3436    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
3437    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
3438    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
3439    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
3440    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
3441    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
3442    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
3443        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
3444    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
3445    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
3446
3447  } else {
3448    SDValue clz = genCLZu64(x, DAG);
3449    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
3450    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
3451
3452    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
3453    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
3454        DAG.getConstant( (1023+63), INTVT), clz );
3455    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
3456    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3457        mash, exp, mash );  // exp = exp, or 0 if input was 0
3458
3459    // Normalize frac
3460    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
3461        clz, DAG.getConstant( 31, INTVT ) );
3462    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
3463        DAG.getConstant( 32, INTVT ), clz31 );
3464    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
3465    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
3466    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
3467    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
3468    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3469    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3470    SDValue rlo2 = DAG.getConstant( 0, INTVT );
3471    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
3472        clz, DAG.getConstant( 32, INTVT ) );
3473    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3474        clz32, rhi2, rhi1 );
3475    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3476        clz32, rlo2, rlo1 );
3477
3478    // Eliminate hidden bit
3479    rhi = DAG.getNode( ISD::AND, DL, INTVT,
3480        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
3481
3482    // Save bits needed to round properly
3483    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
3484        rlo, DAG.getConstant( 0x7ff, INTVT ) );
3485
3486    // Pack exponent and frac
3487    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
3488        rlo, DAG.getConstant( 11, INTVT ) );
3489    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
3490        rhi, DAG.getConstant( (32 - 11), INTVT ) );
3491    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
3492    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
3493        rhi, DAG.getConstant( 11, INTVT ) );
3494    exp = DAG.getNode( ISD::SHL, DL, INTVT,
3495        exp, DAG.getConstant( 20, INTVT ) );
3496    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
3497
3498    // Compute rounding bit
3499    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
3500        rlo, DAG.getConstant( 1, INTVT ) );
3501    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
3502        round, DAG.getConstant( 0x3ff, INTVT ) );
3503    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
3504        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
3505        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
3506    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
3507    round = DAG.getNode( ISD::SRL, DL, INTVT,
3508        round, DAG.getConstant( 10, INTVT ) );
3509    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
3510
3511    // Add rounding bit
3512    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
3513        round, DAG.getConstant( 0, INTVT ) );
3514    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
3515    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
3516    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
3517  }
3518}
3519SDValue
3520AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3521{
3522  SDValue RHS = Op.getOperand(0);
3523  EVT RHSVT = RHS.getValueType();
3524  MVT RST = RHSVT.getScalarType().getSimpleVT();
3525  EVT LHSVT = Op.getValueType();
3526  MVT LST = LHSVT.getScalarType().getSimpleVT();
3527  DebugLoc DL = Op.getDebugLoc();
3528  SDValue DST;
3529  EVT INTVT;
3530  EVT LONGVT;
3531  const AMDILTargetMachine*
3532    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3533    (&this->getTargetMachine());
3534  const AMDILSubtarget*
3535    stm = static_cast<const AMDILSubtarget*>(
3536        amdtm->getSubtargetImpl());
3537  if (LST == MVT::f64 && LHSVT.isVector()
3538      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3539    // We dont support vector 64bit floating point convertions.
3540    DST = Op;
3541    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3542      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3543          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3544      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3545      if (!x) {
3546        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3547      } else {
3548        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3549            op, DAG.getTargetConstant(x, MVT::i32));
3550      }
3551
3552    }
3553  } else {
3554
3555    if (RST == MVT::i32
3556        && LST == MVT::f64) {
3557      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3558        DST = SDValue(Op.getNode(), 0);
3559      } else {
3560        DST = genu32tof64(RHS, LHSVT, DAG);
3561      }
3562    } else if (RST == MVT::i64
3563        && LST == MVT::f64) {
3564      DST = genu64tof64(RHS, LHSVT, DAG);
3565    } else {
3566      DST = SDValue(Op.getNode(), 0);
3567    }
3568  }
3569  return DST;
3570}
3571
3572SDValue
3573AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3574{
3575  SDValue RHS = Op.getOperand(0);
3576  EVT RHSVT = RHS.getValueType();
3577  MVT RST = RHSVT.getScalarType().getSimpleVT();
3578  EVT INTVT;
3579  EVT LONGVT;
3580  SDValue DST;
3581  bool isVec = RHSVT.isVector();
3582  DebugLoc DL = Op.getDebugLoc();
3583  EVT LHSVT = Op.getValueType();
3584  MVT LST = LHSVT.getScalarType().getSimpleVT();
3585  const AMDILTargetMachine*
3586    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3587    (&this->getTargetMachine());
3588  const AMDILSubtarget*
3589    stm = static_cast<const AMDILSubtarget*>(
3590        amdtm->getSubtargetImpl());
3591  if (LST == MVT::f64 && LHSVT.isVector()
3592      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3593    // We dont support vector 64bit floating point convertions.
3594    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3595      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3596          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3597      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3598      if (!x) {
3599        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3600      } else {
3601        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3602            op, DAG.getTargetConstant(x, MVT::i32));
3603      }
3604
3605    }
3606  } else {
3607
3608    if (isVec) {
3609      LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3610            RHSVT.getVectorNumElements()));
3611      INTVT = EVT(MVT::getVectorVT(MVT::i32,
3612            RHSVT.getVectorNumElements()));
3613    } else {
3614      LONGVT = EVT(MVT::i64);
3615      INTVT = EVT(MVT::i32);
3616    }
3617    MVT RST = RHSVT.getScalarType().getSimpleVT();
3618    if ((RST == MVT::i32 || RST == MVT::i64)
3619        && LST == MVT::f64) {
3620      if (RST == MVT::i32) {
3621        if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3622          DST = SDValue(Op.getNode(), 0);
3623          return DST;
3624        }
3625      }
3626      SDValue c31 = DAG.getConstant( 31, INTVT );
3627      SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3628
3629      SDValue S;      // Sign, as 0 or -1
3630      SDValue Sbit;   // Sign bit, as one bit, MSB only.
3631      if (RST == MVT::i32) {
3632        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3633        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3634      } else { // 64-bit case... SRA of 64-bit values is slow
3635        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3636        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3637        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3638        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3639      }
3640
3641      // get abs() of input value, given sign as S (0 or -1)
3642      // SpI = RHS + S
3643      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3644      // SpIxS = SpI ^ S
3645      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
3646
3647      // Convert unsigned value to double precision
3648      SDValue R;
3649      if (RST == MVT::i32) {
3650        // r = cast_u32_to_f64(SpIxS)
3651        R = genu32tof64(SpIxS, LHSVT, DAG);
3652      } else {
3653        // r = cast_u64_to_f64(SpIxS)
3654        R = genu64tof64(SpIxS, LHSVT, DAG);
3655      }
3656
3657      // drop in the sign bit
3658      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3659      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3660      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3661      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3662      t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3663      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3664    } else {
3665      DST = SDValue(Op.getNode(), 0);
3666    }
3667  }
3668  return DST;
3669}
// LowerSUB - Lower a 64-bit (scalar or v2i64) integer subtraction as two
// 32-bit subtractions with manual borrow propagation; all other types are
// left for default handling.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
      amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    // Split both operands into 32-bit low/high halves and subtract each half.
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow mask: all-ones when LHSLO < RHSLO (unsigned), so adding it to
    // the high half subtracts the borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: compare each lane's low half separately and rebuild
      // the v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
3733SDValue
3734AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3735{
3736  EVT OVT = Op.getValueType();
3737  SDValue DST;
3738  if (OVT.getScalarType() == MVT::f64) {
3739    DST = LowerFDIV64(Op, DAG);
3740  } else if (OVT.getScalarType() == MVT::f32) {
3741    DST = LowerFDIV32(Op, DAG);
3742  } else {
3743    DST = SDValue(Op.getNode(), 0);
3744  }
3745  return DST;
3746}
3747
3748SDValue
3749AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3750{
3751  EVT OVT = Op.getValueType();
3752  SDValue DST;
3753  if (OVT.getScalarType() == MVT::i64) {
3754    DST = LowerSDIV64(Op, DAG);
3755  } else if (OVT.getScalarType() == MVT::i32) {
3756    DST = LowerSDIV32(Op, DAG);
3757  } else if (OVT.getScalarType() == MVT::i16
3758      || OVT.getScalarType() == MVT::i8) {
3759    DST = LowerSDIV24(Op, DAG);
3760  } else {
3761    DST = SDValue(Op.getNode(), 0);
3762  }
3763  return DST;
3764}
3765
3766SDValue
3767AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3768{
3769  EVT OVT = Op.getValueType();
3770  SDValue DST;
3771  if (OVT.getScalarType() == MVT::i64) {
3772    DST = LowerUDIV64(Op, DAG);
3773  } else if (OVT.getScalarType() == MVT::i32) {
3774    DST = LowerUDIV32(Op, DAG);
3775  } else if (OVT.getScalarType() == MVT::i16
3776      || OVT.getScalarType() == MVT::i8) {
3777    DST = LowerUDIV24(Op, DAG);
3778  } else {
3779    DST = SDValue(Op.getNode(), 0);
3780  }
3781  return DST;
3782}
3783
3784SDValue
3785AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3786{
3787  EVT OVT = Op.getValueType();
3788  SDValue DST;
3789  if (OVT.getScalarType() == MVT::i64) {
3790    DST = LowerSREM64(Op, DAG);
3791  } else if (OVT.getScalarType() == MVT::i32) {
3792    DST = LowerSREM32(Op, DAG);
3793  } else if (OVT.getScalarType() == MVT::i16) {
3794    DST = LowerSREM16(Op, DAG);
3795  } else if (OVT.getScalarType() == MVT::i8) {
3796    DST = LowerSREM8(Op, DAG);
3797  } else {
3798    DST = SDValue(Op.getNode(), 0);
3799  }
3800  return DST;
3801}
3802
3803SDValue
3804AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3805{
3806  EVT OVT = Op.getValueType();
3807  SDValue DST;
3808  if (OVT.getScalarType() == MVT::i64) {
3809    DST = LowerUREM64(Op, DAG);
3810  } else if (OVT.getScalarType() == MVT::i32) {
3811    DST = LowerUREM32(Op, DAG);
3812  } else if (OVT.getScalarType() == MVT::i16) {
3813    DST = LowerUREM16(Op, DAG);
3814  } else if (OVT.getScalarType() == MVT::i8) {
3815    DST = LowerUREM8(Op, DAG);
3816  } else {
3817    DST = SDValue(Op.getNode(), 0);
3818  }
3819  return DST;
3820}
3821
3822SDValue
3823AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
3824{
3825  DebugLoc DL = Op.getDebugLoc();
3826  EVT OVT = Op.getValueType();
3827  SDValue DST;
3828  bool isVec = OVT.isVector();
3829  if (OVT.getScalarType() != MVT::i64)
3830  {
3831    DST = SDValue(Op.getNode(), 0);
3832  } else {
3833    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
3834    // TODO: This needs to be turned into a tablegen pattern
3835    SDValue LHS = Op.getOperand(0);
3836    SDValue RHS = Op.getOperand(1);
3837
3838    MVT INTTY = MVT::i32;
3839    if (OVT == MVT::v2i64) {
3840      INTTY = MVT::v2i32;
3841    }
3842    // mul64(h1, l1, h0, l0)
3843    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3844        DL,
3845        INTTY, LHS);
3846    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3847        DL,
3848        INTTY, LHS);
3849    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
3850        DL,
3851        INTTY, RHS);
3852    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
3853        DL,
3854        INTTY, RHS);
3855    // MULLO_UINT_1 r1, h0, l1
3856    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
3857        DL,
3858        INTTY, RHSHI, LHSLO);
3859    // MULLO_UINT_1 r2, h1, l0
3860    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
3861        DL,
3862        INTTY, RHSLO, LHSHI);
3863    // ADD_INT hr, r1, r2
3864    SDValue ADDHI = DAG.getNode(ISD::ADD,
3865        DL,
3866        INTTY, RHILLO, RLOHHI);
3867    // MULHI_UINT_1 r3, l1, l0
3868    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
3869        DL,
3870        INTTY, RHSLO, LHSLO);
3871    // ADD_INT hr, hr, r3
3872    SDValue HIGH = DAG.getNode(ISD::ADD,
3873        DL,
3874        INTTY, ADDHI, RLOLLO);
3875    // MULLO_UINT_1 l3, l1, l0
3876    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
3877        DL,
3878        INTTY, LHSLO, RHSLO);
3879    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
3880        DL,
3881        OVT, LOW, HIGH);
3882  }
3883  return DST;
3884}
// LowerBUILD_VECTOR - Lower a BUILD_VECTOR by splatting element 0 with
// VBUILD, then inserting each remaining non-undef element.  If every
// operand equals element 0 the splat alone suffices.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Splat operand 0 across all lanes as the starting point.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    // Pure splat: the VBUILD already is the full result.
    return Nodes1;
  }
  // Cases intentionally fall through (4 -> 3 -> 2) so each higher lane is
  // inserted before the lower ones.
  // NOTE(review): the insert indices 7/6/5 do not match the operand
  // positions 3/2/1 - presumably a target-specific index encoding consumed
  // during instruction selection; confirm against the INSERT_VECTOR_ELT
  // patterns before changing.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
3948
// LowerINSERT_VECTOR_ELT - Lower an INSERT_VECTOR_ELT to the target's
// VINSERT node.  A constant index maps directly to a pair of byte-swizzle
// masks; a dynamic index is lowered as a chain of compare/select between
// the VINSERT results for every possible position.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is just the scalar itself.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Insert the new element unless it is undef, in which case reuse the
  // source vector's lane value instead.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static index: build the VINSERT byte-select masks directly.
    // mask2 keeps the identity swizzle (0x04030201) for all lanes except
    // the target lane; mask3 marks the target lane to take the new value.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: start with the result for index 0 (swizzleNum == 0),
    // then select the result for each other index when the runtime index
    // compares equal to it.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x) ? all-ones : 0, splatted across the vector.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
4004
4005SDValue
4006AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
4007    SelectionDAG &DAG) const
4008{
4009  EVT VT = Op.getValueType();
4010  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4011  uint64_t swizzleNum = 0;
4012  DebugLoc DL = Op.getDebugLoc();
4013  SDValue Res;
4014  if (!Op.getOperand(0).getValueType().isVector()) {
4015    Res = Op.getOperand(0);
4016    return Res;
4017  }
4018  if (CSDN) {
4019    // Static vector extraction
4020    swizzleNum = CSDN->getZExtValue() + 1;
4021    Res = DAG.getNode(AMDILISD::VEXTRACT,
4022        DL, VT,
4023        Op.getOperand(0),
4024        DAG.getTargetConstant(swizzleNum, MVT::i32));
4025  } else {
4026    SDValue Op1 = Op.getOperand(1);
4027    uint32_t vecSize = 4;
4028    SDValue Op0 = Op.getOperand(0);
4029    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
4030        DL, VT, Op0,
4031        DAG.getTargetConstant(1, MVT::i32));
4032    if (Op0.getValueType().isVector()) {
4033      vecSize = Op0.getValueType().getVectorNumElements();
4034    }
4035    for (uint32_t x = 2; x <= vecSize; ++x) {
4036      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
4037          DL, VT, Op0,
4038          DAG.getTargetConstant(x, MVT::i32));
4039      SDValue c = DAG.getNode(AMDILISD::CMP,
4040          DL, Op1.getValueType(),
4041          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
4042          Op1, DAG.getConstant(x, MVT::i32));
4043      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
4044          VT, c, t, res);
4045
4046    }
4047    Res = res;
4048  }
4049  return Res;
4050}
4051
4052SDValue
4053AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
4054    SelectionDAG &DAG) const
4055{
4056  uint32_t vecSize = Op.getValueType().getVectorNumElements();
4057  SDValue src = Op.getOperand(0);
4058  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4059  uint64_t offset = 0;
4060  EVT vecType = Op.getValueType().getVectorElementType();
4061  DebugLoc DL = Op.getDebugLoc();
4062  SDValue Result;
4063  if (CSDN) {
4064    offset = CSDN->getZExtValue();
4065    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4066        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
4067    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4068        Op.getValueType(), Result);
4069    for (uint32_t x = 1; x < vecSize; ++x) {
4070      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4071          src, DAG.getConstant(offset + x, MVT::i32));
4072      if (elt.getOpcode() != ISD::UNDEF) {
4073        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4074            Op.getValueType(), Result, elt,
4075            DAG.getConstant(x, MVT::i32));
4076      }
4077    }
4078  } else {
4079    SDValue idx = Op.getOperand(1);
4080    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4081        DL, vecType, src, idx);
4082    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4083        Op.getValueType(), Result);
4084    for (uint32_t x = 1; x < vecSize; ++x) {
4085      idx = DAG.getNode(ISD::ADD, DL, vecType,
4086          idx, DAG.getConstant(1, MVT::i32));
4087      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4088          src, idx);
4089      if (elt.getOpcode() != ISD::UNDEF) {
4090        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4091            Op.getValueType(), Result, elt, idx);
4092      }
4093    }
4094  }
4095  return Result;
4096}
4097SDValue
4098AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4099    SelectionDAG &DAG) const
4100{
4101  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
4102      Op.getDebugLoc(),
4103      Op.getValueType(),
4104      Op.getOperand(0));
4105  return Res;
4106}
4107SDValue
4108AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
4109{
4110  SDValue andOp;
4111  andOp = DAG.getNode(
4112      AMDILISD::AND,
4113      Op.getDebugLoc(),
4114      Op.getValueType(),
4115      Op.getOperand(0),
4116      Op.getOperand(1));
4117  return andOp;
4118}
4119SDValue
4120AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
4121{
4122  SDValue orOp;
4123  orOp = DAG.getNode(AMDILISD::OR,
4124      Op.getDebugLoc(),
4125      Op.getValueType(),
4126      Op.getOperand(0),
4127      Op.getOperand(1));
4128  return orOp;
4129}
4130SDValue
4131AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
4132{
4133  SDValue Cond = Op.getOperand(0);
4134  SDValue LHS = Op.getOperand(1);
4135  SDValue RHS = Op.getOperand(2);
4136  DebugLoc DL = Op.getDebugLoc();
4137  Cond = getConversionNode(DAG, Cond, Op, true);
4138  Cond = DAG.getNode(AMDILISD::CMOVLOG,
4139      DL,
4140      Op.getValueType(), Cond, LHS, RHS);
4141  return Cond;
4142}
// Lower ISD::SELECT_CC: compare LHS and RHS with the given condition code,
// then select between TRUE and FALSE.  When both results are the constants
// all-ones/zero the conditional move can be elided, since the AMDIL compare
// already produces an all-ones/zero mask.
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;   // compare result can be used directly
  bool genINot = false;    // compare result must be inverted first
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        // select cc, -1, 0 is exactly the compare mask itself.
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        // select cc, 0, -1 is the inverted compare mask.
        skipCMov = true;
        genINot = true;
      }
    }
  }
  // Map the generic condition code onto the AMDIL comparison opcode for
  // this operand type.
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Convert the compare result to the select's result type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
4197SDValue
4198AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
4199{
4200  SDValue Cond;
4201  SDValue LHS = Op.getOperand(0);
4202  SDValue RHS = Op.getOperand(1);
4203  SDValue CC  = Op.getOperand(2);
4204  DebugLoc DL = Op.getDebugLoc();
4205  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4206  unsigned int AMDILCC = CondCCodeToCC(
4207      SetCCOpcode,
4208      LHS.getValueType().getSimpleVT().SimpleTy);
4209  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
4210  Cond = DAG.getNode(
4211      AMDILISD::CMP,
4212      DL,
4213      LHS.getValueType(),
4214      DAG.getConstant(AMDILCC, MVT::i32),
4215      LHS,
4216      RHS);
4217  Cond = getConversionNode(DAG, Cond, Op, true);
4218  Cond = DAG.getNode(
4219      ISD::AND,
4220      DL,
4221      Cond.getValueType(),
4222      DAG.getConstant(1, Cond.getValueType()),
4223      Cond);
4224  return Cond;
4225}
4226
// Lower ISD::SIGN_EXTEND_INREG (sign-extend the low 'BVT' bits of the
// operand in place) with the classic shift-left / arithmetic-shift-right
// pair.  Sub-32-bit operands are first widened to 32 bits, shifted there,
// then truncated back to the original type.
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  // baseBits: width of the value being sign-extended.
  // srcBits:  width of the container it sits in.
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
4258EVT
4259AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
4260{
4261  int iSize = (size * numEle);
4262  int vEle = (iSize >> ((size == 64) ? 6 : 5));
4263  if (!vEle) {
4264    vEle = 1;
4265  }
4266  if (size == 64) {
4267    if (vEle == 1) {
4268      return EVT(MVT::i64);
4269    } else {
4270      return EVT(MVT::getVectorVT(MVT::i64, vEle));
4271    }
4272  } else {
4273    if (vEle == 1) {
4274      return EVT(MVT::i32);
4275    } else {
4276      return EVT(MVT::getVectorVT(MVT::i32, vEle));
4277    }
4278  }
4279}
4280
// Lower ISD::BITCAST.  Most casts become a single AMDILISD::BITCONV node,
// but casts involving sub-32-bit integer vector element types (i8/i16
// vectors) cannot be represented directly and are expanded here into
// explicit pack/unpack sequences built from 32-bit lanes.
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  // Scalar widths and element counts of source and destination, used to
  // classify which expansion (if any) is required.
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        // Pack pairs of 16-bit halves into two 32-bit words, then create
        // the i64 from the two words.
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        // Split the i64 into low/high words and their 16-bit-shifted
        // variants, then assemble the four 16-bit pieces in a v4i32.
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two scalar sizes: deliberately not expanded here;
        // control falls through to the generic BITCONV at the bottom.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        // Widen each sub-32-bit element to a 32/64-bit lane, mask it to
        // its original width, shift into position, and OR lanes together.
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Ratio source elements pack into each destination element.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        // Tree-reduce neighbouring lanes with OR; for 8-bit sources a
        // second level combines groups of four.
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2:
        // Split each 32/64-bit source element into 'mult' sub-32-bit
        // pieces via right shifts, then truncate the lane vector.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3:
        // Pair up i8 elements: widen to i16, mask, shift odd elements up
        // by 8 and OR each pair into the even slot.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4:
        // Split each i16 into a low byte and a (shifted) high byte, then
        // build the destination vector and truncate the lanes to i8.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Default: the cast is directly representable as a single bit conversion.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}
4563
4564SDValue
4565AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4566    SelectionDAG &DAG) const
4567{
4568  SDValue Chain = Op.getOperand(0);
4569  SDValue Size = Op.getOperand(1);
4570  unsigned int SPReg = AMDIL::SP;
4571  DebugLoc DL = Op.getDebugLoc();
4572  SDValue SP = DAG.getCopyFromReg(Chain,
4573      DL,
4574      SPReg, MVT::i32);
4575  SDValue NewSP = DAG.getNode(ISD::ADD,
4576      DL,
4577      MVT::i32, SP, Size);
4578  Chain = DAG.getCopyToReg(SP.getValue(1),
4579      DL,
4580      SPReg, NewSP);
4581  SDValue Ops[2] = {NewSP, Chain};
4582  Chain = DAG.getMergeValues(Ops, 2 ,DL);
4583  return Chain;
4584}
4585SDValue
4586AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
4587{
4588  SDValue Chain = Op.getOperand(0);
4589  SDValue Cond  = Op.getOperand(1);
4590  SDValue Jump  = Op.getOperand(2);
4591  SDValue Result;
4592  Result = DAG.getNode(
4593      AMDILISD::BRANCH_COND,
4594      Op.getDebugLoc(),
4595      Op.getValueType(),
4596      Chain, Jump, Cond);
4597  return Result;
4598}
4599
4600SDValue
4601AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
4602{
4603  SDValue Chain = Op.getOperand(0);
4604  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
4605  SDValue LHS   = Op.getOperand(2);
4606  SDValue RHS   = Op.getOperand(3);
4607  SDValue JumpT  = Op.getOperand(4);
4608  SDValue CmpValue;
4609  ISD::CondCode CC = CCNode->get();
4610  SDValue Result;
4611  unsigned int cmpOpcode = CondCCodeToCC(
4612      CC,
4613      LHS.getValueType().getSimpleVT().SimpleTy);
4614  CmpValue = DAG.getNode(
4615      AMDILISD::CMP,
4616      Op.getDebugLoc(),
4617      LHS.getValueType(),
4618      DAG.getConstant(cmpOpcode, MVT::i32),
4619      LHS, RHS);
4620  Result = DAG.getNode(
4621      AMDILISD::BRANCH_COND,
4622      CmpValue.getDebugLoc(),
4623      MVT::Other, Chain,
4624      JumpT, CmpValue);
4625  return Result;
4626}
4627
4628SDValue
4629AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4630{
4631  SDValue Result = DAG.getNode(
4632      AMDILISD::DP_TO_FP,
4633      Op.getDebugLoc(),
4634      Op.getValueType(),
4635      Op.getOperand(0),
4636      Op.getOperand(1));
4637  return Result;
4638}
4639
4640SDValue
4641AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4642{
4643  SDValue Result = DAG.getNode(
4644      AMDILISD::VCONCAT,
4645      Op.getDebugLoc(),
4646      Op.getValueType(),
4647      Op.getOperand(0),
4648      Op.getOperand(1));
4649  return Result;
4650}
// LowerRET - Lower an ISD::RET node: assign each return value to its
// calling-convention location, copy the values into the result registers
// (glued together so they stay adjacent), and emit the RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  // Append the glue value, if any, so the RET_FLAG node is tied to the
  // register copies above.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
// Expand a 64-bit integer relational (LEQ/LNE/LLT/LLE/LGT/LGE and unsigned
// variants) into 32-bit machine instructions operating on the high and low
// halves of the operands.  The i64 result lanes are both filled with the
// combined 32-bit compare result via LCREATE.
void
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  unsigned int opi32Code = 0, si32Code = 0;
  // Register class of the destination, reused for all temporaries.
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // All the relationals can be generated with 6 temp registers
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  switch(opCode) {
    default:
      assert(!"comparison case not handled!");
      break;
    case AMDIL::LEQ:
      si32Code = opi32Code = AMDIL::IEQ;
      break;
    case AMDIL::LNE:
      si32Code = opi32Code = AMDIL::INE;
      break;
    case AMDIL::LLE:
    case AMDIL::ULLE:
    case AMDIL::LGE:
    case AMDIL::ULGE:
      // LE/GE are rewritten as strict < on swapped operands; swapping the
      // high halves flips the comparison, swapping the low halves turns
      // "<=" on the low word into ">=" (checked with UGE below).
      if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
        std::swap(REGS[0], REGS[2]);
      } else {
        std::swap(REGS[1], REGS[3]);
      }
      if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::UGE;
      break;
    case AMDIL::LGT:
    case AMDIL::ULGT:
      // GT swaps both halves and falls through to the LT handling.
      std::swap(REGS[0], REGS[2]);
      std::swap(REGS[1], REGS[3]);
    case AMDIL::LLT:
    case AMDIL::ULLT:
      // High-word compare is signed/unsigned per the opcode; the low-word
      // compare is always unsigned.
      if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::ULT;
      break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
    case AMDIL::IEQ:
    case AMDIL::INE:
      {
        // combine the results with an and or or depending on if
        // we are eq or ne
        uint32_t combineOp = (opi32Code == AMDIL::IEQ)
          ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
        generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
      }
      break;
    default:
      // this finishes codegen for the following pattern
      // REGS[4] || (REGS[5] && (L_HI == R_HI))
      generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
      generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
          REGS[9]);
      generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
          REGS[10]);
      break;
  }
  // Replicate the 32-bit result into both halves of the 64-bit destination.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}
4805
4806unsigned int
4807AMDILTargetLowering::getFunctionAlignment(const Function *) const
4808{
4809  return 0;
4810}
4811
4812void
4813AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4814    MachineBasicBlock::iterator &BBI,
4815    DebugLoc *DL, const TargetInstrInfo *TII) const
4816{
4817  mBB = BB;
4818  mBBI = BBI;
4819  mDL = DL;
4820  mTII = TII;
4821}
4822uint32_t
4823AMDILTargetLowering::genVReg(uint32_t regType) const
4824{
4825  return mBB->getParent()->getRegInfo().createVirtualRegister(
4826      getTargetMachine().getRegisterInfo()->getRegClass(regType));
4827}
4828
4829MachineInstrBuilder
4830AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
4831{
4832  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
4833}
4834
4835MachineInstrBuilder
4836AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4837    uint32_t src1) const
4838{
4839  return generateMachineInst(opcode, dst).addReg(src1);
4840}
4841
4842MachineInstrBuilder
4843AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4844    uint32_t src1, uint32_t src2) const
4845{
4846  return generateMachineInst(opcode, dst, src1).addReg(src2);
4847}
4848
4849MachineInstrBuilder
4850AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4851    uint32_t src1, uint32_t src2, uint32_t src3) const
4852{
4853  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4854}
4855
4856
4857SDValue
4858AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4859{
4860  DebugLoc DL = Op.getDebugLoc();
4861  EVT OVT = Op.getValueType();
4862  SDValue LHS = Op.getOperand(0);
4863  SDValue RHS = Op.getOperand(1);
4864  MVT INTTY;
4865  MVT FLTTY;
4866  if (!OVT.isVector()) {
4867    INTTY = MVT::i32;
4868    FLTTY = MVT::f32;
4869  } else if (OVT.getVectorNumElements() == 2) {
4870    INTTY = MVT::v2i32;
4871    FLTTY = MVT::v2f32;
4872  } else if (OVT.getVectorNumElements() == 4) {
4873    INTTY = MVT::v4i32;
4874    FLTTY = MVT::v4f32;
4875  }
4876  unsigned bitsize = OVT.getScalarType().getSizeInBits();
4877  // char|short jq = ia ^ ib;
4878  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4879
4880  // jq = jq >> (bitsize - 2)
4881  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4882
4883  // jq = jq | 0x1
4884  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4885
4886  // jq = (int)jq
4887  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4888
4889  // int ia = (int)LHS;
4890  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4891
4892  // int ib, (int)RHS;
4893  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4894
4895  // float fa = (float)ia;
4896  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4897
4898  // float fb = (float)ib;
4899  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4900
4901  // float fq = native_divide(fa, fb);
4902  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4903
4904  // fq = trunc(fq);
4905  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4906
4907  // float fqneg = -fq;
4908  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4909
4910  // float fr = mad(fqneg, fb, fa);
4911  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4912
4913  // int iq = (int)fq;
4914  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4915
4916  // fr = fabs(fr);
4917  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4918
4919  // fb = fabs(fb);
4920  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4921
4922  // int cv = fr >= fb;
4923  SDValue cv;
4924  if (INTTY == MVT::i32) {
4925    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4926  } else {
4927    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4928  }
4929  // jq = (cv ? jq : 0);
4930  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4931      DAG.getConstant(0, OVT));
4932  // dst = iq + jq;
4933  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4934  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4935  return iq;
4936}
4937
4938SDValue
4939AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4940{
4941  DebugLoc DL = Op.getDebugLoc();
4942  EVT OVT = Op.getValueType();
4943  SDValue LHS = Op.getOperand(0);
4944  SDValue RHS = Op.getOperand(1);
4945  // The LowerSDIV32 function generates equivalent to the following IL.
4946  // mov r0, LHS
4947  // mov r1, RHS
4948  // ilt r10, r0, 0
4949  // ilt r11, r1, 0
4950  // iadd r0, r0, r10
4951  // iadd r1, r1, r11
4952  // ixor r0, r0, r10
4953  // ixor r1, r1, r11
4954  // udiv r0, r0, r1
4955  // ixor r10, r10, r11
4956  // iadd r0, r0, r10
4957  // ixor DST, r0, r10
4958
4959  // mov r0, LHS
4960  SDValue r0 = LHS;
4961
4962  // mov r1, RHS
4963  SDValue r1 = RHS;
4964
4965  // ilt r10, r0, 0
4966  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4967      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4968      r0, DAG.getConstant(0, OVT));
4969
4970  // ilt r11, r1, 0
4971  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4972      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4973      r1, DAG.getConstant(0, OVT));
4974
4975  // iadd r0, r0, r10
4976  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4977
4978  // iadd r1, r1, r11
4979  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4980
4981  // ixor r0, r0, r10
4982  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4983
4984  // ixor r1, r1, r11
4985  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4986
4987  // udiv r0, r0, r1
4988  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4989
4990  // ixor r10, r10, r11
4991  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4992
4993  // iadd r0, r0, r10
4994  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4995
4996  // ixor DST, r0, r10
4997  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4998  return DST;
4999}
5000
5001SDValue
5002AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
5003{
5004  return SDValue(Op.getNode(), 0);
5005}
5006
5007SDValue
5008AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
5009{
5010  DebugLoc DL = Op.getDebugLoc();
5011  EVT OVT = Op.getValueType();
5012  SDValue LHS = Op.getOperand(0);
5013  SDValue RHS = Op.getOperand(1);
5014  MVT INTTY;
5015  MVT FLTTY;
5016  if (!OVT.isVector()) {
5017    INTTY = MVT::i32;
5018    FLTTY = MVT::f32;
5019  } else if (OVT.getVectorNumElements() == 2) {
5020    INTTY = MVT::v2i32;
5021    FLTTY = MVT::v2f32;
5022  } else if (OVT.getVectorNumElements() == 4) {
5023    INTTY = MVT::v4i32;
5024    FLTTY = MVT::v4f32;
5025  }
5026
5027  // The LowerUDIV24 function implements the following CL.
5028  // int ia = (int)LHS
5029  // float fa = (float)ia
5030  // int ib = (int)RHS
5031  // float fb = (float)ib
5032  // float fq = native_divide(fa, fb)
5033  // fq = trunc(fq)
5034  // float t = mad(fq, fb, fb)
5035  // int iq = (int)fq - (t <= fa)
5036  // return (type)iq
5037
5038  // int ia = (int)LHS
5039  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
5040
5041  // float fa = (float)ia
5042  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
5043
5044  // int ib = (int)RHS
5045  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
5046
5047  // float fb = (float)ib
5048  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
5049
5050  // float fq = native_divide(fa, fb)
5051  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
5052
5053  // fq = trunc(fq)
5054  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
5055
5056  // float t = mad(fq, fb, fb)
5057  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
5058
5059  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5060  SDValue iq;
5061  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
5062  if (INTTY == MVT::i32) {
5063    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5064  } else {
5065    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5066  }
5067  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
5068
5069
5070  // return (type)iq
5071  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
5072  return iq;
5073
5074}
5075
5076SDValue
5077AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
5078{
5079  return SDValue(Op.getNode(), 0);
5080}
5081
5082SDValue
5083AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
5084{
5085  return SDValue(Op.getNode(), 0);
5086}
5087SDValue
5088AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
5089{
5090  DebugLoc DL = Op.getDebugLoc();
5091  EVT OVT = Op.getValueType();
5092  MVT INTTY = MVT::i32;
5093  if (OVT == MVT::v2i8) {
5094    INTTY = MVT::v2i32;
5095  } else if (OVT == MVT::v4i8) {
5096    INTTY = MVT::v4i32;
5097  }
5098  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5099  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5100  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5101  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5102  return LHS;
5103}
5104
5105SDValue
5106AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
5107{
5108  DebugLoc DL = Op.getDebugLoc();
5109  EVT OVT = Op.getValueType();
5110  MVT INTTY = MVT::i32;
5111  if (OVT == MVT::v2i16) {
5112    INTTY = MVT::v2i32;
5113  } else if (OVT == MVT::v4i16) {
5114    INTTY = MVT::v4i32;
5115  }
5116  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5117  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5118  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5119  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5120  return LHS;
5121}
5122
5123SDValue
5124AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
5125{
5126  DebugLoc DL = Op.getDebugLoc();
5127  EVT OVT = Op.getValueType();
5128  SDValue LHS = Op.getOperand(0);
5129  SDValue RHS = Op.getOperand(1);
5130  // The LowerSREM32 function generates equivalent to the following IL.
5131  // mov r0, LHS
5132  // mov r1, RHS
5133  // ilt r10, r0, 0
5134  // ilt r11, r1, 0
5135  // iadd r0, r0, r10
5136  // iadd r1, r1, r11
5137  // ixor r0, r0, r10
5138  // ixor r1, r1, r11
5139  // udiv r20, r0, r1
5140  // umul r20, r20, r1
5141  // sub r0, r0, r20
5142  // iadd r0, r0, r10
5143  // ixor DST, r0, r10
5144
5145  // mov r0, LHS
5146  SDValue r0 = LHS;
5147
5148  // mov r1, RHS
5149  SDValue r1 = RHS;
5150
5151  // ilt r10, r0, 0
5152  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5153      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5154      r0, DAG.getConstant(0, OVT));
5155
5156  // ilt r11, r1, 0
5157  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5158      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5159      r1, DAG.getConstant(0, OVT));
5160
5161  // iadd r0, r0, r10
5162  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5163
5164  // iadd r1, r1, r11
5165  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
5166
5167  // ixor r0, r0, r10
5168  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5169
5170  // ixor r1, r1, r11
5171  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
5172
5173  // udiv r20, r0, r1
5174  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
5175
5176  // umul r20, r20, r1
5177  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
5178
5179  // sub r0, r0, r20
5180  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
5181
5182  // iadd r0, r0, r10
5183  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5184
5185  // ixor DST, r0, r10
5186  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5187  return DST;
5188}
5189
5190SDValue
5191AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
5192{
5193  return SDValue(Op.getNode(), 0);
5194}
5195
5196SDValue
5197AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
5198{
5199  DebugLoc DL = Op.getDebugLoc();
5200  EVT OVT = Op.getValueType();
5201  MVT INTTY = MVT::i32;
5202  if (OVT == MVT::v2i8) {
5203    INTTY = MVT::v2i32;
5204  } else if (OVT == MVT::v4i8) {
5205    INTTY = MVT::v4i32;
5206  }
5207  SDValue LHS = Op.getOperand(0);
5208  SDValue RHS = Op.getOperand(1);
5209  // The LowerUREM8 function generates equivalent to the following IL.
5210  // mov r0, as_u32(LHS)
5211  // mov r1, as_u32(RHS)
5212  // and r10, r0, 0xFF
5213  // and r11, r1, 0xFF
5214  // cmov_logical r3, r11, r11, 0x1
5215  // udiv r3, r10, r3
5216  // cmov_logical r3, r11, r3, 0
5217  // umul r3, r3, r11
5218  // sub r3, r10, r3
5219  // and as_u8(DST), r3, 0xFF
5220
5221  // mov r0, as_u32(LHS)
5222  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
5223
5224  // mov r1, as_u32(RHS)
5225  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
5226
5227  // and r10, r0, 0xFF
5228  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
5229      DAG.getConstant(0xFF, INTTY));
5230
5231  // and r11, r1, 0xFF
5232  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
5233      DAG.getConstant(0xFF, INTTY));
5234
5235  // cmov_logical r3, r11, r11, 0x1
5236  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
5237      DAG.getConstant(0x01, INTTY));
5238
5239  // udiv r3, r10, r3
5240  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5241
5242  // cmov_logical r3, r11, r3, 0
5243  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
5244      DAG.getConstant(0, INTTY));
5245
5246  // umul r3, r3, r11
5247  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
5248
5249  // sub r3, r10, r3
5250  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
5251
5252  // and as_u8(DST), r3, 0xFF
5253  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
5254      DAG.getConstant(0xFF, INTTY));
5255  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
5256  return DST;
5257}
5258
5259SDValue
5260AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
5261{
5262  DebugLoc DL = Op.getDebugLoc();
5263  EVT OVT = Op.getValueType();
5264  MVT INTTY = MVT::i32;
5265  if (OVT == MVT::v2i16) {
5266    INTTY = MVT::v2i32;
5267  } else if (OVT == MVT::v4i16) {
5268    INTTY = MVT::v4i32;
5269  }
5270  SDValue LHS = Op.getOperand(0);
5271  SDValue RHS = Op.getOperand(1);
5272  // The LowerUREM16 function generatest equivalent to the following IL.
5273  // mov r0, LHS
5274  // mov r1, RHS
5275  // DIV = LowerUDIV16(LHS, RHS)
5276  // and r10, r0, 0xFFFF
5277  // and r11, r1, 0xFFFF
5278  // cmov_logical r3, r11, r11, 0x1
5279  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5280  // and r3, r3, 0xFFFF
5281  // cmov_logical r3, r11, r3, 0
5282  // umul r3, r3, r11
5283  // sub r3, r10, r3
5284  // and DST, r3, 0xFFFF
5285
5286  // mov r0, LHS
5287  SDValue r0 = LHS;
5288
5289  // mov r1, RHS
5290  SDValue r1 = RHS;
5291
5292  // and r10, r0, 0xFFFF
5293  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
5294      DAG.getConstant(0xFFFF, OVT));
5295
5296  // and r11, r1, 0xFFFF
5297  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
5298      DAG.getConstant(0xFFFF, OVT));
5299
5300  // cmov_logical r3, r11, r11, 0x1
5301  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
5302      DAG.getConstant(0x01, OVT));
5303
5304  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5305  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
5306  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
5307  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5308  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
5309  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
5310
5311  // and r3, r3, 0xFFFF
5312  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
5313      DAG.getConstant(0xFFFF, OVT));
5314
5315  // cmov_logical r3, r11, r3, 0
5316  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
5317      DAG.getConstant(0, OVT));
5318  // umul r3, r3, r11
5319  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
5320
5321  // sub r3, r10, r3
5322  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
5323
5324  // and DST, r3, 0xFFFF
5325  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
5326      DAG.getConstant(0xFFFF, OVT));
5327  return DST;
5328}
5329
5330SDValue
5331AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
5332{
5333  DebugLoc DL = Op.getDebugLoc();
5334  EVT OVT = Op.getValueType();
5335  SDValue LHS = Op.getOperand(0);
5336  SDValue RHS = Op.getOperand(1);
5337  // The LowerUREM32 function generates equivalent to the following IL.
5338  // udiv r20, LHS, RHS
5339  // umul r20, r20, RHS
5340  // sub DST, LHS, r20
5341
5342  // udiv r20, LHS, RHS
5343  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
5344
5345  // umul r20, r20, RHS
5346  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
5347
5348  // sub DST, LHS, r20
5349  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
5350  return DST;
5351}
5352
5353SDValue
5354AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
5355{
5356  return SDValue(Op.getNode(), 0);
5357}
5358
5359
5360SDValue
5361AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
5362{
5363  DebugLoc DL = Op.getDebugLoc();
5364  EVT OVT = Op.getValueType();
5365  MVT INTTY = MVT::i32;
5366  if (OVT == MVT::v2f32) {
5367    INTTY = MVT::v2i32;
5368  } else if (OVT == MVT::v4f32) {
5369    INTTY = MVT::v4i32;
5370  }
5371  SDValue LHS = Op.getOperand(0);
5372  SDValue RHS = Op.getOperand(1);
5373  SDValue DST;
5374  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
5375      &this->getTargetMachine())->getSubtargetImpl();
5376  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
5377    // TODO: This doesn't work for vector types yet
5378    // The LowerFDIV32 function generates equivalent to the following
5379    // IL:
5380    // mov r20, as_int(LHS)
5381    // mov r21, as_int(RHS)
5382    // and r30, r20, 0x7f800000
5383    // and r31, r20, 0x807FFFFF
5384    // and r32, r21, 0x7f800000
5385    // and r33, r21, 0x807FFFFF
5386    // ieq r40, r30, 0x7F800000
5387    // ieq r41, r31, 0x7F800000
5388    // ieq r42, r32, 0
5389    // ieq r43, r33, 0
5390    // and r50, r20, 0x80000000
5391    // and r51, r21, 0x80000000
5392    // ior r32, r32, 0x3f800000
5393    // ior r33, r33, 0x3f800000
5394    // cmov_logical r32, r42, r50, r32
5395    // cmov_logical r33, r43, r51, r33
5396    // cmov_logical r32, r40, r20, r32
5397    // cmov_logical r33, r41, r21, r33
5398    // ior r50, r40, r41
5399    // ior r51, r42, r43
5400    // ior r50, r50, r51
5401    // inegate r52, r31
5402    // iadd r30, r30, r52
5403    // cmov_logical r30, r50, 0, r30
5404    // div_zeroop(infinity) r21, 1.0, r33
5405    // mul_ieee r20, r32, r21
5406    // and r22, r20, 0x7FFFFFFF
5407    // and r23, r20, 0x80000000
5408    // ishr r60, r22, 0x00000017
5409    // ishr r61, r30, 0x00000017
5410    // iadd r20, r20, r30
5411    // iadd r21, r22, r30
5412    // iadd r60, r60, r61
5413    // ige r42, 0, R60
5414    // ior r41, r23, 0x7F800000
5415    // ige r40, r60, 0x000000FF
5416    // cmov_logical r40, r50, 0, r40
5417    // cmov_logical r20, r42, r23, r20
5418    // cmov_logical DST, r40, r41, r20
5419    // as_float(DST)
5420
5421    // mov r20, as_int(LHS)
5422    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
5423
5424    // mov r21, as_int(RHS)
5425    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
5426
5427    // and r30, r20, 0x7f800000
5428    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5429        DAG.getConstant(0x7F800000, INTTY));
5430
5431    // and r31, r21, 0x7f800000
5432    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5433        DAG.getConstant(0x7f800000, INTTY));
5434
5435    // and r32, r20, 0x807FFFFF
5436    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5437        DAG.getConstant(0x807FFFFF, INTTY));
5438
5439    // and r33, r21, 0x807FFFFF
5440    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5441        DAG.getConstant(0x807FFFFF, INTTY));
5442
5443    // ieq r40, r30, 0x7F800000
5444    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5445        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5446        R30, DAG.getConstant(0x7F800000, INTTY));
5447
5448    // ieq r41, r31, 0x7F800000
5449    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5450        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5451        R31, DAG.getConstant(0x7F800000, INTTY));
5452
5453    // ieq r42, r30, 0
5454    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5455        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5456        R30, DAG.getConstant(0, INTTY));
5457
5458    // ieq r43, r31, 0
5459    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5460        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5461        R31, DAG.getConstant(0, INTTY));
5462
5463    // and r50, r20, 0x80000000
5464    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5465        DAG.getConstant(0x80000000, INTTY));
5466
5467    // and r51, r21, 0x80000000
5468    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5469        DAG.getConstant(0x80000000, INTTY));
5470
5471    // ior r32, r32, 0x3f800000
5472    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
5473        DAG.getConstant(0x3F800000, INTTY));
5474
5475    // ior r33, r33, 0x3f800000
5476    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
5477        DAG.getConstant(0x3F800000, INTTY));
5478
5479    // cmov_logical r32, r42, r50, r32
5480    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
5481
5482    // cmov_logical r33, r43, r51, r33
5483    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
5484
5485    // cmov_logical r32, r40, r20, r32
5486    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
5487
5488    // cmov_logical r33, r41, r21, r33
5489    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
5490
5491    // ior r50, r40, r41
5492    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
5493
5494    // ior r51, r42, r43
5495    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
5496
5497    // ior r50, r50, r51
5498    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
5499
5500    // inegate r52, r31
5501    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
5502
5503    // iadd r30, r30, r52
5504    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
5505
5506    // cmov_logical r30, r50, 0, r30
5507    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5508        DAG.getConstant(0, INTTY), R30);
5509
5510    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5511    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5512    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5513        DAG.getConstantFP(1.0f, OVT), R33);
5514
5515    // mul_ieee as_int(r20), as_float(r32), r21
5516    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5517    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5518    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5519
5520    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5521    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5522    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5523        DAG.getConstantFP(1.0f, OVT), R33);
5524
5525    // mul_ieee as_int(r20), as_float(r32), r21
5526    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5527    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5528    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5529
5530    // and r22, r20, 0x7FFFFFFF
5531    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5532        DAG.getConstant(0x7FFFFFFF, INTTY));
5533
5534    // and r23, r20, 0x80000000
5535    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5536        DAG.getConstant(0x80000000, INTTY));
5537
5538    // ishr r60, r22, 0x00000017
5539    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
5540        DAG.getConstant(0x00000017, INTTY));
5541
5542    // ishr r61, r30, 0x00000017
5543    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
5544        DAG.getConstant(0x00000017, INTTY));
5545
5546    // iadd r20, r20, r30
5547    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
5548
5549    // iadd r21, r22, r30
5550    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
5551
5552    // iadd r60, r60, r61
5553    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
5554
5555    // ige r42, 0, R60
5556    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5557        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5558        DAG.getConstant(0, INTTY),
5559        R60);
5560
5561    // ior r41, r23, 0x7F800000
5562    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
5563        DAG.getConstant(0x7F800000, INTTY));
5564
5565    // ige r40, r60, 0x000000FF
5566    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5567        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5568        R60,
5569        DAG.getConstant(0x0000000FF, INTTY));
5570
5571    // cmov_logical r40, r50, 0, r40
5572    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5573        DAG.getConstant(0, INTTY),
5574        R40);
5575
5576    // cmov_logical r20, r42, r23, r20
5577    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
5578
5579    // cmov_logical DST, r40, r41, r20
5580    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
5581
5582    // as_float(DST)
5583    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
5584  } else {
5585    // The following sequence of DAG nodes produce the following IL:
5586    // fabs r1, RHS
5587    // lt r2, 0x1.0p+96f, r1
5588    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5589    // mul_ieee r1, RHS, r3
5590    // div_zeroop(infinity) r0, LHS, r1
5591    // mul_ieee DST, r0, r3
5592
5593    // fabs r1, RHS
5594    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
5595    // lt r2, 0x1.0p+96f, r1
5596    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5597        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
5598        DAG.getConstant(0x6f800000, INTTY), r1);
5599    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5600    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
5601        DAG.getConstant(0x2f800000, INTTY),
5602        DAG.getConstant(0x3f800000, INTTY));
5603    // mul_ieee r1, RHS, r3
5604    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
5605    // div_zeroop(infinity) r0, LHS, r1
5606    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
5607    // mul_ieee DST, r0, r3
5608    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
5609  }
5610  return DST;
5611}
5612
5613SDValue
5614AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
5615{
5616  return SDValue(Op.getNode(), 0);
5617}
5618