AMDILISelLowering.cpp revision 04993c963008ded3a6ad5e5b4d69ba08d1948a93
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILMachineFunctionInfo.h"
19#include "AMDILSubtarget.h"
20#include "AMDILTargetMachine.h"
21#include "AMDILUtilityFunctions.h"
22#include "llvm/CallingConv.h"
23#include "llvm/CodeGen/MachineFrameInfo.h"
24#include "llvm/CodeGen/MachineRegisterInfo.h"
25#include "llvm/CodeGen/PseudoSourceValue.h"
26#include "llvm/CodeGen/SelectionDAG.h"
27#include "llvm/CodeGen/SelectionDAGNodes.h"
28#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
29#include "llvm/DerivedTypes.h"
30#include "llvm/Instructions.h"
31#include "llvm/Intrinsics.h"
32#include "llvm/Support/raw_ostream.h"
33#include "llvm/Target/TargetOptions.h"
34
35using namespace llvm;
36#define ISDBITCAST  ISD::BITCAST
37#define MVTGLUE     MVT::Glue
38//===----------------------------------------------------------------------===//
39// Calling Convention Implementation
40//===----------------------------------------------------------------------===//
41#include "AMDILGenCallingConv.inc"
42
43//===----------------------------------------------------------------------===//
44// TargetLowering Implementation Help Functions Begin
45//===----------------------------------------------------------------------===//
46  static SDValue
47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
48{
49  DebugLoc DL = Src.getDebugLoc();
50  EVT svt = Src.getValueType().getScalarType();
51  EVT dvt = Dst.getValueType().getScalarType();
52  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
53    if (dvt.bitsGT(svt)) {
54      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
55    } else if (svt.bitsLT(svt)) {
56      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
57          DAG.getConstant(1, MVT::i32));
58    }
59  } else if (svt.isInteger() && dvt.isInteger()) {
60    if (!svt.bitsEq(dvt)) {
61      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
62    } else {
63      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
64    }
65  } else if (svt.isInteger()) {
66    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
67    if (!svt.bitsEq(dvt)) {
68      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
69        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
70      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
71        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
72      } else {
73        assert(0 && "We only support 32 and 64bit fp types");
74      }
75    }
76    Src = DAG.getNode(opcode, DL, dvt, Src);
77  } else if (dvt.isInteger()) {
78    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
79    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
80      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
81    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
82      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
83    } else {
84      assert(0 && "We only support 32 and 64bit fp types");
85    }
86    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
87  }
88  return Src;
89}
90// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
91// condition.
92  static AMDILCC::CondCodes
93CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
94{
95  switch (CC) {
96    default:
97      {
98        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
99        assert(0 && "Unknown condition code!");
100      }
101    case ISD::SETO:
102      switch(type) {
103        case MVT::f32:
104          return AMDILCC::IL_CC_F_O;
105        case MVT::f64:
106          return AMDILCC::IL_CC_D_O;
107        default:
108          assert(0 && "Opcode combination not generated correctly!");
109          return AMDILCC::COND_ERROR;
110      };
111    case ISD::SETUO:
112      switch(type) {
113        case MVT::f32:
114          return AMDILCC::IL_CC_F_UO;
115        case MVT::f64:
116          return AMDILCC::IL_CC_D_UO;
117        default:
118          assert(0 && "Opcode combination not generated correctly!");
119          return AMDILCC::COND_ERROR;
120      };
121    case ISD::SETGT:
122      switch (type) {
123        case MVT::i1:
124        case MVT::i8:
125        case MVT::i16:
126        case MVT::i32:
127          return AMDILCC::IL_CC_I_GT;
128        case MVT::f32:
129          return AMDILCC::IL_CC_F_GT;
130        case MVT::f64:
131          return AMDILCC::IL_CC_D_GT;
132        case MVT::i64:
133          return AMDILCC::IL_CC_L_GT;
134        default:
135          assert(0 && "Opcode combination not generated correctly!");
136          return AMDILCC::COND_ERROR;
137      };
138    case ISD::SETGE:
139      switch (type) {
140        case MVT::i1:
141        case MVT::i8:
142        case MVT::i16:
143        case MVT::i32:
144          return AMDILCC::IL_CC_I_GE;
145        case MVT::f32:
146          return AMDILCC::IL_CC_F_GE;
147        case MVT::f64:
148          return AMDILCC::IL_CC_D_GE;
149        case MVT::i64:
150          return AMDILCC::IL_CC_L_GE;
151        default:
152          assert(0 && "Opcode combination not generated correctly!");
153          return AMDILCC::COND_ERROR;
154      };
155    case ISD::SETLT:
156      switch (type) {
157        case MVT::i1:
158        case MVT::i8:
159        case MVT::i16:
160        case MVT::i32:
161          return AMDILCC::IL_CC_I_LT;
162        case MVT::f32:
163          return AMDILCC::IL_CC_F_LT;
164        case MVT::f64:
165          return AMDILCC::IL_CC_D_LT;
166        case MVT::i64:
167          return AMDILCC::IL_CC_L_LT;
168        default:
169          assert(0 && "Opcode combination not generated correctly!");
170          return AMDILCC::COND_ERROR;
171      };
172    case ISD::SETLE:
173      switch (type) {
174        case MVT::i1:
175        case MVT::i8:
176        case MVT::i16:
177        case MVT::i32:
178          return AMDILCC::IL_CC_I_LE;
179        case MVT::f32:
180          return AMDILCC::IL_CC_F_LE;
181        case MVT::f64:
182          return AMDILCC::IL_CC_D_LE;
183        case MVT::i64:
184          return AMDILCC::IL_CC_L_LE;
185        default:
186          assert(0 && "Opcode combination not generated correctly!");
187          return AMDILCC::COND_ERROR;
188      };
189    case ISD::SETNE:
190      switch (type) {
191        case MVT::i1:
192        case MVT::i8:
193        case MVT::i16:
194        case MVT::i32:
195          return AMDILCC::IL_CC_I_NE;
196        case MVT::f32:
197          return AMDILCC::IL_CC_F_NE;
198        case MVT::f64:
199          return AMDILCC::IL_CC_D_NE;
200        case MVT::i64:
201          return AMDILCC::IL_CC_L_NE;
202        default:
203          assert(0 && "Opcode combination not generated correctly!");
204          return AMDILCC::COND_ERROR;
205      };
206    case ISD::SETEQ:
207      switch (type) {
208        case MVT::i1:
209        case MVT::i8:
210        case MVT::i16:
211        case MVT::i32:
212          return AMDILCC::IL_CC_I_EQ;
213        case MVT::f32:
214          return AMDILCC::IL_CC_F_EQ;
215        case MVT::f64:
216          return AMDILCC::IL_CC_D_EQ;
217        case MVT::i64:
218          return AMDILCC::IL_CC_L_EQ;
219        default:
220          assert(0 && "Opcode combination not generated correctly!");
221          return AMDILCC::COND_ERROR;
222      };
223    case ISD::SETUGT:
224      switch (type) {
225        case MVT::i1:
226        case MVT::i8:
227        case MVT::i16:
228        case MVT::i32:
229          return AMDILCC::IL_CC_U_GT;
230        case MVT::f32:
231          return AMDILCC::IL_CC_F_UGT;
232        case MVT::f64:
233          return AMDILCC::IL_CC_D_UGT;
234        case MVT::i64:
235          return AMDILCC::IL_CC_UL_GT;
236        default:
237          assert(0 && "Opcode combination not generated correctly!");
238          return AMDILCC::COND_ERROR;
239      };
240    case ISD::SETUGE:
241      switch (type) {
242        case MVT::i1:
243        case MVT::i8:
244        case MVT::i16:
245        case MVT::i32:
246          return AMDILCC::IL_CC_U_GE;
247        case MVT::f32:
248          return AMDILCC::IL_CC_F_UGE;
249        case MVT::f64:
250          return AMDILCC::IL_CC_D_UGE;
251        case MVT::i64:
252          return AMDILCC::IL_CC_UL_GE;
253        default:
254          assert(0 && "Opcode combination not generated correctly!");
255          return AMDILCC::COND_ERROR;
256      };
257    case ISD::SETULT:
258      switch (type) {
259        case MVT::i1:
260        case MVT::i8:
261        case MVT::i16:
262        case MVT::i32:
263          return AMDILCC::IL_CC_U_LT;
264        case MVT::f32:
265          return AMDILCC::IL_CC_F_ULT;
266        case MVT::f64:
267          return AMDILCC::IL_CC_D_ULT;
268        case MVT::i64:
269          return AMDILCC::IL_CC_UL_LT;
270        default:
271          assert(0 && "Opcode combination not generated correctly!");
272          return AMDILCC::COND_ERROR;
273      };
274    case ISD::SETULE:
275      switch (type) {
276        case MVT::i1:
277        case MVT::i8:
278        case MVT::i16:
279        case MVT::i32:
280          return AMDILCC::IL_CC_U_LE;
281        case MVT::f32:
282          return AMDILCC::IL_CC_F_ULE;
283        case MVT::f64:
284          return AMDILCC::IL_CC_D_ULE;
285        case MVT::i64:
286          return AMDILCC::IL_CC_UL_LE;
287        default:
288          assert(0 && "Opcode combination not generated correctly!");
289          return AMDILCC::COND_ERROR;
290      };
291    case ISD::SETUNE:
292      switch (type) {
293        case MVT::i1:
294        case MVT::i8:
295        case MVT::i16:
296        case MVT::i32:
297          return AMDILCC::IL_CC_U_NE;
298        case MVT::f32:
299          return AMDILCC::IL_CC_F_UNE;
300        case MVT::f64:
301          return AMDILCC::IL_CC_D_UNE;
302        case MVT::i64:
303          return AMDILCC::IL_CC_UL_NE;
304        default:
305          assert(0 && "Opcode combination not generated correctly!");
306          return AMDILCC::COND_ERROR;
307      };
308    case ISD::SETUEQ:
309      switch (type) {
310        case MVT::i1:
311        case MVT::i8:
312        case MVT::i16:
313        case MVT::i32:
314          return AMDILCC::IL_CC_U_EQ;
315        case MVT::f32:
316          return AMDILCC::IL_CC_F_UEQ;
317        case MVT::f64:
318          return AMDILCC::IL_CC_D_UEQ;
319        case MVT::i64:
320          return AMDILCC::IL_CC_UL_EQ;
321        default:
322          assert(0 && "Opcode combination not generated correctly!");
323          return AMDILCC::COND_ERROR;
324      };
325    case ISD::SETOGT:
326      switch (type) {
327        case MVT::f32:
328          return AMDILCC::IL_CC_F_OGT;
329        case MVT::f64:
330          return AMDILCC::IL_CC_D_OGT;
331        case MVT::i1:
332        case MVT::i8:
333        case MVT::i16:
334        case MVT::i32:
335        case MVT::i64:
336        default:
337          assert(0 && "Opcode combination not generated correctly!");
338          return AMDILCC::COND_ERROR;
339      };
340    case ISD::SETOGE:
341      switch (type) {
342        case MVT::f32:
343          return AMDILCC::IL_CC_F_OGE;
344        case MVT::f64:
345          return AMDILCC::IL_CC_D_OGE;
346        case MVT::i1:
347        case MVT::i8:
348        case MVT::i16:
349        case MVT::i32:
350        case MVT::i64:
351        default:
352          assert(0 && "Opcode combination not generated correctly!");
353          return AMDILCC::COND_ERROR;
354      };
355    case ISD::SETOLT:
356      switch (type) {
357        case MVT::f32:
358          return AMDILCC::IL_CC_F_OLT;
359        case MVT::f64:
360          return AMDILCC::IL_CC_D_OLT;
361        case MVT::i1:
362        case MVT::i8:
363        case MVT::i16:
364        case MVT::i32:
365        case MVT::i64:
366        default:
367          assert(0 && "Opcode combination not generated correctly!");
368          return AMDILCC::COND_ERROR;
369      };
370    case ISD::SETOLE:
371      switch (type) {
372        case MVT::f32:
373          return AMDILCC::IL_CC_F_OLE;
374        case MVT::f64:
375          return AMDILCC::IL_CC_D_OLE;
376        case MVT::i1:
377        case MVT::i8:
378        case MVT::i16:
379        case MVT::i32:
380        case MVT::i64:
381        default:
382          assert(0 && "Opcode combination not generated correctly!");
383          return AMDILCC::COND_ERROR;
384      };
385    case ISD::SETONE:
386      switch (type) {
387        case MVT::f32:
388          return AMDILCC::IL_CC_F_ONE;
389        case MVT::f64:
390          return AMDILCC::IL_CC_D_ONE;
391        case MVT::i1:
392        case MVT::i8:
393        case MVT::i16:
394        case MVT::i32:
395        case MVT::i64:
396        default:
397          assert(0 && "Opcode combination not generated correctly!");
398          return AMDILCC::COND_ERROR;
399      };
400    case ISD::SETOEQ:
401      switch (type) {
402        case MVT::f32:
403          return AMDILCC::IL_CC_F_OEQ;
404        case MVT::f64:
405          return AMDILCC::IL_CC_D_OEQ;
406        case MVT::i1:
407        case MVT::i8:
408        case MVT::i16:
409        case MVT::i32:
410        case MVT::i64:
411        default:
412          assert(0 && "Opcode combination not generated correctly!");
413          return AMDILCC::COND_ERROR;
414      };
415  };
416}
417
418  static unsigned int
419translateToOpcode(uint64_t CCCode, unsigned int regClass)
420{
421  switch (CCCode) {
422    case AMDILCC::IL_CC_D_EQ:
423    case AMDILCC::IL_CC_D_OEQ:
424      if (regClass == AMDIL::GPRV2F64RegClassID) {
425        return (unsigned int)AMDIL::DEQ_v2f64;
426      } else {
427        return (unsigned int)AMDIL::DEQ;
428      }
429    case AMDILCC::IL_CC_D_LE:
430    case AMDILCC::IL_CC_D_OLE:
431    case AMDILCC::IL_CC_D_ULE:
432    case AMDILCC::IL_CC_D_GE:
433    case AMDILCC::IL_CC_D_OGE:
434    case AMDILCC::IL_CC_D_UGE:
435      return (unsigned int)AMDIL::DGE;
436    case AMDILCC::IL_CC_D_LT:
437    case AMDILCC::IL_CC_D_OLT:
438    case AMDILCC::IL_CC_D_ULT:
439    case AMDILCC::IL_CC_D_GT:
440    case AMDILCC::IL_CC_D_OGT:
441    case AMDILCC::IL_CC_D_UGT:
442      return (unsigned int)AMDIL::DLT;
443    case AMDILCC::IL_CC_D_NE:
444    case AMDILCC::IL_CC_D_UNE:
445      return (unsigned int)AMDIL::DNE;
446    case AMDILCC::IL_CC_F_EQ:
447    case AMDILCC::IL_CC_F_OEQ:
448      return (unsigned int)AMDIL::FEQ;
449    case AMDILCC::IL_CC_F_LE:
450    case AMDILCC::IL_CC_F_ULE:
451    case AMDILCC::IL_CC_F_OLE:
452    case AMDILCC::IL_CC_F_GE:
453    case AMDILCC::IL_CC_F_UGE:
454    case AMDILCC::IL_CC_F_OGE:
455      return (unsigned int)AMDIL::FGE;
456    case AMDILCC::IL_CC_F_LT:
457    case AMDILCC::IL_CC_F_OLT:
458    case AMDILCC::IL_CC_F_ULT:
459    case AMDILCC::IL_CC_F_GT:
460    case AMDILCC::IL_CC_F_OGT:
461    case AMDILCC::IL_CC_F_UGT:
462      if (regClass == AMDIL::GPRV2F32RegClassID) {
463        return (unsigned int)AMDIL::FLT_v2f32;
464      } else if (regClass == AMDIL::GPRV4F32RegClassID) {
465        return (unsigned int)AMDIL::FLT_v4f32;
466      } else {
467        return (unsigned int)AMDIL::FLT;
468      }
469    case AMDILCC::IL_CC_F_NE:
470    case AMDILCC::IL_CC_F_UNE:
471      return (unsigned int)AMDIL::FNE;
472    case AMDILCC::IL_CC_I_EQ:
473    case AMDILCC::IL_CC_U_EQ:
474      if (regClass == AMDIL::GPRI32RegClassID
475          || regClass == AMDIL::GPRI8RegClassID
476          || regClass == AMDIL::GPRI16RegClassID) {
477        return (unsigned int)AMDIL::IEQ;
478      } else if (regClass == AMDIL::GPRV2I32RegClassID
479          || regClass == AMDIL::GPRV2I8RegClassID
480          || regClass == AMDIL::GPRV2I16RegClassID) {
481        return (unsigned int)AMDIL::IEQ_v2i32;
482      } else if (regClass == AMDIL::GPRV4I32RegClassID
483          || regClass == AMDIL::GPRV4I8RegClassID
484          || regClass == AMDIL::GPRV4I16RegClassID) {
485        return (unsigned int)AMDIL::IEQ_v4i32;
486      } else {
487        assert(!"Unknown reg class!");
488      }
489    case AMDILCC::IL_CC_L_EQ:
490    case AMDILCC::IL_CC_UL_EQ:
491      return (unsigned int)AMDIL::LEQ;
492    case AMDILCC::IL_CC_I_GE:
493    case AMDILCC::IL_CC_I_LE:
494      if (regClass == AMDIL::GPRI32RegClassID
495          || regClass == AMDIL::GPRI8RegClassID
496          || regClass == AMDIL::GPRI16RegClassID) {
497        return (unsigned int)AMDIL::IGE;
498      } else if (regClass == AMDIL::GPRV2I32RegClassID
499          || regClass == AMDIL::GPRI8RegClassID
500          || regClass == AMDIL::GPRI16RegClassID) {
501        return (unsigned int)AMDIL::IGE_v2i32;
502      } else if (regClass == AMDIL::GPRV4I32RegClassID
503          || regClass == AMDIL::GPRI8RegClassID
504          || regClass == AMDIL::GPRI16RegClassID) {
505        return (unsigned int)AMDIL::IGE_v4i32;
506      } else {
507        assert(!"Unknown reg class!");
508      }
509    case AMDILCC::IL_CC_I_LT:
510    case AMDILCC::IL_CC_I_GT:
511      if (regClass == AMDIL::GPRI32RegClassID
512          || regClass == AMDIL::GPRI8RegClassID
513          || regClass == AMDIL::GPRI16RegClassID) {
514        return (unsigned int)AMDIL::ILT;
515      } else if (regClass == AMDIL::GPRV2I32RegClassID
516          || regClass == AMDIL::GPRI8RegClassID
517          || regClass == AMDIL::GPRI16RegClassID) {
518        return (unsigned int)AMDIL::ILT_v2i32;
519      } else if (regClass == AMDIL::GPRV4I32RegClassID
520          || regClass == AMDIL::GPRI8RegClassID
521          || regClass == AMDIL::GPRI16RegClassID) {
522        return (unsigned int)AMDIL::ILT_v4i32;
523      } else {
524        assert(!"Unknown reg class!");
525      }
526    case AMDILCC::IL_CC_L_GE:
527      return (unsigned int)AMDIL::LGE;
528    case AMDILCC::IL_CC_L_LE:
529      return (unsigned int)AMDIL::LLE;
530    case AMDILCC::IL_CC_L_LT:
531      return (unsigned int)AMDIL::LLT;
532    case AMDILCC::IL_CC_L_GT:
533      return (unsigned int)AMDIL::LGT;
534    case AMDILCC::IL_CC_I_NE:
535    case AMDILCC::IL_CC_U_NE:
536      if (regClass == AMDIL::GPRI32RegClassID
537          || regClass == AMDIL::GPRI8RegClassID
538          || regClass == AMDIL::GPRI16RegClassID) {
539        return (unsigned int)AMDIL::INE;
540      } else if (regClass == AMDIL::GPRV2I32RegClassID
541          || regClass == AMDIL::GPRI8RegClassID
542          || regClass == AMDIL::GPRI16RegClassID) {
543        return (unsigned int)AMDIL::INE_v2i32;
544      } else if (regClass == AMDIL::GPRV4I32RegClassID
545          || regClass == AMDIL::GPRI8RegClassID
546          || regClass == AMDIL::GPRI16RegClassID) {
547        return (unsigned int)AMDIL::INE_v4i32;
548      } else {
549        assert(!"Unknown reg class!");
550      }
551    case AMDILCC::IL_CC_U_GE:
552    case AMDILCC::IL_CC_U_LE:
553      if (regClass == AMDIL::GPRI32RegClassID
554          || regClass == AMDIL::GPRI8RegClassID
555          || regClass == AMDIL::GPRI16RegClassID) {
556        return (unsigned int)AMDIL::UGE;
557      } else if (regClass == AMDIL::GPRV2I32RegClassID
558          || regClass == AMDIL::GPRI8RegClassID
559          || regClass == AMDIL::GPRI16RegClassID) {
560        return (unsigned int)AMDIL::UGE_v2i32;
561      } else if (regClass == AMDIL::GPRV4I32RegClassID
562          || regClass == AMDIL::GPRI8RegClassID
563          || regClass == AMDIL::GPRI16RegClassID) {
564        return (unsigned int)AMDIL::UGE_v4i32;
565      } else {
566        assert(!"Unknown reg class!");
567      }
568    case AMDILCC::IL_CC_L_NE:
569    case AMDILCC::IL_CC_UL_NE:
570      return (unsigned int)AMDIL::LNE;
571    case AMDILCC::IL_CC_UL_GE:
572      return (unsigned int)AMDIL::ULGE;
573    case AMDILCC::IL_CC_UL_LE:
574      return (unsigned int)AMDIL::ULLE;
575    case AMDILCC::IL_CC_U_LT:
576      if (regClass == AMDIL::GPRI32RegClassID
577          || regClass == AMDIL::GPRI8RegClassID
578          || regClass == AMDIL::GPRI16RegClassID) {
579        return (unsigned int)AMDIL::ULT;
580      } else if (regClass == AMDIL::GPRV2I32RegClassID
581          || regClass == AMDIL::GPRI8RegClassID
582          || regClass == AMDIL::GPRI16RegClassID) {
583        return (unsigned int)AMDIL::ULT_v2i32;
584      } else if (regClass == AMDIL::GPRV4I32RegClassID
585          || regClass == AMDIL::GPRI8RegClassID
586          || regClass == AMDIL::GPRI16RegClassID) {
587        return (unsigned int)AMDIL::ULT_v4i32;
588      } else {
589        assert(!"Unknown reg class!");
590      }
591    case AMDILCC::IL_CC_U_GT:
592      if (regClass == AMDIL::GPRI32RegClassID
593          || regClass == AMDIL::GPRI8RegClassID
594          || regClass == AMDIL::GPRI16RegClassID) {
595        return (unsigned int)AMDIL::UGT;
596      } else if (regClass == AMDIL::GPRV2I32RegClassID
597          || regClass == AMDIL::GPRI8RegClassID
598          || regClass == AMDIL::GPRI16RegClassID) {
599        return (unsigned int)AMDIL::UGT_v2i32;
600      } else if (regClass == AMDIL::GPRV4I32RegClassID
601          || regClass == AMDIL::GPRI8RegClassID
602          || regClass == AMDIL::GPRI16RegClassID) {
603        return (unsigned int)AMDIL::UGT_v4i32;
604      } else {
605        assert(!"Unknown reg class!");
606      }
607    case AMDILCC::IL_CC_UL_LT:
608      return (unsigned int)AMDIL::ULLT;
609    case AMDILCC::IL_CC_UL_GT:
610      return (unsigned int)AMDIL::ULGT;
611    case AMDILCC::IL_CC_F_UEQ:
612    case AMDILCC::IL_CC_D_UEQ:
613    case AMDILCC::IL_CC_F_ONE:
614    case AMDILCC::IL_CC_D_ONE:
615    case AMDILCC::IL_CC_F_O:
616    case AMDILCC::IL_CC_F_UO:
617    case AMDILCC::IL_CC_D_O:
618    case AMDILCC::IL_CC_D_UO:
619      // we don't care
620      return 0;
621
622  }
623  errs()<<"Opcode: "<<CCCode<<"\n";
624  assert(0 && "Unknown opcode retrieved");
625  return 0;
626}
627SDValue
628AMDILTargetLowering::LowerMemArgument(
629    SDValue Chain,
630    CallingConv::ID CallConv,
631    const SmallVectorImpl<ISD::InputArg> &Ins,
632    DebugLoc dl, SelectionDAG &DAG,
633    const CCValAssign &VA,
634    MachineFrameInfo *MFI,
635    unsigned i) const
636{
637  // Create the nodes corresponding to a load from this parameter slot.
638  ISD::ArgFlagsTy Flags = Ins[i].Flags;
639
640  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
641    getTargetMachine().Options.GuaranteedTailCallOpt;
642  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
643
644  // FIXME: For now, all byval parameter objects are marked mutable. This can
645  // be changed with more analysis.
646  // In case of tail call optimization mark all arguments mutable. Since they
647  // could be overwritten by lowering of arguments in case of a tail call.
648  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
649      VA.getLocMemOffset(), isImmutable);
650  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
651
652  if (Flags.isByVal())
653    return FIN;
654  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
655      MachinePointerInfo::getFixedStack(FI),
656      false, false, false, 0);
657}
658//===----------------------------------------------------------------------===//
659// TargetLowering Implementation Help Functions End
660//===----------------------------------------------------------------------===//
661//===----------------------------------------------------------------------===//
662// Instruction generation functions
663//===----------------------------------------------------------------------===//
664uint32_t
665AMDILTargetLowering::addExtensionInstructions(
666    uint32_t reg, bool signedShift,
667    unsigned int simpleVT) const
668{
669  int shiftSize = 0;
670  uint32_t LShift, RShift;
671  switch(simpleVT)
672  {
673    default:
674      return reg;
675    case AMDIL::GPRI8RegClassID:
676      shiftSize = 24;
677      LShift = AMDIL::SHL_i8;
678      if (signedShift) {
679        RShift = AMDIL::SHR_i8;
680      } else {
681        RShift = AMDIL::USHR_i8;
682      }
683      break;
684    case AMDIL::GPRV2I8RegClassID:
685      shiftSize = 24;
686      LShift = AMDIL::SHL_v2i8;
687      if (signedShift) {
688        RShift = AMDIL::SHR_v2i8;
689      } else {
690        RShift = AMDIL::USHR_v2i8;
691      }
692      break;
693    case AMDIL::GPRV4I8RegClassID:
694      shiftSize = 24;
695      LShift = AMDIL::SHL_v4i8;
696      if (signedShift) {
697        RShift = AMDIL::SHR_v4i8;
698      } else {
699        RShift = AMDIL::USHR_v4i8;
700      }
701      break;
702    case AMDIL::GPRI16RegClassID:
703      shiftSize = 16;
704      LShift = AMDIL::SHL_i16;
705      if (signedShift) {
706        RShift = AMDIL::SHR_i16;
707      } else {
708        RShift = AMDIL::USHR_i16;
709      }
710      break;
711    case AMDIL::GPRV2I16RegClassID:
712      shiftSize = 16;
713      LShift = AMDIL::SHL_v2i16;
714      if (signedShift) {
715        RShift = AMDIL::SHR_v2i16;
716      } else {
717        RShift = AMDIL::USHR_v2i16;
718      }
719      break;
720    case AMDIL::GPRV4I16RegClassID:
721      shiftSize = 16;
722      LShift = AMDIL::SHL_v4i16;
723      if (signedShift) {
724        RShift = AMDIL::SHR_v4i16;
725      } else {
726        RShift = AMDIL::USHR_v4i16;
727      }
728      break;
729  };
730  uint32_t LoadReg = genVReg(simpleVT);
731  uint32_t tmp1 = genVReg(simpleVT);
732  uint32_t tmp2 = genVReg(simpleVT);
733  generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
734  generateMachineInst(LShift, tmp1, reg, LoadReg);
735  generateMachineInst(RShift, tmp2, tmp1, LoadReg);
736  return tmp2;
737}
738
739MachineOperand
740AMDILTargetLowering::convertToReg(MachineOperand op) const
741{
742  if (op.isReg()) {
743    return op;
744  } else if (op.isImm()) {
745    uint32_t loadReg
746      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
747    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
748      .addImm(op.getImm());
749    op.ChangeToRegister(loadReg, false);
750  } else if (op.isFPImm()) {
751    uint32_t loadReg
752      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
753    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
754      .addFPImm(op.getFPImm());
755    op.ChangeToRegister(loadReg, false);
756  } else if (op.isMBB()) {
757    op.ChangeToRegister(0, false);
758  } else if (op.isFI()) {
759    op.ChangeToRegister(0, false);
760  } else if (op.isCPI()) {
761    op.ChangeToRegister(0, false);
762  } else if (op.isJTI()) {
763    op.ChangeToRegister(0, false);
764  } else if (op.isGlobal()) {
765    op.ChangeToRegister(0, false);
766  } else if (op.isSymbol()) {
767    op.ChangeToRegister(0, false);
768  }/* else if (op.isMetadata()) {
769      op.ChangeToRegister(0, false);
770      }*/
771  return op;
772}
773
774void
775AMDILTargetLowering::generateCMPInstr(
776    MachineInstr *MI,
777    MachineBasicBlock *BB,
778    const TargetInstrInfo& TII)
779const
780{
781  MachineOperand DST = MI->getOperand(0);
782  MachineOperand CC = MI->getOperand(1);
783  MachineOperand LHS = MI->getOperand(2);
784  MachineOperand RHS = MI->getOperand(3);
785  int64_t ccCode = CC.getImm();
786  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
787  unsigned int opCode = translateToOpcode(ccCode, simpleVT);
788  DebugLoc DL = MI->getDebugLoc();
789  MachineBasicBlock::iterator BBI = MI;
790  setPrivateData(BB, BBI, &DL, &TII);
791  if (!LHS.isReg()) {
792    LHS = convertToReg(LHS);
793  }
794  if (!RHS.isReg()) {
795    RHS = convertToReg(RHS);
796  }
797  switch (ccCode) {
798    case AMDILCC::IL_CC_I_EQ:
799    case AMDILCC::IL_CC_I_NE:
800    case AMDILCC::IL_CC_I_GE:
801    case AMDILCC::IL_CC_I_LT:
802      {
803        uint32_t lhsreg = addExtensionInstructions(
804            LHS.getReg(), true, simpleVT);
805        uint32_t rhsreg = addExtensionInstructions(
806            RHS.getReg(), true, simpleVT);
807        generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
808      }
809      break;
810    case AMDILCC::IL_CC_U_EQ:
811    case AMDILCC::IL_CC_U_NE:
812    case AMDILCC::IL_CC_U_GE:
813    case AMDILCC::IL_CC_U_LT:
814    case AMDILCC::IL_CC_D_EQ:
815    case AMDILCC::IL_CC_F_EQ:
816    case AMDILCC::IL_CC_F_OEQ:
817    case AMDILCC::IL_CC_D_OEQ:
818    case AMDILCC::IL_CC_D_NE:
819    case AMDILCC::IL_CC_F_NE:
820    case AMDILCC::IL_CC_F_UNE:
821    case AMDILCC::IL_CC_D_UNE:
822    case AMDILCC::IL_CC_D_GE:
823    case AMDILCC::IL_CC_F_GE:
824    case AMDILCC::IL_CC_D_OGE:
825    case AMDILCC::IL_CC_F_OGE:
826    case AMDILCC::IL_CC_D_LT:
827    case AMDILCC::IL_CC_F_LT:
828    case AMDILCC::IL_CC_F_OLT:
829    case AMDILCC::IL_CC_D_OLT:
830      generateMachineInst(opCode, DST.getReg(),
831          LHS.getReg(), RHS.getReg());
832      break;
833    case AMDILCC::IL_CC_I_GT:
834    case AMDILCC::IL_CC_I_LE:
835      {
836        uint32_t lhsreg = addExtensionInstructions(
837            LHS.getReg(), true, simpleVT);
838        uint32_t rhsreg = addExtensionInstructions(
839            RHS.getReg(), true, simpleVT);
840        generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
841      }
842      break;
843    case AMDILCC::IL_CC_U_GT:
844    case AMDILCC::IL_CC_U_LE:
845    case AMDILCC::IL_CC_F_GT:
846    case AMDILCC::IL_CC_D_GT:
847    case AMDILCC::IL_CC_F_OGT:
848    case AMDILCC::IL_CC_D_OGT:
849    case AMDILCC::IL_CC_F_LE:
850    case AMDILCC::IL_CC_D_LE:
851    case AMDILCC::IL_CC_D_OLE:
852    case AMDILCC::IL_CC_F_OLE:
853      generateMachineInst(opCode, DST.getReg(),
854          RHS.getReg(), LHS.getReg());
855      break;
856    case AMDILCC::IL_CC_F_UGT:
857    case AMDILCC::IL_CC_F_ULE:
858      {
859        uint32_t VReg[4] = {
860          genVReg(simpleVT), genVReg(simpleVT),
861          genVReg(simpleVT), genVReg(simpleVT)
862        };
863        generateMachineInst(opCode, VReg[0],
864            RHS.getReg(), LHS.getReg());
865        generateMachineInst(AMDIL::FNE, VReg[1],
866            RHS.getReg(), RHS.getReg());
867        generateMachineInst(AMDIL::FNE, VReg[2],
868            LHS.getReg(), LHS.getReg());
869        generateMachineInst(AMDIL::BINARY_OR_f32,
870            VReg[3], VReg[0], VReg[1]);
871        generateMachineInst(AMDIL::BINARY_OR_f32,
872            DST.getReg(), VReg[2], VReg[3]);
873      }
874      break;
875    case AMDILCC::IL_CC_F_ULT:
876    case AMDILCC::IL_CC_F_UGE:
877      {
878        uint32_t VReg[4] = {
879          genVReg(simpleVT), genVReg(simpleVT),
880          genVReg(simpleVT), genVReg(simpleVT)
881        };
882        generateMachineInst(opCode, VReg[0],
883            LHS.getReg(), RHS.getReg());
884        generateMachineInst(AMDIL::FNE, VReg[1],
885            RHS.getReg(), RHS.getReg());
886        generateMachineInst(AMDIL::FNE, VReg[2],
887            LHS.getReg(), LHS.getReg());
888        generateMachineInst(AMDIL::BINARY_OR_f32,
889            VReg[3], VReg[0], VReg[1]);
890        generateMachineInst(AMDIL::BINARY_OR_f32,
891            DST.getReg(), VReg[2], VReg[3]);
892      }
893      break;
894    case AMDILCC::IL_CC_D_UGT:
895    case AMDILCC::IL_CC_D_ULE:
896      {
897        uint32_t regID = AMDIL::GPRF64RegClassID;
898        uint32_t VReg[4] = {
899          genVReg(regID), genVReg(regID),
900          genVReg(regID), genVReg(regID)
901        };
902        // The result of a double comparison is a 32bit result
903        generateMachineInst(opCode, VReg[0],
904            RHS.getReg(), LHS.getReg());
905        generateMachineInst(AMDIL::DNE, VReg[1],
906            RHS.getReg(), RHS.getReg());
907        generateMachineInst(AMDIL::DNE, VReg[2],
908            LHS.getReg(), LHS.getReg());
909        generateMachineInst(AMDIL::BINARY_OR_f32,
910            VReg[3], VReg[0], VReg[1]);
911        generateMachineInst(AMDIL::BINARY_OR_f32,
912            DST.getReg(), VReg[2], VReg[3]);
913      }
914      break;
915    case AMDILCC::IL_CC_D_UGE:
916    case AMDILCC::IL_CC_D_ULT:
917      {
918        uint32_t regID = AMDIL::GPRF64RegClassID;
919        uint32_t VReg[4] = {
920          genVReg(regID), genVReg(regID),
921          genVReg(regID), genVReg(regID)
922        };
923        // The result of a double comparison is a 32bit result
924        generateMachineInst(opCode, VReg[0],
925            LHS.getReg(), RHS.getReg());
926        generateMachineInst(AMDIL::DNE, VReg[1],
927            RHS.getReg(), RHS.getReg());
928        generateMachineInst(AMDIL::DNE, VReg[2],
929            LHS.getReg(), LHS.getReg());
930        generateMachineInst(AMDIL::BINARY_OR_f32,
931            VReg[3], VReg[0], VReg[1]);
932        generateMachineInst(AMDIL::BINARY_OR_f32,
933            DST.getReg(), VReg[2], VReg[3]);
934      }
935      break;
936    case AMDILCC::IL_CC_F_UEQ:
937      {
938        uint32_t VReg[4] = {
939          genVReg(simpleVT), genVReg(simpleVT),
940          genVReg(simpleVT), genVReg(simpleVT)
941        };
942        generateMachineInst(AMDIL::FEQ, VReg[0],
943            LHS.getReg(), RHS.getReg());
944        generateMachineInst(AMDIL::FNE, VReg[1],
945            LHS.getReg(), LHS.getReg());
946        generateMachineInst(AMDIL::FNE, VReg[2],
947            RHS.getReg(), RHS.getReg());
948        generateMachineInst(AMDIL::BINARY_OR_f32,
949            VReg[3], VReg[0], VReg[1]);
950        generateMachineInst(AMDIL::BINARY_OR_f32,
951            DST.getReg(), VReg[2], VReg[3]);
952      }
953      break;
954    case AMDILCC::IL_CC_F_ONE:
955      {
956        uint32_t VReg[4] = {
957          genVReg(simpleVT), genVReg(simpleVT),
958          genVReg(simpleVT), genVReg(simpleVT)
959        };
960        generateMachineInst(AMDIL::FNE, VReg[0],
961            LHS.getReg(), RHS.getReg());
962        generateMachineInst(AMDIL::FEQ, VReg[1],
963            LHS.getReg(), LHS.getReg());
964        generateMachineInst(AMDIL::FEQ, VReg[2],
965            RHS.getReg(), RHS.getReg());
966        generateMachineInst(AMDIL::BINARY_AND_f32,
967            VReg[3], VReg[0], VReg[1]);
968        generateMachineInst(AMDIL::BINARY_AND_f32,
969            DST.getReg(), VReg[2], VReg[3]);
970      }
971      break;
972    case AMDILCC::IL_CC_D_UEQ:
973      {
974        uint32_t regID = AMDIL::GPRF64RegClassID;
975        uint32_t VReg[4] = {
976          genVReg(regID), genVReg(regID),
977          genVReg(regID), genVReg(regID)
978        };
979        // The result of a double comparison is a 32bit result
980        generateMachineInst(AMDIL::DEQ, VReg[0],
981            LHS.getReg(), RHS.getReg());
982        generateMachineInst(AMDIL::DNE, VReg[1],
983            LHS.getReg(), LHS.getReg());
984        generateMachineInst(AMDIL::DNE, VReg[2],
985            RHS.getReg(), RHS.getReg());
986        generateMachineInst(AMDIL::BINARY_OR_f32,
987            VReg[3], VReg[0], VReg[1]);
988        generateMachineInst(AMDIL::BINARY_OR_f32,
989            DST.getReg(), VReg[2], VReg[3]);
990
991      }
992      break;
993    case AMDILCC::IL_CC_D_ONE:
994      {
995        uint32_t regID = AMDIL::GPRF64RegClassID;
996        uint32_t VReg[4] = {
997          genVReg(regID), genVReg(regID),
998          genVReg(regID), genVReg(regID)
999        };
1000        // The result of a double comparison is a 32bit result
1001        generateMachineInst(AMDIL::DNE, VReg[0],
1002            LHS.getReg(), RHS.getReg());
1003        generateMachineInst(AMDIL::DEQ, VReg[1],
1004            LHS.getReg(), LHS.getReg());
1005        generateMachineInst(AMDIL::DEQ, VReg[2],
1006            RHS.getReg(), RHS.getReg());
1007        generateMachineInst(AMDIL::BINARY_AND_f32,
1008            VReg[3], VReg[0], VReg[1]);
1009        generateMachineInst(AMDIL::BINARY_AND_f32,
1010            DST.getReg(), VReg[2], VReg[3]);
1011
1012      }
1013      break;
1014    case AMDILCC::IL_CC_F_O:
1015      {
1016        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1017        generateMachineInst(AMDIL::FEQ, VReg[0],
1018            RHS.getReg(), RHS.getReg());
1019        generateMachineInst(AMDIL::FEQ, VReg[1],
1020            LHS.getReg(), LHS.getReg());
1021        generateMachineInst(AMDIL::BINARY_AND_f32,
1022            DST.getReg(), VReg[0], VReg[1]);
1023      }
1024      break;
1025    case AMDILCC::IL_CC_D_O:
1026      {
1027        uint32_t regID = AMDIL::GPRF64RegClassID;
1028        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1029        // The result of a double comparison is a 32bit result
1030        generateMachineInst(AMDIL::DEQ, VReg[0],
1031            RHS.getReg(), RHS.getReg());
1032        generateMachineInst(AMDIL::DEQ, VReg[1],
1033            LHS.getReg(), LHS.getReg());
1034        generateMachineInst(AMDIL::BINARY_AND_f32,
1035            DST.getReg(), VReg[0], VReg[1]);
1036      }
1037      break;
1038    case AMDILCC::IL_CC_F_UO:
1039      {
1040        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1041        generateMachineInst(AMDIL::FNE, VReg[0],
1042            RHS.getReg(), RHS.getReg());
1043        generateMachineInst(AMDIL::FNE, VReg[1],
1044            LHS.getReg(), LHS.getReg());
1045        generateMachineInst(AMDIL::BINARY_OR_f32,
1046            DST.getReg(), VReg[0], VReg[1]);
1047      }
1048      break;
1049    case AMDILCC::IL_CC_D_UO:
1050      {
1051        uint32_t regID = AMDIL::GPRF64RegClassID;
1052        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1053        // The result of a double comparison is a 32bit result
1054        generateMachineInst(AMDIL::DNE, VReg[0],
1055            RHS.getReg(), RHS.getReg());
1056        generateMachineInst(AMDIL::DNE, VReg[1],
1057            LHS.getReg(), LHS.getReg());
1058        generateMachineInst(AMDIL::BINARY_OR_f32,
1059            DST.getReg(), VReg[0], VReg[1]);
1060      }
1061      break;
1062    case AMDILCC::IL_CC_L_LE:
1063    case AMDILCC::IL_CC_L_GE:
1064    case AMDILCC::IL_CC_L_EQ:
1065    case AMDILCC::IL_CC_L_NE:
1066    case AMDILCC::IL_CC_L_LT:
1067    case AMDILCC::IL_CC_L_GT:
1068    case AMDILCC::IL_CC_UL_LE:
1069    case AMDILCC::IL_CC_UL_GE:
1070    case AMDILCC::IL_CC_UL_EQ:
1071    case AMDILCC::IL_CC_UL_NE:
1072    case AMDILCC::IL_CC_UL_LT:
1073    case AMDILCC::IL_CC_UL_GT:
1074      {
1075        const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1076            &this->getTargetMachine())->getSubtargetImpl();
1077        if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
1078          generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
1079        } else {
1080          generateLongRelational(MI, opCode);
1081        }
1082      }
1083      break;
1084    case AMDILCC::COND_ERROR:
1085      assert(0 && "Invalid CC code");
1086      break;
1087  };
1088}
1089
1090//===----------------------------------------------------------------------===//
1091// TargetLowering Class Implementation Begins
1092//===----------------------------------------------------------------------===//
1093  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
1094: TargetLowering(TM, new TargetLoweringObjectFileELF())
1095{
1096  int types[] =
1097  {
1098    (int)MVT::i8,
1099    (int)MVT::i16,
1100    (int)MVT::i32,
1101    (int)MVT::f32,
1102    (int)MVT::f64,
1103    (int)MVT::i64,
1104    (int)MVT::v2i8,
1105    (int)MVT::v4i8,
1106    (int)MVT::v2i16,
1107    (int)MVT::v4i16,
1108    (int)MVT::v4f32,
1109    (int)MVT::v4i32,
1110    (int)MVT::v2f32,
1111    (int)MVT::v2i32,
1112    (int)MVT::v2f64,
1113    (int)MVT::v2i64
1114  };
1115
1116  int IntTypes[] =
1117  {
1118    (int)MVT::i8,
1119    (int)MVT::i16,
1120    (int)MVT::i32,
1121    (int)MVT::i64
1122  };
1123
1124  int FloatTypes[] =
1125  {
1126    (int)MVT::f32,
1127    (int)MVT::f64
1128  };
1129
1130  int VectorTypes[] =
1131  {
1132    (int)MVT::v2i8,
1133    (int)MVT::v4i8,
1134    (int)MVT::v2i16,
1135    (int)MVT::v4i16,
1136    (int)MVT::v4f32,
1137    (int)MVT::v4i32,
1138    (int)MVT::v2f32,
1139    (int)MVT::v2i32,
1140    (int)MVT::v2f64,
1141    (int)MVT::v2i64
1142  };
1143  size_t numTypes = sizeof(types) / sizeof(*types);
1144  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
1145  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
1146  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
1147
1148  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1149      &this->getTargetMachine())->getSubtargetImpl();
1150  // These are the current register classes that are
1151  // supported
1152
1153  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
1154  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
1155
1156  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1157    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
1158    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
1159  }
1160  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
1161    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
1162    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
1163    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
1164    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
1165  }
1166  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
1167    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
1168    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
1169    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
1170    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
1171  }
1172  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
1173  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
1174  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
1175  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
1176  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1177    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
1178    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
1179  }
1180
1181  for (unsigned int x  = 0; x < numTypes; ++x) {
1182    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
1183
1184    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1185    // We cannot sextinreg, expand to shifts
1186    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1187    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1188    setOperationAction(ISD::FP_ROUND, VT, Expand);
1189    setOperationAction(ISD::OR, VT, Custom);
1190    setOperationAction(ISD::SUBE, VT, Expand);
1191    setOperationAction(ISD::SUBC, VT, Expand);
1192    setOperationAction(ISD::ADD, VT, Custom);
1193    setOperationAction(ISD::ADDE, VT, Expand);
1194    setOperationAction(ISD::ADDC, VT, Expand);
1195    setOperationAction(ISD::SETCC, VT, Custom);
1196    setOperationAction(ISD::BRCOND, VT, Custom);
1197    setOperationAction(ISD::BR_CC, VT, Custom);
1198    setOperationAction(ISD::BR_JT, VT, Expand);
1199    setOperationAction(ISD::BRIND, VT, Expand);
1200    // TODO: Implement custom UREM/SREM routines
1201    setOperationAction(ISD::UREM, VT, Expand);
1202    setOperationAction(ISD::SREM, VT, Expand);
1203    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1204    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1205    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1206    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1207    setOperationAction(ISDBITCAST, VT, Custom);
1208    setOperationAction(ISD::GlobalAddress, VT, Custom);
1209    setOperationAction(ISD::JumpTable, VT, Custom);
1210    setOperationAction(ISD::ConstantPool, VT, Custom);
1211    setOperationAction(ISD::SELECT_CC, VT, Custom);
1212    setOperationAction(ISD::SELECT, VT, Custom);
1213    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1214    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1215    if (VT != MVT::i64 && VT != MVT::v2i64) {
1216      setOperationAction(ISD::SDIV, VT, Custom);
1217      setOperationAction(ISD::UDIV, VT, Custom);
1218    }
1219    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1220    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1221  }
1222  for (unsigned int x = 0; x < numFloatTypes; ++x) {
1223    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
1224
1225    // IL does not have these operations for floating point types
1226    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
1227    setOperationAction(ISD::FP_ROUND, VT, Custom);
1228    setOperationAction(ISD::SETOLT, VT, Expand);
1229    setOperationAction(ISD::SETOGE, VT, Expand);
1230    setOperationAction(ISD::SETOGT, VT, Expand);
1231    setOperationAction(ISD::SETOLE, VT, Expand);
1232    setOperationAction(ISD::SETULT, VT, Expand);
1233    setOperationAction(ISD::SETUGE, VT, Expand);
1234    setOperationAction(ISD::SETUGT, VT, Expand);
1235    setOperationAction(ISD::SETULE, VT, Expand);
1236  }
1237
1238  for (unsigned int x = 0; x < numIntTypes; ++x) {
1239    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
1240
1241    // GPU also does not have divrem function for signed or unsigned
1242    setOperationAction(ISD::SDIVREM, VT, Expand);
1243    setOperationAction(ISD::UDIVREM, VT, Expand);
1244    setOperationAction(ISD::FP_ROUND, VT, Expand);
1245
1246    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1247    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1248    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1249
1250    // GPU doesn't have a rotl, rotr, or byteswap instruction
1251    setOperationAction(ISD::ROTR, VT, Expand);
1252    setOperationAction(ISD::ROTL, VT, Expand);
1253    setOperationAction(ISD::BSWAP, VT, Expand);
1254
1255    // GPU doesn't have any counting operators
1256    setOperationAction(ISD::CTPOP, VT, Expand);
1257    setOperationAction(ISD::CTTZ, VT, Expand);
1258    setOperationAction(ISD::CTLZ, VT, Expand);
1259  }
1260
1261  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
1262  {
1263    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
1264
1265    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1266    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1267    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1268    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
1269    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1270    setOperationAction(ISD::FP_ROUND, VT, Expand);
1271    setOperationAction(ISD::SDIVREM, VT, Expand);
1272    setOperationAction(ISD::UDIVREM, VT, Expand);
1273    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1274    // setOperationAction(ISD::VSETCC, VT, Expand);
1275    setOperationAction(ISD::SETCC, VT, Expand);
1276    setOperationAction(ISD::SELECT_CC, VT, Expand);
1277    setOperationAction(ISD::SELECT, VT, Expand);
1278
1279  }
1280  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
1281  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1282    if (stm->calVersion() < CAL_VERSION_SC_139
1283        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
1284      setOperationAction(ISD::MUL, MVT::i64, Custom);
1285    }
1286    setOperationAction(ISD::SUB, MVT::i64, Custom);
1287    setOperationAction(ISD::ADD, MVT::i64, Custom);
1288    setOperationAction(ISD::MULHU, MVT::i64, Expand);
1289    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
1290    setOperationAction(ISD::MULHS, MVT::i64, Expand);
1291    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
1292    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1293    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1294    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1295    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
1296    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
1297    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
1298    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
1299    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
1300    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
1301    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
1302    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
1303    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
1304    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
1305    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
1306    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
1307  }
1308  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1309    // we support loading/storing v2f64 but not operations on the type
1310    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
1311    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
1312    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
1313    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
1314    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
1315    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
1316    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
1317    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
1318    // We want to expand vector conversions into their scalar
1319    // counterparts.
1320    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
1321    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
1322    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
1323    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
1324    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
1325    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
1326    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
1327    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
1328    setOperationAction(ISD::FABS, MVT::f64, Expand);
1329    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
1330  }
1331  // TODO: Fix the UDIV24 algorithm so it works for these
1332  // types correctly. This needs vector comparisons
1333  // for this to work correctly.
1334  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
1335  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
1336  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
1337  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
1338  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
1339  setOperationAction(ISD::SUBC, MVT::Other, Expand);
1340  setOperationAction(ISD::ADDE, MVT::Other, Expand);
1341  setOperationAction(ISD::ADDC, MVT::Other, Expand);
1342  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1343  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
1344  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1345  setOperationAction(ISD::BRIND, MVT::Other, Expand);
1346  setOperationAction(ISD::SETCC, MVT::Other, Custom);
1347  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
1348  setOperationAction(ISD::FDIV, MVT::f32, Custom);
1349  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
1350  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
1351
1352  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
1353  // Use the default implementation.
1354  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
1355  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
1356  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
1357  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
1358  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
1359  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
1360  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
1361  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
1362  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
1363
1364  setStackPointerRegisterToSaveRestore(AMDIL::SP);
1365  setSchedulingPreference(Sched::RegPressure);
1366  setPow2DivIsCheap(false);
1367  setPrefLoopAlignment(16);
1368  setSelectIsExpensive(true);
1369  setJumpIsExpensive(true);
1370  computeRegisterProperties();
1371
1372  maxStoresPerMemcpy  = 4096;
1373  maxStoresPerMemmove = 4096;
1374  maxStoresPerMemset  = 4096;
1375
1376#undef numTypes
1377#undef numIntTypes
1378#undef numVectorTypes
1379#undef numFloatTypes
1380}
1381
1382const char *
1383AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
1384{
1385  switch (Opcode) {
1386    default: return 0;
1387    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
1388    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
1389    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
1390    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
1391    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
1392    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
1393    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
1394    case AMDILISD::MAD:  return "AMDILISD::MAD";
1395    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
1396    case AMDILISD::CALL:  return "AMDILISD::CALL";
1397    case AMDILISD::RET:   return "AMDILISD::RET";
1398    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
1399    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
1400    case AMDILISD::ADD: return "AMDILISD::ADD";
1401    case AMDILISD::UMUL: return "AMDILISD::UMUL";
1402    case AMDILISD::AND: return "AMDILISD::AND";
1403    case AMDILISD::OR: return "AMDILISD::OR";
1404    case AMDILISD::NOT: return "AMDILISD::NOT";
1405    case AMDILISD::XOR: return "AMDILISD::XOR";
1406    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
1407    case AMDILISD::SMAX: return "AMDILISD::SMAX";
1408    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
1409    case AMDILISD::MOVE: return "AMDILISD::MOVE";
1410    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
1411    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
1412    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
1413    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
1414    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
1415    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
1416    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
1417    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
1418    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
1419    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
1420    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
1421    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
1422    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
1423    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
1424    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
1425    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
1426    case AMDILISD::CMP: return "AMDILISD::CMP";
1427    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
1428    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
1429    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
1430    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
1431    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
1432    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
1433    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
1434    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
1435    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
1436    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
1437    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
1438    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
1439    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
1440    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
1441    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
1442    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
1443    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
1444    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
1445    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
1446    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
1447    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
1448    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
1449    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
1450    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
1451    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
1452    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
1453    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
1454    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
1455    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
1456    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
1457    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
1458    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
1459    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
1460    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
1461    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
1462    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
1463    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
1464    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
1465    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
1466    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
1467    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
1468    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
1469    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
1470    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
1471    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
1472    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
1473    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
1474    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
1475    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
1476    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
1477    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
1478    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
1479    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
1480    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
1481    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
1482    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
1483    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
1484    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
1485    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
1486    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
1487    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
1488    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
1489    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
1490    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
1491    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
1492    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
1493    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
1494    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
1495    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
1496    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
1497    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
1498    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
1499    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
1500    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
1501    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
1502    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
1503    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
1504    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
1505    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
1506    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
1507    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
1508    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
1509    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
1510    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
1511    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
1512    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
1513    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
1514    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
1515    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
1516    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
1517    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
1518    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
1519    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
1520    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
1521    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
1522    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
1523    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
1524    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
1525    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
1526    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
1527    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
1528    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
1529    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
1530    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
1531    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
1532    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
1533    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
1534    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
1535    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";
1536
1537  };
1538}
// Fill in Info for AMDIL memory intrinsics (atomics, append/consume) so the
// SelectionDAG builder models their memory behavior. Returns false for any
// intrinsic outside the AMDIL range or not handled by the switch below.
// Naming convention of the cases appears to be: _g* = global, _l* = local,
// _r* = region address space; *_noret = variant that discards the old value
// -- TODO confirm against AMDILIntrinsicInfo.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  // Only intrinsics in the AMDIL-specific numbering range are ours.
  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
    return false;
  }
  // Set for float exchange intrinsics whose operand is bitcast to int.
  bool bitCastToInt = false;
  unsigned IntNo;
  // true when the intrinsic returns the pre-op value (mirrored into readMem).
  bool isRet = true;
  const AMDILSubtarget *STM = &this->getTargetMachine()
    .getSubtarget<AMDILSubtarget>();
  // Map each AMDIL intrinsic to the matching AMDILISD opcode.
  switch (Intrinsic) {
    default: return false; // Don't custom lower most intrinsics.
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
             IntNo = AMDILISD::ATOM_G_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
             IntNo = AMDILISD::ATOM_L_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
             IntNo = AMDILISD::ATOM_R_ADD; break;
    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
             IntNo = AMDILISD::ATOM_G_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
             IntNo = AMDILISD::ATOM_L_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
             IntNo = AMDILISD::ATOM_R_AND; break;
    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
    // Pre-SC-136 CAL lacks native atomic dec; fall back to atomic sub.
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_SUB_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_DEC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_SUB_NORET;
             }
             break;
    // Likewise, pre-SC-136 CAL lacks atomic inc; fall back to atomic add.
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_G_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_G_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_L_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_L_ADD_NORET;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD;
             }
             break;
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
             isRet = false;
             if (STM->calVersion() >= CAL_VERSION_SC_136) {
               IntNo = AMDILISD::ATOM_R_INC_NORET;
             } else {
               IntNo = AMDILISD::ATOM_R_ADD_NORET;
             }
             break;
    // min/max have distinct signed (MAX/MIN) and unsigned (UMAX/UMIN) opcodes.
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
             IntNo = AMDILISD::ATOM_G_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
             IntNo = AMDILISD::ATOM_G_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
             IntNo = AMDILISD::ATOM_L_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
             IntNo = AMDILISD::ATOM_L_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
             IntNo = AMDILISD::ATOM_R_MAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
             IntNo = AMDILISD::ATOM_R_UMAX; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
             IntNo = AMDILISD::ATOM_G_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
             IntNo = AMDILISD::ATOM_G_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
             IntNo = AMDILISD::ATOM_L_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
             IntNo = AMDILISD::ATOM_L_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
             IntNo = AMDILISD::ATOM_R_MIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
             IntNo = AMDILISD::ATOM_R_UMIN; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
             IntNo = AMDILISD::ATOM_G_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
             IntNo = AMDILISD::ATOM_L_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
             IntNo = AMDILISD::ATOM_R_OR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
             IntNo = AMDILISD::ATOM_G_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
             IntNo = AMDILISD::ATOM_L_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
             IntNo = AMDILISD::ATOM_R_SUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
             IntNo = AMDILISD::ATOM_G_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
             IntNo = AMDILISD::ATOM_L_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
             IntNo = AMDILISD::ATOM_R_RSUB; break;
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
    // NOTE: the *f32 xchg cases fall through intentionally -- they set
    // bitCastToInt and then share the integer xchg opcode selection.
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
             IntNo = AMDILISD::ATOM_G_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
             IntNo = AMDILISD::ATOM_L_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
             IntNo = AMDILISD::ATOM_R_XCHG; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
             bitCastToInt = true;
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
             IntNo = AMDILISD::ATOM_G_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
             IntNo = AMDILISD::ATOM_L_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
             IntNo = AMDILISD::ATOM_R_XOR; break;
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
             isRet = false;
             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
             IntNo = AMDILISD::APPEND_ALLOC; break;
    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
             IntNo = AMDILISD::APPEND_CONSUME; break;
    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
             isRet = false;
             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
  };

  // All of these intrinsics operate on a 32-bit memory location through
  // operand 0 (the pointer), are treated as volatile, and always write.
  // Only the value-returning variants read.
  Info.opc = IntNo;
  // NOTE(review): memVT is f32 only for the bitCastToInt (float xchg)
  // intrinsics -- i.e. it records the source value type, not the
  // post-bitcast integer type. Confirm this is the intended convention.
  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
  Info.ptrVal = I.getOperand(0);
  Info.offset = 0;
  Info.align = 4;
  Info.vol = true;
  Info.readMem = isRet;
  Info.writeMem = true;
  return true;
}
1918// The backend supports 32 and 64 bit floating point immediates
1919bool
1920AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1921{
1922  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1923      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1924    return true;
1925  } else {
1926    return false;
1927  }
1928}
1929
1930bool
1931AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1932{
1933  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1934      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1935    return false;
1936  } else {
1937    return true;
1938  }
1939}
1940
1941
1942// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1943// be zero. Op is expected to be a target specific node. Used by DAG
1944// combiner.
1945
1946void
1947AMDILTargetLowering::computeMaskedBitsForTargetNode(
1948    const SDValue Op,
1949    APInt &KnownZero,
1950    APInt &KnownOne,
1951    const SelectionDAG &DAG,
1952    unsigned Depth) const
1953{
1954  APInt KnownZero2;
1955  APInt KnownOne2;
1956  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1957  switch (Op.getOpcode()) {
1958    default: break;
1959    case AMDILISD::SELECT_CC:
1960             DAG.ComputeMaskedBits(
1961                 Op.getOperand(1),
1962                 KnownZero,
1963                 KnownOne,
1964                 Depth + 1
1965                 );
1966             DAG.ComputeMaskedBits(
1967                 Op.getOperand(0),
1968                 KnownZero2,
1969                 KnownOne2
1970                 );
1971             assert((KnownZero & KnownOne) == 0
1972                 && "Bits known to be one AND zero?");
1973             assert((KnownZero2 & KnownOne2) == 0
1974                 && "Bits known to be one AND zero?");
1975             // Only known if known in both the LHS and RHS
1976             KnownOne &= KnownOne2;
1977             KnownZero &= KnownZero2;
1978             break;
1979  };
1980}
1981
1982// This is the function that determines which calling convention should
1983// be used. Currently there is only one calling convention
1984CCAssignFn*
1985AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
1986{
1987  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
1988  return CC_AMDIL32;
1989}
1990
1991// LowerCallResult - Lower the result values of an ISD::CALL into the
1992// appropriate copies out of appropriate physical registers.  This assumes that
1993// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
1994// being lowered.  The returns a SDNode with the same number of values as the
1995// ISD::CALL.
1996SDValue
1997AMDILTargetLowering::LowerCallResult(
1998    SDValue Chain,
1999    SDValue InFlag,
2000    CallingConv::ID CallConv,
2001    bool isVarArg,
2002    const SmallVectorImpl<ISD::InputArg> &Ins,
2003    DebugLoc dl,
2004    SelectionDAG &DAG,
2005    SmallVectorImpl<SDValue> &InVals) const
2006{
2007  // Assign locations to each value returned by this call
2008  SmallVector<CCValAssign, 16> RVLocs;
2009  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2010                 getTargetMachine(), RVLocs, *DAG.getContext());
2011  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
2012
2013  // Copy all of the result registers out of their specified physreg.
2014  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2015    EVT CopyVT = RVLocs[i].getValVT();
2016    if (RVLocs[i].isRegLoc()) {
2017      Chain = DAG.getCopyFromReg(
2018          Chain,
2019          dl,
2020          RVLocs[i].getLocReg(),
2021          CopyVT,
2022          InFlag
2023          ).getValue(1);
2024      SDValue Val = Chain.getValue(0);
2025      InFlag = Chain.getValue(2);
2026      InVals.push_back(Val);
2027    }
2028  }
2029
2030  return Chain;
2031
2032}
2033
2034//===----------------------------------------------------------------------===//
2035//                           Other Lowering Hooks
2036//===----------------------------------------------------------------------===//
2037
// Expand pseudo-instructions that require custom insertion. Only the
// AMDIL::CMP family is handled; everything else is returned untouched.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes emits a 'case' label for every typed variant of
    // AMDIL::CMP; all of them share the expansion below.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    // The pseudo has been replaced by real compare instructions, so remove
    // the original from the block.
    MI->eraseFromParent();
    break;
    default:
    break;
  }
  // No new basic blocks are created; lowering stays within BB.
  return BB;
}
2053
2054// Recursively assign SDNodeOrdering to any unordered nodes
2055// This is necessary to maintain source ordering of instructions
2056// under -O0 to avoid odd-looking "skipping around" issues.
2057  static const SDValue
2058Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
2059{
2060  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
2061    DAG.AssignOrdering( New.getNode(), order );
2062    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
2063      Ordered( DAG, order, New.getOperand(i) );
2064  }
2065  return New;
2066}
2067
2068#define LOWER(A) \
2069  case ISD:: A: \
2070return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
2071
2072SDValue
2073AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
2074{
2075  switch (Op.getOpcode()) {
2076    default:
2077      Op.getNode()->dump();
2078      assert(0 && "Custom lowering code for this"
2079          "instruction is not implemented yet!");
2080      break;
2081      LOWER(GlobalAddress);
2082      LOWER(JumpTable);
2083      LOWER(ConstantPool);
2084      LOWER(ExternalSymbol);
2085      LOWER(FP_TO_SINT);
2086      LOWER(FP_TO_UINT);
2087      LOWER(SINT_TO_FP);
2088      LOWER(UINT_TO_FP);
2089      LOWER(ADD);
2090      LOWER(MUL);
2091      LOWER(SUB);
2092      LOWER(FDIV);
2093      LOWER(SDIV);
2094      LOWER(SREM);
2095      LOWER(UDIV);
2096      LOWER(UREM);
2097      LOWER(BUILD_VECTOR);
2098      LOWER(INSERT_VECTOR_ELT);
2099      LOWER(EXTRACT_VECTOR_ELT);
2100      LOWER(EXTRACT_SUBVECTOR);
2101      LOWER(SCALAR_TO_VECTOR);
2102      LOWER(CONCAT_VECTORS);
2103      LOWER(AND);
2104      LOWER(OR);
2105      LOWER(SELECT);
2106      LOWER(SELECT_CC);
2107      LOWER(SETCC);
2108      LOWER(SIGN_EXTEND_INREG);
2109      LOWER(BITCAST);
2110      LOWER(DYNAMIC_STACKALLOC);
2111      LOWER(BRCOND);
2112      LOWER(BR_CC);
2113      LOWER(FP_ROUND);
2114  }
2115  return Op;
2116}
2117
// Accessor for the cached frame offset of the first variable argument slot.
int
AMDILTargetLowering::getVarArgsFrameOffset() const
{
  return VarArgsFrameOffset;
}
2123#undef LOWER
2124
2125SDValue
2126AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
2127{
2128  SDValue DST = Op;
2129  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
2130  const GlobalValue *G = GADN->getGlobal();
2131  DebugLoc DL = Op.getDebugLoc();
2132  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
2133  if (!GV) {
2134    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2135  } else {
2136    if (GV->hasInitializer()) {
2137      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
2138      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
2139        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
2140      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
2141        DST = DAG.getConstantFP(CF->getValueAPF(),
2142            Op.getValueType());
2143      } else if (dyn_cast<ConstantAggregateZero>(C)) {
2144        EVT VT = Op.getValueType();
2145        if (VT.isInteger()) {
2146          DST = DAG.getConstant(0, VT);
2147        } else {
2148          DST = DAG.getConstantFP(0, VT);
2149        }
2150      } else {
2151        assert(!"lowering this type of Global Address "
2152            "not implemented yet!");
2153        C->dump();
2154        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2155      }
2156    } else {
2157      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2158    }
2159  }
2160  return DST;
2161}
2162
2163SDValue
2164AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
2165{
2166  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2167  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
2168  return Result;
2169}
2170SDValue
2171AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
2172{
2173  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2174  EVT PtrVT = Op.getValueType();
2175  SDValue Result;
2176  if (CP->isMachineConstantPoolEntry()) {
2177    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2178        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2179  } else {
2180    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2181        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2182  }
2183  return Result;
2184}
2185
2186SDValue
2187AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
2188{
2189  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2190  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
2191  return Result;
2192}
2193/// LowerFORMAL_ARGUMENTS - transform physical registers into
2194/// virtual registers and generate load operations for
2195/// arguments places on the stack.
2196/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Transform incoming physical registers into virtual registers and emit
  // loads for arguments passed on the stack. Each lowered argument value
  // is appended to InVals in declaration order. Variadic functions and
  // struct-return are not implemented (asserts below).

  MachineFunction &MF = DAG.getMachineFunction();
  AMDILMachineFunctionInfo *FuncInfo
    = MF.getInfo<AMDILMachineFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register argument: mark the physreg live-in and copy it out.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any promotion (SExt/ZExt/AExt) is undone here by truncating back
      // to the declared value type.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack argument: delegate to LowerMemArgument to emit the load.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  unsigned int StackSize = CCInfo.getNextStackOffset();
  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  FuncInfo->setBytesToPopOnReturn(StackSize);
  return Chain;
}
2297/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2298/// by "Src" to address "Dst" with size and alignment information specified by
2299/// the specific parameter attribute. The copy will be passed as a byval
2300/// function parameter.
2301static SDValue
2302CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2303    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
2304  assert(0 && "MemCopy does not exist yet");
2305  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2306
2307  return DAG.getMemcpy(Chain,
2308      Src.getDebugLoc(),
2309      Dst, Src, SizeNode, Flags.getByValAlign(),
2310      /*IsVol=*/false, /*AlwaysInline=*/true,
2311      MachinePointerInfo(), MachinePointerInfo());
2312}
2313
2314SDValue
2315AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
2316    SDValue StackPtr, SDValue Arg,
2317    DebugLoc dl, SelectionDAG &DAG,
2318    const CCValAssign &VA,
2319    ISD::ArgFlagsTy Flags) const
2320{
2321  unsigned int LocMemOffset = VA.getLocMemOffset();
2322  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2323  PtrOff = DAG.getNode(ISD::ADD,
2324      dl,
2325      getPointerTy(), StackPtr, PtrOff);
2326  if (Flags.isByVal()) {
2327    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
2328  } else {
2329    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
2330        MachinePointerInfo::getStack(LocMemOffset),
2331        false, false, 0);
2332  }
2333  return PtrOff;
2334}
/// LowerCall - Lower an outgoing call: argument values are copied from
/// virtual regs to (physical regs)/(stack frame), and the CALLSEQ_START,
/// AMDILISD::CALL and CALLSEQ_END nodes are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are not supported; report that back through the by-ref
  // flag. Every `if (isTailCall)` assert below is an unreachable guard.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but this needs to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Total stack space the outgoing arguments occupy.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Mark the start of the call sequence (reserves NumBytes of stack).
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to the location's register type if required.
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register arguments are queued up and copied into their physical
      // registers just before the call (CopyToReg loop below).
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Tie all argument stores together so they all complete before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register arguments into their assigned physical registers,
    // glued together (via InFlag) so they stay adjacent to the call.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and glue so results can be copied out after.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  // Glue the call to the last register copy above, if any.
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  // The actual call node.
  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
/// checkMADType - Decide whether the ISD::ADD rooted at Op may be folded
/// into a 24-bit MAD (every user is a load/store to hardware-backed
/// private/constant/local/region memory) or a 32-bit MAD (some user is a
/// global or software-emulated access). Results come back through the two
/// output flags; both stay false when the fold is unsafe.
/// NOTE: the bare `return` below disables the whole analysis, so both
/// flags currently always come back false and the MAD transform is off.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  // Analysis intentionally short-circuited; everything below is dead code.
  return;
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  // Inspect every user of the add; all must be memory accesses that use
  // the add as an address for the fold to be legal.
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    // NOTE(review): PT is a dyn_cast result dereferenced without a null
    // check — presumably getSrcValue() always has pointer type here;
    // confirm before re-enabling this analysis.
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    switch(as) {
      default:
        globalLoadStore = true;
        // falls through: flag already set, the private-mem check is harmless
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  // Any software-emulated access forces the full 32-bit MAD form.
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}
2572
2573SDValue
2574AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
2575{
2576  SDValue LHS = Op.getOperand(0);
2577  SDValue RHS = Op.getOperand(1);
2578  DebugLoc DL = Op.getDebugLoc();
2579  EVT OVT = Op.getValueType();
2580  SDValue DST;
2581  const AMDILSubtarget *stm = &this->getTargetMachine()
2582    .getSubtarget<AMDILSubtarget>();
2583  bool isVec = OVT.isVector();
2584  if (OVT.getScalarType() == MVT::i64) {
2585    MVT INTTY = MVT::i32;
2586    if (OVT == MVT::v2i64) {
2587      INTTY = MVT::v2i32;
2588    }
2589    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
2590        && INTTY == MVT::i32) {
2591      DST = DAG.getNode(AMDILISD::ADD,
2592          DL,
2593          OVT,
2594          LHS, RHS);
2595    } else {
2596      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
2597      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2598      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
2599      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
2600      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
2601      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
2602      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
2603      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
2604      SDValue cmp;
2605      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2606          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
2607          INTLO, RHSLO);
2608      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
2609      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
2610      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
2611          INTLO, INTHI);
2612    }
2613  } else {
2614    if (LHS.getOpcode() == ISD::FrameIndex ||
2615        RHS.getOpcode() == ISD::FrameIndex) {
2616      DST = DAG.getNode(AMDILISD::ADDADDR,
2617          DL,
2618          OVT,
2619          LHS, RHS);
2620    } else {
2621      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2622          && LHS.getNumOperands()
2623          && RHS.getNumOperands()) {
2624        bool is24bitMAD = false;
2625        bool is32bitMAD = false;
2626        const ConstantSDNode *LHSConstOpCode =
2627          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
2628        const ConstantSDNode *RHSConstOpCode =
2629          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
2630        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
2631            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
2632            || LHS.getOpcode() == ISD::MUL
2633            || RHS.getOpcode() == ISD::MUL) {
2634          SDValue Op1, Op2, Op3;
2635          // FIXME: Fix this so that it works for unsigned 24bit ops.
2636          if (LHS.getOpcode() == ISD::MUL) {
2637            Op1 = LHS.getOperand(0);
2638            Op2 = LHS.getOperand(1);
2639            Op3 = RHS;
2640          } else if (RHS.getOpcode() == ISD::MUL) {
2641            Op1 = RHS.getOperand(0);
2642            Op2 = RHS.getOperand(1);
2643            Op3 = LHS;
2644          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
2645            Op1 = LHS.getOperand(0);
2646            Op2 = DAG.getConstant(
2647                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
2648            Op3 = RHS;
2649          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
2650            Op1 = RHS.getOperand(0);
2651            Op2 = DAG.getConstant(
2652                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
2653            Op3 = LHS;
2654          }
2655          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
2656          // We can possibly do a MAD transform!
2657          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
2658            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
2659            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2660            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2661                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
2662                Op1, Op2, Op3);
2663          } else if(is32bitMAD) {
2664            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2665            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2666                DL, Tys, DAG.getEntryNode(),
2667                DAG.getConstant(
2668                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
2669                Op1, Op2, Op3);
2670          }
2671        }
2672      }
2673      DST = DAG.getNode(AMDILISD::ADD,
2674          DL,
2675          OVT,
2676          LHS, RHS);
2677    }
2678  }
2679  return DST;
2680}
2681SDValue
2682AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2683    uint32_t bits) const
2684{
2685  DebugLoc DL = Op.getDebugLoc();
2686  EVT INTTY = Op.getValueType();
2687  EVT FPTY;
2688  if (INTTY.isVector()) {
2689    FPTY = EVT(MVT::getVectorVT(MVT::f32,
2690          INTTY.getVectorNumElements()));
2691  } else {
2692    FPTY = EVT(MVT::f32);
2693  }
2694  /* static inline uint
2695     __clz_Nbit(uint x)
2696     {
2697     int xor = 0x3f800000U | x;
2698     float tp = as_float(xor);
2699     float t = tp + -1.0f;
2700     uint tint = as_uint(t);
2701     int cmp = (x != 0);
2702     uint tsrc = tint >> 23;
2703     uint tmask = tsrc & 0xffU;
2704     uint cst = (103 + N)U - tmask;
2705     return cmp ? cst : N;
2706     }
2707     */
2708  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
2709      && "genCLZu16 only works on 32bit types");
2710  // uint x = Op
2711  SDValue x = Op;
2712  // xornode = 0x3f800000 | x
2713  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2714      DAG.getConstant(0x3f800000, INTTY), x);
2715  // float tp = as_float(xornode)
2716  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2717  // float t = tp + -1.0f
2718  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2719      DAG.getConstantFP(-1.0f, FPTY));
2720  // uint tint = as_uint(t)
2721  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2722  // int cmp = (x != 0)
2723  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2724      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2725      DAG.getConstant(0, INTTY));
2726  // uint tsrc = tint >> 23
2727  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2728      DAG.getConstant(23, INTTY));
2729  // uint tmask = tsrc & 0xFF
2730  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2731      DAG.getConstant(0xFFU, INTTY));
2732  // uint cst = (103 + bits) - tmask
2733  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2734      DAG.getConstant((103U + bits), INTTY), tmask);
2735  // return cmp ? cst : N
2736  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2737      DAG.getConstant(bits, INTTY));
2738  return cst;
2739}
2740
/// genCLZu32 - Emit DAG nodes computing the leading-zero count of a 32-bit
/// (or vector-of-i32) value. HD5XXX+ uses the hardware ffb_hi instruction;
/// HD4XXX assembles the count from two 16-bit counts via genCLZuN.
SDValue
AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY = Op.getValueType();
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    //__clz_32bit(uint u)
    //{
    // int z = __amdil_ffb_hi(u) ;
    // return z < 0 ? 32 : z;
    // }
    // uint u = op
    SDValue u = Op;
    // int z = __amdil_ffb_hi(u)  -- position of the highest set bit.
    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
    // int cmp = z < 0  -- ffb_hi reports "no bit set" as a negative value.
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
        z, DAG.getConstant(0, INTTY));
    // return cmp ? 32 : z
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
        DAG.getConstant(32, INTTY), z);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    //  static inline uint
    //__clz_32bit(uint x)
    //{
    //    uint zh = __clz_16bit(x >> 16);
    //    uint zl = __clz_16bit(x & 0xffffU);
    //   return zh == 16U ? 16U + zl : zh;
    //}
    // uint x = Op
    SDValue x = Op;
    // uint xs16 = x >> 16
    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
        DAG.getConstant(16, INTTY));
    // uint zh = __clz_16bit(xs16)  -- leading zeros of the high half.
    SDValue zh = genCLZuN(xs16, DAG, 16);
    // uint xa16 = x & 0xFFFF
    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
        DAG.getConstant(0xFFFFU, INTTY));
    // uint zl = __clz_16bit(xa16)  -- leading zeros of the low half.
    SDValue zl = genCLZuN(xa16, DAG, 16);
    // uint cmp = zh == 16U  -- high half entirely zero?
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zh, DAG.getConstant(16U, INTTY));
    // uint zl16 = zl + 16
    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
        DAG.getConstant(16, INTTY), zl);
    // return cmp ? zl16 : zh
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp, zl16, zh);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
/// genCLZu64 - Emit DAG nodes computing the leading-zero count of a 64-bit
/// (or vector-of-i64) value, returned in the matching 32-bit integer type.
/// HD5XXX+ combines two 32-bit counts; HD4XXX combines three 23-bit counts
/// (the widest width genCLZuN's float trick can handle there).
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = (uint)(x & 0xFFFFFFFF)  -- low 32 bits.
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = (uint)(x >> 32)  -- high 32 bits.
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32  -- high word entirely zero?
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX:
    //  static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46  -- top 18 bits.
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23  -- middle 23 bits (after masking below).
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x  -- bottom 23 bits (after masking below).
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (the top segment holds only 18 real bits,
    // so the 23-bit count overshoots by 5; implemented as add of -5)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)  -- top 18 bits entirely zero?
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = (zhm5zm == 41)  -- top 41 bits entirely zero?
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
/// genf64toi64 - Emit DAG nodes converting an f64 (or vector-of-f64) value
/// to i64. With includeSign the conversion is signed (via fabs + negate +
/// select); otherwise unsigned. HD7XXX+ uses a two-step float conversion;
/// older devices manually decompose the double into mantissa/exponent and
/// shift.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 / 0xcf800000 are the f32 bit patterns of
    // 0x1.0p-32 and -0x1.0p+32, but getConstantFP takes a numeric value,
    // not a bit pattern — confirm these produce the intended constants.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    // ulod = mad(uhi, -2^32, d)  -- remainder below 2^32.
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Select the negated result when the input was negative (d != |d|).
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
    // Convert d in to 32-bit components
    long x = as_long(d);
    xhi = LCOMPHI(x);
    xlo = LCOMPLO(x);

    // Generate 'normalized' mantissa
    mhi = xhi | 0x00100000; // hidden bit
    mhi <<= 11;
    temp = xlo >> (32 - 11);
    mhi |= temp
    mlo = xlo << 11;

    // Compute shift right count from exponent
    e = (xhi >> (52-32)) & 0x7ff;
    sr = 1023 + 63 - e;
    srge64 = sr >= 64;
    srge32 = sr >= 32;

    // Compute result for 0 <= sr < 32
    rhi0 = mhi >> (sr &31);
    rlo0 = mlo >> (sr &31);
    temp = mhi << (32 - sr);
    temp |= rlo0;
    rlo0 = sr ? temp : rlo0;

    // Compute result for 32 <= sr
    rhi1 = 0;
    rlo1 = srge64 ? 0 : rhi0;

    // Pick between the 2 results
    rhi = srge32 ? rhi1 : rhi0;
    rlo = srge32 ? rlo1 : rlo0;

    // Optional saturate on overflow
    srlt0 = sr < 0;
    rhi = srlt0 ? MAXVALUE : rhi;
    rlo = srlt0 ? MAXVALUE : rlo;

    // Create long
    res = LCREATE( rlo, rhi );

    // Deal with sign bit (ignoring whether result is signed or unsigned value)
    if (includeSign) {
    sign = ((signed int) xhi) >> 31; fill with sign bit
    sign = LCREATE( sign, sign );
    res += sign;
    res ^= sign;
    }

    return res;
    }
    */
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa (restore the implicit leading 1,
    // left-justify the 53-bit mantissa across mhi:mlo)
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: replicate the sign across a 64-bit mask, then
    // add+xor to conditionally negate (two's complement).
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
/// genf64toi32 - Emit DAG nodes converting an f64 (or vector-of-f64) value
/// to i32 by manual mantissa/exponent decomposition: rebuild the normalized
/// mantissa, shift it right by an amount derived from the exponent, and
/// (for includeSign) conditionally negate via the add+xor trick.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
  // Convert d in to 32-bit components
  long x = as_long(d);
  xhi = LCOMPHI(x);
  xlo = LCOMPLO(x);

  // Generate 'normalized' mantissa
  mhi = xhi | 0x00100000; // hidden bit
  mhi <<= 11;
  temp = xlo >> (32 - 11);
  mhi |= temp

  // Compute shift right count from exponent
  e = (xhi >> (52-32)) & 0x7ff;
  sr = 1023 + 31 - e;
  srge32 = sr >= 32;

  // Compute result for 0 <= sr < 32
  res = mhi >> (sr &31);
  res = srge32 ? 0 : res;

  // Optional saturate on overflow
  srlt0 = sr < 0;
  res = srlt0 ? MAXVALUE : res;

  // Deal with sign bit (ignoring whether result is signed or unsigned value)
  if (includeSign) {
  sign = ((signed int) xhi) >> 31; fill with sign bit
  res += sign;
  res ^= sign;
  }

  return res;
  }
  */
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa: restore the implicit leading 1 and
  // left-justify the top mantissa bits into a single 32-bit word.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32; values that shift out entirely
  // (sr >= 32) produce 0.
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit: replicate the sign, then add+xor to conditionally
  // negate (two's complement).
  if (includeSign) {
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
3185SDValue
3186AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
3187{
3188  SDValue RHS = Op.getOperand(0);
3189  EVT RHSVT = RHS.getValueType();
3190  MVT RST = RHSVT.getScalarType().getSimpleVT();
3191  EVT LHSVT = Op.getValueType();
3192  MVT LST = LHSVT.getScalarType().getSimpleVT();
3193  DebugLoc DL = Op.getDebugLoc();
3194  SDValue DST;
3195  const AMDILTargetMachine*
3196    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3197    (&this->getTargetMachine());
3198  const AMDILSubtarget*
3199    stm = dynamic_cast<const AMDILSubtarget*>(
3200        amdtm->getSubtargetImpl());
3201  if (RST == MVT::f64 && RHSVT.isVector()
3202      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3203    // We dont support vector 64bit floating point convertions.
3204    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3205      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3206          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3207      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3208      if (!x) {
3209        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3210      } else {
3211        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3212            DST, op, DAG.getTargetConstant(x, MVT::i32));
3213      }
3214    }
3215  } else {
3216    if (RST == MVT::f64
3217        && LST == MVT::i32) {
3218      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3219        DST = SDValue(Op.getNode(), 0);
3220      } else {
3221        DST = genf64toi32(RHS, DAG, true);
3222      }
3223    } else if (RST == MVT::f64
3224        && LST == MVT::i64) {
3225      DST = genf64toi64(RHS, DAG, true);
3226    } else if (RST == MVT::f64
3227        && (LST == MVT::i8 || LST == MVT::i16)) {
3228      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3229        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3230      } else {
3231        SDValue ToInt = genf64toi32(RHS, DAG, true);
3232        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3233      }
3234
3235    } else {
3236      DST = SDValue(Op.getNode(), 0);
3237    }
3238  }
3239  return DST;
3240}
3241
3242SDValue
3243AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
3244{
3245  SDValue DST;
3246  SDValue RHS = Op.getOperand(0);
3247  EVT RHSVT = RHS.getValueType();
3248  MVT RST = RHSVT.getScalarType().getSimpleVT();
3249  EVT LHSVT = Op.getValueType();
3250  MVT LST = LHSVT.getScalarType().getSimpleVT();
3251  DebugLoc DL = Op.getDebugLoc();
3252  const AMDILTargetMachine*
3253    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3254    (&this->getTargetMachine());
3255  const AMDILSubtarget*
3256    stm = dynamic_cast<const AMDILSubtarget*>(
3257        amdtm->getSubtargetImpl());
3258  if (RST == MVT::f64 && RHSVT.isVector()
3259      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3260    // We dont support vector 64bit floating point convertions.
3261    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3262      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3263          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3264      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3265      if (!x) {
3266        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3267      } else {
3268        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3269            DST, op, DAG.getTargetConstant(x, MVT::i32));
3270      }
3271
3272    }
3273  } else {
3274    if (RST == MVT::f64
3275        && LST == MVT::i32) {
3276      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3277        DST = SDValue(Op.getNode(), 0);
3278      } else {
3279        DST = genf64toi32(RHS, DAG, false);
3280      }
3281    } else if (RST == MVT::f64
3282        && LST == MVT::i64) {
3283      DST = genf64toi64(RHS, DAG, false);
3284    } else if (RST == MVT::f64
3285        && (LST == MVT::i8 || LST == MVT::i16)) {
3286      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3287        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3288      } else {
3289        SDValue ToInt = genf64toi32(RHS, DAG, false);
3290        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3291      }
3292
3293    } else {
3294      DST = SDValue(Op.getNode(), 0);
3295    }
3296  }
3297  return DST;
3298}
// Software expansion of a u32 -> f64 conversion (UINT_TO_FP) for devices
// without native support.  On CAL SC >= 135 the classic exponent-bias
// trick is used: place the integer in the low word of the bit pattern of
// 2^52 and subtract 2^52.  Older compilers get a manual IEEE-754
// construction from CLZ, exponent, and normalized mantissa.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;   // i32, or vector-of-i32 matching the source element count
  EVT LONGVT;  // i64, or vector-of-i64 matching the source element count
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac: shift the most-significant set bit up to bit 31.
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac into the double's two 32-bit words.
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
3365SDValue
3366AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
3367    SelectionDAG &DAG) const
3368{
3369  EVT RHSVT = RHS.getValueType();
3370  DebugLoc DL = RHS.getDebugLoc();
3371  EVT INTVT;
3372  EVT LONGVT;
3373  bool isVec = RHSVT.isVector();
3374  if (isVec) {
3375    INTVT = EVT(MVT::getVectorVT(MVT::i32,
3376          RHSVT.getVectorNumElements()));
3377  } else {
3378    INTVT = EVT(MVT::i32);
3379  }
3380  LONGVT = RHSVT;
3381  SDValue x = RHS;
3382  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
3383      &this->getTargetMachine())->getSubtargetImpl();
3384  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3385    // double dhi = (double)(as_uint2(x).y);
3386    // double dlo = (double)(as_uint2(x).x);
3387    // return mad(dhi, 0x1.0p+32, dlo)
3388    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
3389    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
3390    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
3391    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
3392    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
3393        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
3394  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
3395    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
3396    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
3397    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
3398    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
3399    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
3400    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
3401    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
3402    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
3403    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
3404    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
3405        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
3406    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
3407    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
3408
3409  } else {
3410    SDValue clz = genCLZu64(x, DAG);
3411    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
3412    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
3413
3414    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
3415    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
3416        DAG.getConstant( (1023+63), INTVT), clz );
3417    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
3418    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3419        mash, exp, mash );  // exp = exp, or 0 if input was 0
3420
3421    // Normalize frac
3422    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
3423        clz, DAG.getConstant( 31, INTVT ) );
3424    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
3425        DAG.getConstant( 32, INTVT ), clz31 );
3426    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
3427    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
3428    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
3429    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
3430    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3431    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3432    SDValue rlo2 = DAG.getConstant( 0, INTVT );
3433    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
3434        clz, DAG.getConstant( 32, INTVT ) );
3435    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3436        clz32, rhi2, rhi1 );
3437    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3438        clz32, rlo2, rlo1 );
3439
3440    // Eliminate hidden bit
3441    rhi = DAG.getNode( ISD::AND, DL, INTVT,
3442        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
3443
3444    // Save bits needed to round properly
3445    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
3446        rlo, DAG.getConstant( 0x7ff, INTVT ) );
3447
3448    // Pack exponent and frac
3449    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
3450        rlo, DAG.getConstant( 11, INTVT ) );
3451    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
3452        rhi, DAG.getConstant( (32 - 11), INTVT ) );
3453    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
3454    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
3455        rhi, DAG.getConstant( 11, INTVT ) );
3456    exp = DAG.getNode( ISD::SHL, DL, INTVT,
3457        exp, DAG.getConstant( 20, INTVT ) );
3458    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
3459
3460    // Compute rounding bit
3461    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
3462        rlo, DAG.getConstant( 1, INTVT ) );
3463    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
3464        round, DAG.getConstant( 0x3ff, INTVT ) );
3465    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
3466        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
3467        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
3468    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
3469    round = DAG.getNode( ISD::SRL, DL, INTVT,
3470        round, DAG.getConstant( 10, INTVT ) );
3471    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
3472
3473    // Add rounding bit
3474    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
3475        round, DAG.getConstant( 0, INTVT ) );
3476    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
3477    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
3478    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
3479  }
3480}
3481SDValue
3482AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3483{
3484  SDValue RHS = Op.getOperand(0);
3485  EVT RHSVT = RHS.getValueType();
3486  MVT RST = RHSVT.getScalarType().getSimpleVT();
3487  EVT LHSVT = Op.getValueType();
3488  MVT LST = LHSVT.getScalarType().getSimpleVT();
3489  DebugLoc DL = Op.getDebugLoc();
3490  SDValue DST;
3491  EVT INTVT;
3492  EVT LONGVT;
3493  const AMDILTargetMachine*
3494    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3495    (&this->getTargetMachine());
3496  const AMDILSubtarget*
3497    stm = dynamic_cast<const AMDILSubtarget*>(
3498        amdtm->getSubtargetImpl());
3499  if (LST == MVT::f64 && LHSVT.isVector()
3500      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3501    // We dont support vector 64bit floating point convertions.
3502    DST = Op;
3503    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3504      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3505          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3506      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3507      if (!x) {
3508        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3509      } else {
3510        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3511            op, DAG.getTargetConstant(x, MVT::i32));
3512      }
3513
3514    }
3515  } else {
3516
3517    if (RST == MVT::i32
3518        && LST == MVT::f64) {
3519      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3520        DST = SDValue(Op.getNode(), 0);
3521      } else {
3522        DST = genu32tof64(RHS, LHSVT, DAG);
3523      }
3524    } else if (RST == MVT::i64
3525        && LST == MVT::f64) {
3526      DST = genu64tof64(RHS, LHSVT, DAG);
3527    } else {
3528      DST = SDValue(Op.getNode(), 0);
3529    }
3530  }
3531  return DST;
3532}
3533
3534SDValue
3535AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3536{
3537  SDValue RHS = Op.getOperand(0);
3538  EVT RHSVT = RHS.getValueType();
3539  MVT RST = RHSVT.getScalarType().getSimpleVT();
3540  EVT INTVT;
3541  EVT LONGVT;
3542  SDValue DST;
3543  bool isVec = RHSVT.isVector();
3544  DebugLoc DL = Op.getDebugLoc();
3545  EVT LHSVT = Op.getValueType();
3546  MVT LST = LHSVT.getScalarType().getSimpleVT();
3547  const AMDILTargetMachine*
3548    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3549    (&this->getTargetMachine());
3550  const AMDILSubtarget*
3551    stm = dynamic_cast<const AMDILSubtarget*>(
3552        amdtm->getSubtargetImpl());
3553  if (LST == MVT::f64 && LHSVT.isVector()
3554      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3555    // We dont support vector 64bit floating point convertions.
3556    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3557      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3558          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3559      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3560      if (!x) {
3561        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3562      } else {
3563        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3564            op, DAG.getTargetConstant(x, MVT::i32));
3565      }
3566
3567    }
3568  } else {
3569
3570    if (isVec) {
3571      LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3572            RHSVT.getVectorNumElements()));
3573      INTVT = EVT(MVT::getVectorVT(MVT::i32,
3574            RHSVT.getVectorNumElements()));
3575    } else {
3576      LONGVT = EVT(MVT::i64);
3577      INTVT = EVT(MVT::i32);
3578    }
3579    MVT RST = RHSVT.getScalarType().getSimpleVT();
3580    if ((RST == MVT::i32 || RST == MVT::i64)
3581        && LST == MVT::f64) {
3582      if (RST == MVT::i32) {
3583        if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3584          DST = SDValue(Op.getNode(), 0);
3585          return DST;
3586        }
3587      }
3588      SDValue c31 = DAG.getConstant( 31, INTVT );
3589      SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3590
3591      SDValue S;      // Sign, as 0 or -1
3592      SDValue Sbit;   // Sign bit, as one bit, MSB only.
3593      if (RST == MVT::i32) {
3594        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3595        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3596      } else { // 64-bit case... SRA of 64-bit values is slow
3597        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3598        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3599        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3600        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3601      }
3602
3603      // get abs() of input value, given sign as S (0 or -1)
3604      // SpI = RHS + S
3605      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3606      // SpIxS = SpI ^ S
3607      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
3608
3609      // Convert unsigned value to double precision
3610      SDValue R;
3611      if (RST == MVT::i32) {
3612        // r = cast_u32_to_f64(SpIxS)
3613        R = genu32tof64(SpIxS, LHSVT, DAG);
3614      } else {
3615        // r = cast_u64_to_f64(SpIxS)
3616        R = genu64tof64(SpIxS, LHSVT, DAG);
3617      }
3618
3619      // drop in the sign bit
3620      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3621      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3622      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3623      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3624      t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3625      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3626    } else {
3627      DST = SDValue(Op.getNode(), 0);
3628    }
3629  }
3630  return DST;
3631}
// Lower 64-bit integer subtraction (i64 or v2i64) into 32-bit halves.
// The low and high words are subtracted independently; the borrow out of
// the low subtraction (LHSLO u< RHSLO) is produced by AMDILISD::CMP as an
// all-ones mask (-1) and added into the high-word difference.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
      amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // Borrow mask: -1 when LHSLO u< RHSLO, else 0.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: compare each lane's low word separately and rebuild
      // a v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    // Adding the all-ones borrow mask subtracts 1 from the high word.
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    // Narrower subtractions are already legal; pass through unchanged.
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
3695SDValue
3696AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3697{
3698  EVT OVT = Op.getValueType();
3699  SDValue DST;
3700  if (OVT.getScalarType() == MVT::f64) {
3701    DST = LowerFDIV64(Op, DAG);
3702  } else if (OVT.getScalarType() == MVT::f32) {
3703    DST = LowerFDIV32(Op, DAG);
3704  } else {
3705    DST = SDValue(Op.getNode(), 0);
3706  }
3707  return DST;
3708}
3709
3710SDValue
3711AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3712{
3713  EVT OVT = Op.getValueType();
3714  SDValue DST;
3715  if (OVT.getScalarType() == MVT::i64) {
3716    DST = LowerSDIV64(Op, DAG);
3717  } else if (OVT.getScalarType() == MVT::i32) {
3718    DST = LowerSDIV32(Op, DAG);
3719  } else if (OVT.getScalarType() == MVT::i16
3720      || OVT.getScalarType() == MVT::i8) {
3721    DST = LowerSDIV24(Op, DAG);
3722  } else {
3723    DST = SDValue(Op.getNode(), 0);
3724  }
3725  return DST;
3726}
3727
3728SDValue
3729AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3730{
3731  EVT OVT = Op.getValueType();
3732  SDValue DST;
3733  if (OVT.getScalarType() == MVT::i64) {
3734    DST = LowerUDIV64(Op, DAG);
3735  } else if (OVT.getScalarType() == MVT::i32) {
3736    DST = LowerUDIV32(Op, DAG);
3737  } else if (OVT.getScalarType() == MVT::i16
3738      || OVT.getScalarType() == MVT::i8) {
3739    DST = LowerUDIV24(Op, DAG);
3740  } else {
3741    DST = SDValue(Op.getNode(), 0);
3742  }
3743  return DST;
3744}
3745
3746SDValue
3747AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3748{
3749  EVT OVT = Op.getValueType();
3750  SDValue DST;
3751  if (OVT.getScalarType() == MVT::i64) {
3752    DST = LowerSREM64(Op, DAG);
3753  } else if (OVT.getScalarType() == MVT::i32) {
3754    DST = LowerSREM32(Op, DAG);
3755  } else if (OVT.getScalarType() == MVT::i16) {
3756    DST = LowerSREM16(Op, DAG);
3757  } else if (OVT.getScalarType() == MVT::i8) {
3758    DST = LowerSREM8(Op, DAG);
3759  } else {
3760    DST = SDValue(Op.getNode(), 0);
3761  }
3762  return DST;
3763}
3764
3765SDValue
3766AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3767{
3768  EVT OVT = Op.getValueType();
3769  SDValue DST;
3770  if (OVT.getScalarType() == MVT::i64) {
3771    DST = LowerUREM64(Op, DAG);
3772  } else if (OVT.getScalarType() == MVT::i32) {
3773    DST = LowerUREM32(Op, DAG);
3774  } else if (OVT.getScalarType() == MVT::i16) {
3775    DST = LowerUREM16(Op, DAG);
3776  } else if (OVT.getScalarType() == MVT::i8) {
3777    DST = LowerUREM8(Op, DAG);
3778  } else {
3779    DST = SDValue(Op.getNode(), 0);
3780  }
3781  return DST;
3782}
3783
// Lower 64-bit integer multiply (i64 or v2i64) using 32-bit hardware ops:
//   low  = mullo(a.lo, b.lo)
//   high = a.hi*b.lo + a.lo*b.hi + mulhi(a.lo, b.lo)
// The a.hi*b.hi term is dropped since it only affects bits >= 64.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    // Narrower multiplies are already legal; pass through unchanged.
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0 -- carries from the low product into the high
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Recombine the two 32-bit halves into the 64-bit result.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower BUILD_VECTOR.  A VBUILD node splats operand 0 across all lanes; if
// every operand is identical the splat alone suffices.  Otherwise the
// non-undef operands 1..3 are inserted individually.  The switch below
// intentionally falls through (case 4 -> 3 -> 2) so lower elements are
// inserted after higher ones.
// NOTE(review): the insert indices are 5/6/7 rather than 1/2/3 --
// presumably INSERT_VECTOR_ELT is pattern-matched here with an
// offset-by-4 encoding; confirm against the ISel patterns.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Start with a splat of element 0.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    // All lanes identical: the splat is the final result.
    return Nodes1;
  }
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // FALLTHROUGH
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // FALLTHROUGH
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
3911
// Lower INSERT_VECTOR_ELT into AMDILISD::VINSERT.  With a constant index
// the two swizzle masks are computed directly; with a dynamic index a
// VINSERT is materialized for every element position and the one whose
// position equals the runtime index is selected via CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  if (!VT.isVector()) {
    // Inserting into a scalar is the identity on operand 0.
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Use the element value as the insert source unless it is undef, in
  // which case operand 0 is reused -- presumably to keep the lane defined;
  // confirm against the VINSERT semantics.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Static index: build the VINSERT swizzle masks for that position.
    // NOTE(review): mask2/mask3 look like per-byte lane-select encodings
    // (0x04030201 = identity lane order) -- confirm against the VINSERT
    // ISel patterns.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: start with an insert at position 0 (swizzleNum == 0),
    // then select the matching insert for each other position at runtime.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c: per-lane mask, true where the runtime index equals x.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
3967
3968SDValue
3969AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
3970    SelectionDAG &DAG) const
3971{
3972  EVT VT = Op.getValueType();
3973  //printSDValue(Op, 1);
3974  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
3975  uint64_t swizzleNum = 0;
3976  DebugLoc DL = Op.getDebugLoc();
3977  SDValue Res;
3978  if (!Op.getOperand(0).getValueType().isVector()) {
3979    Res = Op.getOperand(0);
3980    return Res;
3981  }
3982  if (CSDN) {
3983    // Static vector extraction
3984    swizzleNum = CSDN->getZExtValue() + 1;
3985    Res = DAG.getNode(AMDILISD::VEXTRACT,
3986        DL, VT,
3987        Op.getOperand(0),
3988        DAG.getTargetConstant(swizzleNum, MVT::i32));
3989  } else {
3990    SDValue Op1 = Op.getOperand(1);
3991    uint32_t vecSize = 4;
3992    SDValue Op0 = Op.getOperand(0);
3993    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
3994        DL, VT, Op0,
3995        DAG.getTargetConstant(1, MVT::i32));
3996    if (Op0.getValueType().isVector()) {
3997      vecSize = Op0.getValueType().getVectorNumElements();
3998    }
3999    for (uint32_t x = 2; x <= vecSize; ++x) {
4000      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
4001          DL, VT, Op0,
4002          DAG.getTargetConstant(x, MVT::i32));
4003      SDValue c = DAG.getNode(AMDILISD::CMP,
4004          DL, Op1.getValueType(),
4005          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
4006          Op1, DAG.getConstant(x, MVT::i32));
4007      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
4008          VT, c, t, res);
4009
4010    }
4011    Res = res;
4012  }
4013  return Res;
4014}
4015
4016SDValue
4017AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
4018    SelectionDAG &DAG) const
4019{
4020  uint32_t vecSize = Op.getValueType().getVectorNumElements();
4021  SDValue src = Op.getOperand(0);
4022  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4023  uint64_t offset = 0;
4024  EVT vecType = Op.getValueType().getVectorElementType();
4025  DebugLoc DL = Op.getDebugLoc();
4026  SDValue Result;
4027  if (CSDN) {
4028    offset = CSDN->getZExtValue();
4029    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4030        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
4031    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4032        Op.getValueType(), Result);
4033    for (uint32_t x = 1; x < vecSize; ++x) {
4034      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4035          src, DAG.getConstant(offset + x, MVT::i32));
4036      if (elt.getOpcode() != ISD::UNDEF) {
4037        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4038            Op.getValueType(), Result, elt,
4039            DAG.getConstant(x, MVT::i32));
4040      }
4041    }
4042  } else {
4043    SDValue idx = Op.getOperand(1);
4044    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4045        DL, vecType, src, idx);
4046    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4047        Op.getValueType(), Result);
4048    for (uint32_t x = 1; x < vecSize; ++x) {
4049      idx = DAG.getNode(ISD::ADD, DL, vecType,
4050          idx, DAG.getConstant(1, MVT::i32));
4051      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4052          src, idx);
4053      if (elt.getOpcode() != ISD::UNDEF) {
4054        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4055            Op.getValueType(), Result, elt, idx);
4056      }
4057    }
4058  }
4059  return Result;
4060}
4061SDValue
4062AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4063    SelectionDAG &DAG) const
4064{
4065  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
4066      Op.getDebugLoc(),
4067      Op.getValueType(),
4068      Op.getOperand(0));
4069  return Res;
4070}
4071SDValue
4072AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
4073{
4074  SDValue andOp;
4075  andOp = DAG.getNode(
4076      AMDILISD::AND,
4077      Op.getDebugLoc(),
4078      Op.getValueType(),
4079      Op.getOperand(0),
4080      Op.getOperand(1));
4081  return andOp;
4082}
4083SDValue
4084AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
4085{
4086  SDValue orOp;
4087  orOp = DAG.getNode(AMDILISD::OR,
4088      Op.getDebugLoc(),
4089      Op.getValueType(),
4090      Op.getOperand(0),
4091      Op.getOperand(1));
4092  return orOp;
4093}
4094SDValue
4095AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
4096{
4097  SDValue Cond = Op.getOperand(0);
4098  SDValue LHS = Op.getOperand(1);
4099  SDValue RHS = Op.getOperand(2);
4100  DebugLoc DL = Op.getDebugLoc();
4101  Cond = getConversionNode(DAG, Cond, Op, true);
4102  Cond = DAG.getNode(AMDILISD::CMOVLOG,
4103      DL,
4104      Op.getValueType(), Cond, LHS, RHS);
4105  return Cond;
4106}
SDValue
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  // Lower select_cc(lhs, rhs, true_val, false_val, cc): emit an AMDIL
  // CMP of lhs/rhs and select between true_val/false_val with CMOVLOG.
  // When the two selected values are the i32 constants all-ones and
  // zero, the comparison result itself (possibly NOT'ed) is already
  // the answer and the CMOVLOG can be skipped entirely.
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;  // comparison result can be used directly
  bool genINot = false;   // ...after inverting it with a NOT
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        skipCMov = true;
        genINot = true;
      }
    }
  }
  // Translate the generic condition code into the AMDIL comparison
  // opcode for this operand type.
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Convert the comparison result to the select's result type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
4161SDValue
4162AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
4163{
4164  SDValue Cond;
4165  SDValue LHS = Op.getOperand(0);
4166  SDValue RHS = Op.getOperand(1);
4167  SDValue CC  = Op.getOperand(2);
4168  DebugLoc DL = Op.getDebugLoc();
4169  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4170  unsigned int AMDILCC = CondCCodeToCC(
4171      SetCCOpcode,
4172      LHS.getValueType().getSimpleVT().SimpleTy);
4173  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
4174  Cond = DAG.getNode(
4175      AMDILISD::CMP,
4176      DL,
4177      LHS.getValueType(),
4178      DAG.getConstant(AMDILCC, MVT::i32),
4179      LHS,
4180      RHS);
4181  Cond = getConversionNode(DAG, Cond, Op, true);
4182  Cond = DAG.getNode(
4183      ISD::AND,
4184      DL,
4185      Cond.getValueType(),
4186      DAG.getConstant(1, Cond.getValueType()),
4187      Cond);
4188  return Cond;
4189}
4190
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  // Lower sign_extend_inreg with the classic shl/sra pair: shift left
  // so the narrow type's sign bit lands in the MSB, then arithmetic
  // shift right by the same amount.  Sub-32-bit values are widened to
  // 32 bits first and truncated back afterwards.
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
4222EVT
4223AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
4224{
4225  int iSize = (size * numEle);
4226  int vEle = (iSize >> ((size == 64) ? 6 : 5));
4227  if (!vEle) {
4228    vEle = 1;
4229  }
4230  if (size == 64) {
4231    if (vEle == 1) {
4232      return EVT(MVT::i64);
4233    } else {
4234      return EVT(MVT::getVectorVT(MVT::i64, vEle));
4235    }
4236  } else {
4237    if (vEle == 1) {
4238      return EVT(MVT::i32);
4239    } else {
4240      return EVT(MVT::getVectorVT(MVT::i32, vEle));
4241    }
4242  }
4243}
4244
SDValue
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  // Lower BITCAST between types whose scalar sizes differ by manually
  // packing/unpacking lanes with shifts, masks, and OR trees.  Casts
  // that need no repacking (and all unhandled shapes) fall through to
  // the single generic BITCONV node at the bottom of the function.
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        // Pack the four 16-bit lanes into two 32-bit halves, then
        // fuse them into the i64 result with LCREATE.
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        // Spread lo, lo>>16, hi, hi>>16 across the four lanes.
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two scalar sizes are not handled here; fall
        // through to the generic BITCONV at the end of the function.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        // Widen each source element to a 32-bit lane one at a time.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        // Scratch array of per-lane values; released before returning.
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Shift each element to its bit position within the packed
        // destination lane, then OR adjacent elements together.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          // 8-bit sources need one more combining round (4 per lane).
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2:
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        // Each 32-bit source lane explodes into `mult` destination
        // lanes extracted with successive right shifts.
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3:
        // Scratch array of widened bytes; released before returning.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        // Merge byte pairs into 16-bit values (odd byte in the high
        // half), leaving the results in the even slots.
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4:
        // Scratch array of split bytes; released before returning.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Generic/no-repacking path: a single target BITCONV node.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}
4527
4528SDValue
4529AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4530    SelectionDAG &DAG) const
4531{
4532  SDValue Chain = Op.getOperand(0);
4533  SDValue Size = Op.getOperand(1);
4534  unsigned int SPReg = AMDIL::SP;
4535  DebugLoc DL = Op.getDebugLoc();
4536  SDValue SP = DAG.getCopyFromReg(Chain,
4537      DL,
4538      SPReg, MVT::i32);
4539  SDValue NewSP = DAG.getNode(ISD::ADD,
4540      DL,
4541      MVT::i32, SP, Size);
4542  Chain = DAG.getCopyToReg(SP.getValue(1),
4543      DL,
4544      SPReg, NewSP);
4545  SDValue Ops[2] = {NewSP, Chain};
4546  Chain = DAG.getMergeValues(Ops, 2 ,DL);
4547  return Chain;
4548}
4549SDValue
4550AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
4551{
4552  SDValue Chain = Op.getOperand(0);
4553  SDValue Cond  = Op.getOperand(1);
4554  SDValue Jump  = Op.getOperand(2);
4555  SDValue Result;
4556  Result = DAG.getNode(
4557      AMDILISD::BRANCH_COND,
4558      Op.getDebugLoc(),
4559      Op.getValueType(),
4560      Chain, Jump, Cond);
4561  return Result;
4562}
4563
4564SDValue
4565AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
4566{
4567  SDValue Chain = Op.getOperand(0);
4568  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
4569  SDValue LHS   = Op.getOperand(2);
4570  SDValue RHS   = Op.getOperand(3);
4571  SDValue JumpT  = Op.getOperand(4);
4572  SDValue CmpValue;
4573  ISD::CondCode CC = CCNode->get();
4574  SDValue Result;
4575  unsigned int cmpOpcode = CondCCodeToCC(
4576      CC,
4577      LHS.getValueType().getSimpleVT().SimpleTy);
4578  CmpValue = DAG.getNode(
4579      AMDILISD::CMP,
4580      Op.getDebugLoc(),
4581      LHS.getValueType(),
4582      DAG.getConstant(cmpOpcode, MVT::i32),
4583      LHS, RHS);
4584  Result = DAG.getNode(
4585      AMDILISD::BRANCH_COND,
4586      CmpValue.getDebugLoc(),
4587      MVT::Other, Chain,
4588      JumpT, CmpValue);
4589  return Result;
4590}
4591
4592SDValue
4593AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4594{
4595  SDValue Result = DAG.getNode(
4596      AMDILISD::DP_TO_FP,
4597      Op.getDebugLoc(),
4598      Op.getValueType(),
4599      Op.getOperand(0),
4600      Op.getOperand(1));
4601  return Result;
4602}
4603
4604SDValue
4605AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4606{
4607  SDValue Result = DAG.getNode(
4608      AMDILISD::VCONCAT,
4609      Op.getDebugLoc(),
4610      Op.getValueType(),
4611      Op.getOperand(0),
4612      Op.getOperand(1));
4613  return Result;
4614}
4615// LowerRET - Lower an ISD::RET node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  // Lower a function return: assign return values to registers per
  // the AMDIL calling convention, copy each value into its register
  // (glued together), and terminate with a RET_FLAG node.
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
void
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  // Expand a 64-bit integer relational (opCode) into 32-bit compares:
  // split both operands into hi/lo halves, compare each half with the
  // appropriate 32-bit opcode, combine the partial results, and pack
  // the final boolean into both halves of the 64-bit destination.
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  unsigned int opi32Code = 0, si32Code = 0;
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // All the relationals can be generated with 6 temp registers
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  switch(opCode) {
    default:
      assert(!"comparison case not handled!");
      break;
    case AMDIL::LEQ:
      si32Code = opi32Code = AMDIL::IEQ;
      break;
    case AMDIL::LNE:
      si32Code = opi32Code = AMDIL::INE;
      break;
    case AMDIL::LLE:
    case AMDIL::ULLE:
    case AMDIL::LGE:
    case AMDIL::ULGE:
      // <=/>= are rewritten as strict </> with swapped operands on
      // one half and an unsigned >= on the low half.
      if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
        std::swap(REGS[0], REGS[2]);
      } else {
        std::swap(REGS[1], REGS[3]);
      }
      if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::UGE;
      break;
    case AMDIL::LGT:
    case AMDIL::ULGT:
      // > is < with swapped operands.
      std::swap(REGS[0], REGS[2]);
      std::swap(REGS[1], REGS[3]);
      // FALLTHROUGH: intentionally shares the < opcode selection.
    case AMDIL::LLT:
    case AMDIL::ULLT:
      if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::ULT;
      break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
    case AMDIL::IEQ:
    case AMDIL::INE:
      {
        // combine the results with an and or or depending on if
        // we are eq or ne
        uint32_t combineOp = (opi32Code == AMDIL::IEQ)
          ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
        generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
      }
      break;
    default:
      // this finishes codegen for the following pattern
      // REGS[4] || (REGS[5] && (L_HI == R_HI))
      generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
      generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
          REGS[9]);
      generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
          REGS[10]);
      break;
  }
  // Replicate the 32-bit result into both halves of the 64-bit dest.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}
4769
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  // AMDIL imposes no particular function alignment requirement.
  return 0;
}
4775
4776void
4777AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4778    MachineBasicBlock::iterator &BBI,
4779    DebugLoc *DL, const TargetInstrInfo *TII) const
4780{
4781  mBB = BB;
4782  mBBI = BBI;
4783  mDL = DL;
4784  mTII = TII;
4785}
4786uint32_t
4787AMDILTargetLowering::genVReg(uint32_t regType) const
4788{
4789  return mBB->getParent()->getRegInfo().createVirtualRegister(
4790      getRegClassFromID(regType));
4791}
4792
4793MachineInstrBuilder
4794AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
4795{
4796  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
4797}
4798
4799MachineInstrBuilder
4800AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4801    uint32_t src1) const
4802{
4803  return generateMachineInst(opcode, dst).addReg(src1);
4804}
4805
4806MachineInstrBuilder
4807AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4808    uint32_t src1, uint32_t src2) const
4809{
4810  return generateMachineInst(opcode, dst, src1).addReg(src2);
4811}
4812
4813MachineInstrBuilder
4814AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4815    uint32_t src1, uint32_t src2, uint32_t src3) const
4816{
4817  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4818}
4819
4820
4821SDValue
4822AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4823{
4824  DebugLoc DL = Op.getDebugLoc();
4825  EVT OVT = Op.getValueType();
4826  SDValue LHS = Op.getOperand(0);
4827  SDValue RHS = Op.getOperand(1);
4828  MVT INTTY;
4829  MVT FLTTY;
4830  if (!OVT.isVector()) {
4831    INTTY = MVT::i32;
4832    FLTTY = MVT::f32;
4833  } else if (OVT.getVectorNumElements() == 2) {
4834    INTTY = MVT::v2i32;
4835    FLTTY = MVT::v2f32;
4836  } else if (OVT.getVectorNumElements() == 4) {
4837    INTTY = MVT::v4i32;
4838    FLTTY = MVT::v4f32;
4839  }
4840  unsigned bitsize = OVT.getScalarType().getSizeInBits();
4841  // char|short jq = ia ^ ib;
4842  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4843
4844  // jq = jq >> (bitsize - 2)
4845  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4846
4847  // jq = jq | 0x1
4848  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4849
4850  // jq = (int)jq
4851  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4852
4853  // int ia = (int)LHS;
4854  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4855
4856  // int ib, (int)RHS;
4857  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4858
4859  // float fa = (float)ia;
4860  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4861
4862  // float fb = (float)ib;
4863  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4864
4865  // float fq = native_divide(fa, fb);
4866  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4867
4868  // fq = trunc(fq);
4869  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4870
4871  // float fqneg = -fq;
4872  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4873
4874  // float fr = mad(fqneg, fb, fa);
4875  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4876
4877  // int iq = (int)fq;
4878  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4879
4880  // fr = fabs(fr);
4881  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4882
4883  // fb = fabs(fb);
4884  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4885
4886  // int cv = fr >= fb;
4887  SDValue cv;
4888  if (INTTY == MVT::i32) {
4889    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4890  } else {
4891    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4892  }
4893  // jq = (cv ? jq : 0);
4894  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4895      DAG.getConstant(0, OVT));
4896  // dst = iq + jq;
4897  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4898  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4899  return iq;
4900}
4901
4902SDValue
4903AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4904{
4905  DebugLoc DL = Op.getDebugLoc();
4906  EVT OVT = Op.getValueType();
4907  SDValue LHS = Op.getOperand(0);
4908  SDValue RHS = Op.getOperand(1);
4909  // The LowerSDIV32 function generates equivalent to the following IL.
4910  // mov r0, LHS
4911  // mov r1, RHS
4912  // ilt r10, r0, 0
4913  // ilt r11, r1, 0
4914  // iadd r0, r0, r10
4915  // iadd r1, r1, r11
4916  // ixor r0, r0, r10
4917  // ixor r1, r1, r11
4918  // udiv r0, r0, r1
4919  // ixor r10, r10, r11
4920  // iadd r0, r0, r10
4921  // ixor DST, r0, r10
4922
4923  // mov r0, LHS
4924  SDValue r0 = LHS;
4925
4926  // mov r1, RHS
4927  SDValue r1 = RHS;
4928
4929  // ilt r10, r0, 0
4930  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4931      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4932      r0, DAG.getConstant(0, OVT));
4933
4934  // ilt r11, r1, 0
4935  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4936      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4937      r1, DAG.getConstant(0, OVT));
4938
4939  // iadd r0, r0, r10
4940  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4941
4942  // iadd r1, r1, r11
4943  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4944
4945  // ixor r0, r0, r10
4946  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4947
4948  // ixor r1, r1, r11
4949  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4950
4951  // udiv r0, r0, r1
4952  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4953
4954  // ixor r10, r10, r11
4955  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4956
4957  // iadd r0, r0, r10
4958  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4959
4960  // ixor DST, r0, r10
4961  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4962  return DST;
4963}
4964
4965SDValue
4966AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
4967{
4968  return SDValue(Op.getNode(), 0);
4969}
4970
4971SDValue
4972AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
4973{
4974  DebugLoc DL = Op.getDebugLoc();
4975  EVT OVT = Op.getValueType();
4976  SDValue LHS = Op.getOperand(0);
4977  SDValue RHS = Op.getOperand(1);
4978  MVT INTTY;
4979  MVT FLTTY;
4980  if (!OVT.isVector()) {
4981    INTTY = MVT::i32;
4982    FLTTY = MVT::f32;
4983  } else if (OVT.getVectorNumElements() == 2) {
4984    INTTY = MVT::v2i32;
4985    FLTTY = MVT::v2f32;
4986  } else if (OVT.getVectorNumElements() == 4) {
4987    INTTY = MVT::v4i32;
4988    FLTTY = MVT::v4f32;
4989  }
4990
4991  // The LowerUDIV24 function implements the following CL.
4992  // int ia = (int)LHS
4993  // float fa = (float)ia
4994  // int ib = (int)RHS
4995  // float fb = (float)ib
4996  // float fq = native_divide(fa, fb)
4997  // fq = trunc(fq)
4998  // float t = mad(fq, fb, fb)
4999  // int iq = (int)fq - (t <= fa)
5000  // return (type)iq
5001
5002  // int ia = (int)LHS
5003  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
5004
5005  // float fa = (float)ia
5006  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
5007
5008  // int ib = (int)RHS
5009  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
5010
5011  // float fb = (float)ib
5012  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
5013
5014  // float fq = native_divide(fa, fb)
5015  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
5016
5017  // fq = trunc(fq)
5018  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
5019
5020  // float t = mad(fq, fb, fb)
5021  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
5022
5023  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5024  SDValue iq;
5025  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
5026  if (INTTY == MVT::i32) {
5027    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5028  } else {
5029    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5030  }
5031  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
5032
5033
5034  // return (type)iq
5035  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
5036  return iq;
5037
5038}
5039
5040SDValue
5041AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
5042{
5043  return SDValue(Op.getNode(), 0);
5044}
5045
5046SDValue
5047AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
5048{
5049  return SDValue(Op.getNode(), 0);
5050}
5051SDValue
5052AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
5053{
5054  DebugLoc DL = Op.getDebugLoc();
5055  EVT OVT = Op.getValueType();
5056  MVT INTTY = MVT::i32;
5057  if (OVT == MVT::v2i8) {
5058    INTTY = MVT::v2i32;
5059  } else if (OVT == MVT::v4i8) {
5060    INTTY = MVT::v4i32;
5061  }
5062  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5063  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5064  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5065  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5066  return LHS;
5067}
5068
5069SDValue
5070AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
5071{
5072  DebugLoc DL = Op.getDebugLoc();
5073  EVT OVT = Op.getValueType();
5074  MVT INTTY = MVT::i32;
5075  if (OVT == MVT::v2i16) {
5076    INTTY = MVT::v2i32;
5077  } else if (OVT == MVT::v4i16) {
5078    INTTY = MVT::v4i32;
5079  }
5080  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5081  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5082  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5083  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5084  return LHS;
5085}
5086
5087SDValue
5088AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
5089{
5090  DebugLoc DL = Op.getDebugLoc();
5091  EVT OVT = Op.getValueType();
5092  SDValue LHS = Op.getOperand(0);
5093  SDValue RHS = Op.getOperand(1);
5094  // The LowerSREM32 function generates equivalent to the following IL.
5095  // mov r0, LHS
5096  // mov r1, RHS
5097  // ilt r10, r0, 0
5098  // ilt r11, r1, 0
5099  // iadd r0, r0, r10
5100  // iadd r1, r1, r11
5101  // ixor r0, r0, r10
5102  // ixor r1, r1, r11
5103  // udiv r20, r0, r1
5104  // umul r20, r20, r1
5105  // sub r0, r0, r20
5106  // iadd r0, r0, r10
5107  // ixor DST, r0, r10
5108
5109  // mov r0, LHS
5110  SDValue r0 = LHS;
5111
5112  // mov r1, RHS
5113  SDValue r1 = RHS;
5114
5115  // ilt r10, r0, 0
5116  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5117      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5118      r0, DAG.getConstant(0, OVT));
5119
5120  // ilt r11, r1, 0
5121  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5122      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5123      r1, DAG.getConstant(0, OVT));
5124
5125  // iadd r0, r0, r10
5126  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5127
5128  // iadd r1, r1, r11
5129  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
5130
5131  // ixor r0, r0, r10
5132  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5133
5134  // ixor r1, r1, r11
5135  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
5136
5137  // udiv r20, r0, r1
5138  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
5139
5140  // umul r20, r20, r1
5141  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
5142
5143  // sub r0, r0, r20
5144  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
5145
5146  // iadd r0, r0, r10
5147  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5148
5149  // ixor DST, r0, r10
5150  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5151  return DST;
5152}
5153
5154SDValue
5155AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
5156{
5157  return SDValue(Op.getNode(), 0);
5158}
5159
5160SDValue
5161AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
5162{
5163  DebugLoc DL = Op.getDebugLoc();
5164  EVT OVT = Op.getValueType();
5165  MVT INTTY = MVT::i32;
5166  if (OVT == MVT::v2i8) {
5167    INTTY = MVT::v2i32;
5168  } else if (OVT == MVT::v4i8) {
5169    INTTY = MVT::v4i32;
5170  }
5171  SDValue LHS = Op.getOperand(0);
5172  SDValue RHS = Op.getOperand(1);
5173  // The LowerUREM8 function generates equivalent to the following IL.
5174  // mov r0, as_u32(LHS)
5175  // mov r1, as_u32(RHS)
5176  // and r10, r0, 0xFF
5177  // and r11, r1, 0xFF
5178  // cmov_logical r3, r11, r11, 0x1
5179  // udiv r3, r10, r3
5180  // cmov_logical r3, r11, r3, 0
5181  // umul r3, r3, r11
5182  // sub r3, r10, r3
5183  // and as_u8(DST), r3, 0xFF
5184
5185  // mov r0, as_u32(LHS)
5186  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
5187
5188  // mov r1, as_u32(RHS)
5189  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
5190
5191  // and r10, r0, 0xFF
5192  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
5193      DAG.getConstant(0xFF, INTTY));
5194
5195  // and r11, r1, 0xFF
5196  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
5197      DAG.getConstant(0xFF, INTTY));
5198
5199  // cmov_logical r3, r11, r11, 0x1
5200  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
5201      DAG.getConstant(0x01, INTTY));
5202
5203  // udiv r3, r10, r3
5204  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5205
5206  // cmov_logical r3, r11, r3, 0
5207  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
5208      DAG.getConstant(0, INTTY));
5209
5210  // umul r3, r3, r11
5211  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
5212
5213  // sub r3, r10, r3
5214  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
5215
5216  // and as_u8(DST), r3, 0xFF
5217  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
5218      DAG.getConstant(0xFF, INTTY));
5219  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
5220  return DST;
5221}
5222
5223SDValue
5224AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
5225{
5226  DebugLoc DL = Op.getDebugLoc();
5227  EVT OVT = Op.getValueType();
5228  MVT INTTY = MVT::i32;
5229  if (OVT == MVT::v2i16) {
5230    INTTY = MVT::v2i32;
5231  } else if (OVT == MVT::v4i16) {
5232    INTTY = MVT::v4i32;
5233  }
5234  SDValue LHS = Op.getOperand(0);
5235  SDValue RHS = Op.getOperand(1);
5236  // The LowerUREM16 function generatest equivalent to the following IL.
5237  // mov r0, LHS
5238  // mov r1, RHS
5239  // DIV = LowerUDIV16(LHS, RHS)
5240  // and r10, r0, 0xFFFF
5241  // and r11, r1, 0xFFFF
5242  // cmov_logical r3, r11, r11, 0x1
5243  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5244  // and r3, r3, 0xFFFF
5245  // cmov_logical r3, r11, r3, 0
5246  // umul r3, r3, r11
5247  // sub r3, r10, r3
5248  // and DST, r3, 0xFFFF
5249
5250  // mov r0, LHS
5251  SDValue r0 = LHS;
5252
5253  // mov r1, RHS
5254  SDValue r1 = RHS;
5255
5256  // and r10, r0, 0xFFFF
5257  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
5258      DAG.getConstant(0xFFFF, OVT));
5259
5260  // and r11, r1, 0xFFFF
5261  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
5262      DAG.getConstant(0xFFFF, OVT));
5263
5264  // cmov_logical r3, r11, r11, 0x1
5265  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
5266      DAG.getConstant(0x01, OVT));
5267
5268  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5269  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
5270  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
5271  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5272  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
5273  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
5274
5275  // and r3, r3, 0xFFFF
5276  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
5277      DAG.getConstant(0xFFFF, OVT));
5278
5279  // cmov_logical r3, r11, r3, 0
5280  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
5281      DAG.getConstant(0, OVT));
5282  // umul r3, r3, r11
5283  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
5284
5285  // sub r3, r10, r3
5286  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
5287
5288  // and DST, r3, 0xFFFF
5289  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
5290      DAG.getConstant(0xFFFF, OVT));
5291  return DST;
5292}
5293
5294SDValue
5295AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
5296{
5297  DebugLoc DL = Op.getDebugLoc();
5298  EVT OVT = Op.getValueType();
5299  SDValue LHS = Op.getOperand(0);
5300  SDValue RHS = Op.getOperand(1);
5301  // The LowerUREM32 function generates equivalent to the following IL.
5302  // udiv r20, LHS, RHS
5303  // umul r20, r20, RHS
5304  // sub DST, LHS, r20
5305
5306  // udiv r20, LHS, RHS
5307  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
5308
5309  // umul r20, r20, RHS
5310  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
5311
5312  // sub DST, LHS, r20
5313  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
5314  return DST;
5315}
5316
5317SDValue
5318AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
5319{
5320  return SDValue(Op.getNode(), 0);
5321}
5322
5323
5324SDValue
5325AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
5326{
5327  DebugLoc DL = Op.getDebugLoc();
5328  EVT OVT = Op.getValueType();
5329  MVT INTTY = MVT::i32;
5330  if (OVT == MVT::v2f32) {
5331    INTTY = MVT::v2i32;
5332  } else if (OVT == MVT::v4f32) {
5333    INTTY = MVT::v4i32;
5334  }
5335  SDValue LHS = Op.getOperand(0);
5336  SDValue RHS = Op.getOperand(1);
5337  SDValue DST;
5338  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
5339      &this->getTargetMachine())->getSubtargetImpl();
5340  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
5341    // TODO: This doesn't work for vector types yet
5342    // The LowerFDIV32 function generates equivalent to the following
5343    // IL:
5344    // mov r20, as_int(LHS)
5345    // mov r21, as_int(RHS)
5346    // and r30, r20, 0x7f800000
5347    // and r31, r20, 0x807FFFFF
5348    // and r32, r21, 0x7f800000
5349    // and r33, r21, 0x807FFFFF
5350    // ieq r40, r30, 0x7F800000
5351    // ieq r41, r31, 0x7F800000
5352    // ieq r42, r32, 0
5353    // ieq r43, r33, 0
5354    // and r50, r20, 0x80000000
5355    // and r51, r21, 0x80000000
5356    // ior r32, r32, 0x3f800000
5357    // ior r33, r33, 0x3f800000
5358    // cmov_logical r32, r42, r50, r32
5359    // cmov_logical r33, r43, r51, r33
5360    // cmov_logical r32, r40, r20, r32
5361    // cmov_logical r33, r41, r21, r33
5362    // ior r50, r40, r41
5363    // ior r51, r42, r43
5364    // ior r50, r50, r51
5365    // inegate r52, r31
5366    // iadd r30, r30, r52
5367    // cmov_logical r30, r50, 0, r30
5368    // div_zeroop(infinity) r21, 1.0, r33
5369    // mul_ieee r20, r32, r21
5370    // and r22, r20, 0x7FFFFFFF
5371    // and r23, r20, 0x80000000
5372    // ishr r60, r22, 0x00000017
5373    // ishr r61, r30, 0x00000017
5374    // iadd r20, r20, r30
5375    // iadd r21, r22, r30
5376    // iadd r60, r60, r61
5377    // ige r42, 0, R60
5378    // ior r41, r23, 0x7F800000
5379    // ige r40, r60, 0x000000FF
5380    // cmov_logical r40, r50, 0, r40
5381    // cmov_logical r20, r42, r23, r20
5382    // cmov_logical DST, r40, r41, r20
5383    // as_float(DST)
5384
5385    // mov r20, as_int(LHS)
5386    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
5387
5388    // mov r21, as_int(RHS)
5389    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
5390
5391    // and r30, r20, 0x7f800000
5392    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5393        DAG.getConstant(0x7F800000, INTTY));
5394
5395    // and r31, r21, 0x7f800000
5396    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5397        DAG.getConstant(0x7f800000, INTTY));
5398
5399    // and r32, r20, 0x807FFFFF
5400    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5401        DAG.getConstant(0x807FFFFF, INTTY));
5402
5403    // and r33, r21, 0x807FFFFF
5404    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5405        DAG.getConstant(0x807FFFFF, INTTY));
5406
5407    // ieq r40, r30, 0x7F800000
5408    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5409        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5410        R30, DAG.getConstant(0x7F800000, INTTY));
5411
5412    // ieq r41, r31, 0x7F800000
5413    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5414        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5415        R31, DAG.getConstant(0x7F800000, INTTY));
5416
5417    // ieq r42, r30, 0
5418    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5419        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5420        R30, DAG.getConstant(0, INTTY));
5421
5422    // ieq r43, r31, 0
5423    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5424        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5425        R31, DAG.getConstant(0, INTTY));
5426
5427    // and r50, r20, 0x80000000
5428    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5429        DAG.getConstant(0x80000000, INTTY));
5430
5431    // and r51, r21, 0x80000000
5432    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5433        DAG.getConstant(0x80000000, INTTY));
5434
5435    // ior r32, r32, 0x3f800000
5436    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
5437        DAG.getConstant(0x3F800000, INTTY));
5438
5439    // ior r33, r33, 0x3f800000
5440    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
5441        DAG.getConstant(0x3F800000, INTTY));
5442
5443    // cmov_logical r32, r42, r50, r32
5444    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
5445
5446    // cmov_logical r33, r43, r51, r33
5447    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
5448
5449    // cmov_logical r32, r40, r20, r32
5450    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
5451
5452    // cmov_logical r33, r41, r21, r33
5453    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
5454
5455    // ior r50, r40, r41
5456    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
5457
5458    // ior r51, r42, r43
5459    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
5460
5461    // ior r50, r50, r51
5462    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
5463
5464    // inegate r52, r31
5465    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
5466
5467    // iadd r30, r30, r52
5468    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
5469
5470    // cmov_logical r30, r50, 0, r30
5471    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5472        DAG.getConstant(0, INTTY), R30);
5473
5474    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5475    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5476    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5477        DAG.getConstantFP(1.0f, OVT), R33);
5478
5479    // mul_ieee as_int(r20), as_float(r32), r21
5480    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5481    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5482    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5483
5484    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5485    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5486    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5487        DAG.getConstantFP(1.0f, OVT), R33);
5488
5489    // mul_ieee as_int(r20), as_float(r32), r21
5490    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5491    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5492    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5493
5494    // and r22, r20, 0x7FFFFFFF
5495    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5496        DAG.getConstant(0x7FFFFFFF, INTTY));
5497
5498    // and r23, r20, 0x80000000
5499    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5500        DAG.getConstant(0x80000000, INTTY));
5501
5502    // ishr r60, r22, 0x00000017
5503    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
5504        DAG.getConstant(0x00000017, INTTY));
5505
5506    // ishr r61, r30, 0x00000017
5507    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
5508        DAG.getConstant(0x00000017, INTTY));
5509
5510    // iadd r20, r20, r30
5511    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
5512
5513    // iadd r21, r22, r30
5514    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
5515
5516    // iadd r60, r60, r61
5517    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
5518
5519    // ige r42, 0, R60
5520    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5521        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5522        DAG.getConstant(0, INTTY),
5523        R60);
5524
5525    // ior r41, r23, 0x7F800000
5526    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
5527        DAG.getConstant(0x7F800000, INTTY));
5528
5529    // ige r40, r60, 0x000000FF
5530    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5531        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5532        R60,
5533        DAG.getConstant(0x0000000FF, INTTY));
5534
5535    // cmov_logical r40, r50, 0, r40
5536    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5537        DAG.getConstant(0, INTTY),
5538        R40);
5539
5540    // cmov_logical r20, r42, r23, r20
5541    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
5542
5543    // cmov_logical DST, r40, r41, r20
5544    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
5545
5546    // as_float(DST)
5547    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
5548  } else {
5549    // The following sequence of DAG nodes produce the following IL:
5550    // fabs r1, RHS
5551    // lt r2, 0x1.0p+96f, r1
5552    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5553    // mul_ieee r1, RHS, r3
5554    // div_zeroop(infinity) r0, LHS, r1
5555    // mul_ieee DST, r0, r3
5556
5557    // fabs r1, RHS
5558    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
5559    // lt r2, 0x1.0p+96f, r1
5560    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5561        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
5562        DAG.getConstant(0x6f800000, INTTY), r1);
5563    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5564    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
5565        DAG.getConstant(0x2f800000, INTTY),
5566        DAG.getConstant(0x3f800000, INTTY));
5567    // mul_ieee r1, RHS, r3
5568    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
5569    // div_zeroop(infinity) r0, LHS, r1
5570    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
5571    // mul_ieee DST, r0, r3
5572    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
5573  }
5574  return DST;
5575}
5576
5577SDValue
5578AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
5579{
5580  return SDValue(Op.getNode(), 0);
5581}
5582