// AMDILISelLowering.cpp revision a75c6163e605f35b14f26930dd9227e4f337ec9e
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILGlobalManager.h"
18#include "AMDILIntrinsicInfo.h"
19#include "AMDILKernelManager.h"
20#include "AMDILMachineFunctionInfo.h"
21#include "AMDILSubtarget.h"
22#include "AMDILTargetMachine.h"
23#include "AMDILUtilityFunctions.h"
24#include "llvm/CallingConv.h"
25#include "llvm/CodeGen/MachineFrameInfo.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/PseudoSourceValue.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
31#include "llvm/DerivedTypes.h"
32#include "llvm/Instructions.h"
33#include "llvm/Intrinsics.h"
34#include "llvm/Target/TargetOptions.h"
35
36using namespace llvm;
// Shorthand aliases for frequently used LLVM enumerators in this file.
#define ISDBITCAST  ISD::BITCAST
#define MVTGLUE     MVT::Glue
39//===----------------------------------------------------------------------===//
40// Calling Convention Implementation
41//===----------------------------------------------------------------------===//
42#include "AMDILGenCallingConv.inc"
43
44//===----------------------------------------------------------------------===//
45// TargetLowering Implementation Help Functions Begin
46//===----------------------------------------------------------------------===//
47  static SDValue
48getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
49{
50  DebugLoc DL = Src.getDebugLoc();
51  EVT svt = Src.getValueType().getScalarType();
52  EVT dvt = Dst.getValueType().getScalarType();
53  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
54    if (dvt.bitsGT(svt)) {
55      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
56    } else if (svt.bitsLT(svt)) {
57      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
58          DAG.getConstant(1, MVT::i32));
59    }
60  } else if (svt.isInteger() && dvt.isInteger()) {
61    if (!svt.bitsEq(dvt)) {
62      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
63    } else {
64      Src = DAG.getNode(AMDILISD::MOVE, DL, dvt, Src);
65    }
66  } else if (svt.isInteger()) {
67    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
68    if (!svt.bitsEq(dvt)) {
69      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
70        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
71      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
72        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
73      } else {
74        assert(0 && "We only support 32 and 64bit fp types");
75      }
76    }
77    Src = DAG.getNode(opcode, DL, dvt, Src);
78  } else if (dvt.isInteger()) {
79    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
80    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
81      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
82    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
83      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
84    } else {
85      assert(0 && "We only support 32 and 64bit fp types");
86    }
87    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
88  }
89  return Src;
90}
91// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
92// condition.
93  static AMDILCC::CondCodes
94CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
95{
96  switch (CC) {
97    default:
98      {
99        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
100        assert(0 && "Unknown condition code!");
101      }
102    case ISD::SETO:
103      switch(type) {
104        case MVT::f32:
105          return AMDILCC::IL_CC_F_O;
106        case MVT::f64:
107          return AMDILCC::IL_CC_D_O;
108        default:
109          assert(0 && "Opcode combination not generated correctly!");
110          return AMDILCC::COND_ERROR;
111      };
112    case ISD::SETUO:
113      switch(type) {
114        case MVT::f32:
115          return AMDILCC::IL_CC_F_UO;
116        case MVT::f64:
117          return AMDILCC::IL_CC_D_UO;
118        default:
119          assert(0 && "Opcode combination not generated correctly!");
120          return AMDILCC::COND_ERROR;
121      };
122    case ISD::SETGT:
123      switch (type) {
124        case MVT::i1:
125        case MVT::i8:
126        case MVT::i16:
127        case MVT::i32:
128          return AMDILCC::IL_CC_I_GT;
129        case MVT::f32:
130          return AMDILCC::IL_CC_F_GT;
131        case MVT::f64:
132          return AMDILCC::IL_CC_D_GT;
133        case MVT::i64:
134          return AMDILCC::IL_CC_L_GT;
135        default:
136          assert(0 && "Opcode combination not generated correctly!");
137          return AMDILCC::COND_ERROR;
138      };
139    case ISD::SETGE:
140      switch (type) {
141        case MVT::i1:
142        case MVT::i8:
143        case MVT::i16:
144        case MVT::i32:
145          return AMDILCC::IL_CC_I_GE;
146        case MVT::f32:
147          return AMDILCC::IL_CC_F_GE;
148        case MVT::f64:
149          return AMDILCC::IL_CC_D_GE;
150        case MVT::i64:
151          return AMDILCC::IL_CC_L_GE;
152        default:
153          assert(0 && "Opcode combination not generated correctly!");
154          return AMDILCC::COND_ERROR;
155      };
156    case ISD::SETLT:
157      switch (type) {
158        case MVT::i1:
159        case MVT::i8:
160        case MVT::i16:
161        case MVT::i32:
162          return AMDILCC::IL_CC_I_LT;
163        case MVT::f32:
164          return AMDILCC::IL_CC_F_LT;
165        case MVT::f64:
166          return AMDILCC::IL_CC_D_LT;
167        case MVT::i64:
168          return AMDILCC::IL_CC_L_LT;
169        default:
170          assert(0 && "Opcode combination not generated correctly!");
171          return AMDILCC::COND_ERROR;
172      };
173    case ISD::SETLE:
174      switch (type) {
175        case MVT::i1:
176        case MVT::i8:
177        case MVT::i16:
178        case MVT::i32:
179          return AMDILCC::IL_CC_I_LE;
180        case MVT::f32:
181          return AMDILCC::IL_CC_F_LE;
182        case MVT::f64:
183          return AMDILCC::IL_CC_D_LE;
184        case MVT::i64:
185          return AMDILCC::IL_CC_L_LE;
186        default:
187          assert(0 && "Opcode combination not generated correctly!");
188          return AMDILCC::COND_ERROR;
189      };
190    case ISD::SETNE:
191      switch (type) {
192        case MVT::i1:
193        case MVT::i8:
194        case MVT::i16:
195        case MVT::i32:
196          return AMDILCC::IL_CC_I_NE;
197        case MVT::f32:
198          return AMDILCC::IL_CC_F_NE;
199        case MVT::f64:
200          return AMDILCC::IL_CC_D_NE;
201        case MVT::i64:
202          return AMDILCC::IL_CC_L_NE;
203        default:
204          assert(0 && "Opcode combination not generated correctly!");
205          return AMDILCC::COND_ERROR;
206      };
207    case ISD::SETEQ:
208      switch (type) {
209        case MVT::i1:
210        case MVT::i8:
211        case MVT::i16:
212        case MVT::i32:
213          return AMDILCC::IL_CC_I_EQ;
214        case MVT::f32:
215          return AMDILCC::IL_CC_F_EQ;
216        case MVT::f64:
217          return AMDILCC::IL_CC_D_EQ;
218        case MVT::i64:
219          return AMDILCC::IL_CC_L_EQ;
220        default:
221          assert(0 && "Opcode combination not generated correctly!");
222          return AMDILCC::COND_ERROR;
223      };
224    case ISD::SETUGT:
225      switch (type) {
226        case MVT::i1:
227        case MVT::i8:
228        case MVT::i16:
229        case MVT::i32:
230          return AMDILCC::IL_CC_U_GT;
231        case MVT::f32:
232          return AMDILCC::IL_CC_F_UGT;
233        case MVT::f64:
234          return AMDILCC::IL_CC_D_UGT;
235        case MVT::i64:
236          return AMDILCC::IL_CC_UL_GT;
237        default:
238          assert(0 && "Opcode combination not generated correctly!");
239          return AMDILCC::COND_ERROR;
240      };
241    case ISD::SETUGE:
242      switch (type) {
243        case MVT::i1:
244        case MVT::i8:
245        case MVT::i16:
246        case MVT::i32:
247          return AMDILCC::IL_CC_U_GE;
248        case MVT::f32:
249          return AMDILCC::IL_CC_F_UGE;
250        case MVT::f64:
251          return AMDILCC::IL_CC_D_UGE;
252        case MVT::i64:
253          return AMDILCC::IL_CC_UL_GE;
254        default:
255          assert(0 && "Opcode combination not generated correctly!");
256          return AMDILCC::COND_ERROR;
257      };
258    case ISD::SETULT:
259      switch (type) {
260        case MVT::i1:
261        case MVT::i8:
262        case MVT::i16:
263        case MVT::i32:
264          return AMDILCC::IL_CC_U_LT;
265        case MVT::f32:
266          return AMDILCC::IL_CC_F_ULT;
267        case MVT::f64:
268          return AMDILCC::IL_CC_D_ULT;
269        case MVT::i64:
270          return AMDILCC::IL_CC_UL_LT;
271        default:
272          assert(0 && "Opcode combination not generated correctly!");
273          return AMDILCC::COND_ERROR;
274      };
275    case ISD::SETULE:
276      switch (type) {
277        case MVT::i1:
278        case MVT::i8:
279        case MVT::i16:
280        case MVT::i32:
281          return AMDILCC::IL_CC_U_LE;
282        case MVT::f32:
283          return AMDILCC::IL_CC_F_ULE;
284        case MVT::f64:
285          return AMDILCC::IL_CC_D_ULE;
286        case MVT::i64:
287          return AMDILCC::IL_CC_UL_LE;
288        default:
289          assert(0 && "Opcode combination not generated correctly!");
290          return AMDILCC::COND_ERROR;
291      };
292    case ISD::SETUNE:
293      switch (type) {
294        case MVT::i1:
295        case MVT::i8:
296        case MVT::i16:
297        case MVT::i32:
298          return AMDILCC::IL_CC_U_NE;
299        case MVT::f32:
300          return AMDILCC::IL_CC_F_UNE;
301        case MVT::f64:
302          return AMDILCC::IL_CC_D_UNE;
303        case MVT::i64:
304          return AMDILCC::IL_CC_UL_NE;
305        default:
306          assert(0 && "Opcode combination not generated correctly!");
307          return AMDILCC::COND_ERROR;
308      };
309    case ISD::SETUEQ:
310      switch (type) {
311        case MVT::i1:
312        case MVT::i8:
313        case MVT::i16:
314        case MVT::i32:
315          return AMDILCC::IL_CC_U_EQ;
316        case MVT::f32:
317          return AMDILCC::IL_CC_F_UEQ;
318        case MVT::f64:
319          return AMDILCC::IL_CC_D_UEQ;
320        case MVT::i64:
321          return AMDILCC::IL_CC_UL_EQ;
322        default:
323          assert(0 && "Opcode combination not generated correctly!");
324          return AMDILCC::COND_ERROR;
325      };
326    case ISD::SETOGT:
327      switch (type) {
328        case MVT::f32:
329          return AMDILCC::IL_CC_F_OGT;
330        case MVT::f64:
331          return AMDILCC::IL_CC_D_OGT;
332        case MVT::i1:
333        case MVT::i8:
334        case MVT::i16:
335        case MVT::i32:
336        case MVT::i64:
337        default:
338          assert(0 && "Opcode combination not generated correctly!");
339          return AMDILCC::COND_ERROR;
340      };
341    case ISD::SETOGE:
342      switch (type) {
343        case MVT::f32:
344          return AMDILCC::IL_CC_F_OGE;
345        case MVT::f64:
346          return AMDILCC::IL_CC_D_OGE;
347        case MVT::i1:
348        case MVT::i8:
349        case MVT::i16:
350        case MVT::i32:
351        case MVT::i64:
352        default:
353          assert(0 && "Opcode combination not generated correctly!");
354          return AMDILCC::COND_ERROR;
355      };
356    case ISD::SETOLT:
357      switch (type) {
358        case MVT::f32:
359          return AMDILCC::IL_CC_F_OLT;
360        case MVT::f64:
361          return AMDILCC::IL_CC_D_OLT;
362        case MVT::i1:
363        case MVT::i8:
364        case MVT::i16:
365        case MVT::i32:
366        case MVT::i64:
367        default:
368          assert(0 && "Opcode combination not generated correctly!");
369          return AMDILCC::COND_ERROR;
370      };
371    case ISD::SETOLE:
372      switch (type) {
373        case MVT::f32:
374          return AMDILCC::IL_CC_F_OLE;
375        case MVT::f64:
376          return AMDILCC::IL_CC_D_OLE;
377        case MVT::i1:
378        case MVT::i8:
379        case MVT::i16:
380        case MVT::i32:
381        case MVT::i64:
382        default:
383          assert(0 && "Opcode combination not generated correctly!");
384          return AMDILCC::COND_ERROR;
385      };
386    case ISD::SETONE:
387      switch (type) {
388        case MVT::f32:
389          return AMDILCC::IL_CC_F_ONE;
390        case MVT::f64:
391          return AMDILCC::IL_CC_D_ONE;
392        case MVT::i1:
393        case MVT::i8:
394        case MVT::i16:
395        case MVT::i32:
396        case MVT::i64:
397        default:
398          assert(0 && "Opcode combination not generated correctly!");
399          return AMDILCC::COND_ERROR;
400      };
401    case ISD::SETOEQ:
402      switch (type) {
403        case MVT::f32:
404          return AMDILCC::IL_CC_F_OEQ;
405        case MVT::f64:
406          return AMDILCC::IL_CC_D_OEQ;
407        case MVT::i1:
408        case MVT::i8:
409        case MVT::i16:
410        case MVT::i32:
411        case MVT::i64:
412        default:
413          assert(0 && "Opcode combination not generated correctly!");
414          return AMDILCC::COND_ERROR;
415      };
416  };
417}
418
419  static unsigned int
420translateToOpcode(uint64_t CCCode, unsigned int regClass)
421{
422  switch (CCCode) {
423    case AMDILCC::IL_CC_D_EQ:
424    case AMDILCC::IL_CC_D_OEQ:
425      if (regClass == AMDIL::GPRV2F64RegClassID) {
426        return (unsigned int)AMDIL::DEQ_v2f64;
427      } else {
428        return (unsigned int)AMDIL::DEQ;
429      }
430    case AMDILCC::IL_CC_D_LE:
431    case AMDILCC::IL_CC_D_OLE:
432    case AMDILCC::IL_CC_D_ULE:
433    case AMDILCC::IL_CC_D_GE:
434    case AMDILCC::IL_CC_D_OGE:
435    case AMDILCC::IL_CC_D_UGE:
436      return (unsigned int)AMDIL::DGE;
437    case AMDILCC::IL_CC_D_LT:
438    case AMDILCC::IL_CC_D_OLT:
439    case AMDILCC::IL_CC_D_ULT:
440    case AMDILCC::IL_CC_D_GT:
441    case AMDILCC::IL_CC_D_OGT:
442    case AMDILCC::IL_CC_D_UGT:
443      return (unsigned int)AMDIL::DLT;
444    case AMDILCC::IL_CC_D_NE:
445    case AMDILCC::IL_CC_D_UNE:
446      return (unsigned int)AMDIL::DNE;
447    case AMDILCC::IL_CC_F_EQ:
448    case AMDILCC::IL_CC_F_OEQ:
449      return (unsigned int)AMDIL::FEQ;
450    case AMDILCC::IL_CC_F_LE:
451    case AMDILCC::IL_CC_F_ULE:
452    case AMDILCC::IL_CC_F_OLE:
453    case AMDILCC::IL_CC_F_GE:
454    case AMDILCC::IL_CC_F_UGE:
455    case AMDILCC::IL_CC_F_OGE:
456      return (unsigned int)AMDIL::FGE;
457    case AMDILCC::IL_CC_F_LT:
458    case AMDILCC::IL_CC_F_OLT:
459    case AMDILCC::IL_CC_F_ULT:
460    case AMDILCC::IL_CC_F_GT:
461    case AMDILCC::IL_CC_F_OGT:
462    case AMDILCC::IL_CC_F_UGT:
463      if (regClass == AMDIL::GPRV2F32RegClassID) {
464        return (unsigned int)AMDIL::FLT_v2f32;
465      } else if (regClass == AMDIL::GPRV4F32RegClassID) {
466        return (unsigned int)AMDIL::FLT_v4f32;
467      } else {
468        return (unsigned int)AMDIL::FLT;
469      }
470    case AMDILCC::IL_CC_F_NE:
471    case AMDILCC::IL_CC_F_UNE:
472      return (unsigned int)AMDIL::FNE;
473    case AMDILCC::IL_CC_I_EQ:
474    case AMDILCC::IL_CC_U_EQ:
475      if (regClass == AMDIL::GPRI32RegClassID
476          || regClass == AMDIL::GPRI8RegClassID
477          || regClass == AMDIL::GPRI16RegClassID) {
478        return (unsigned int)AMDIL::IEQ;
479      } else if (regClass == AMDIL::GPRV2I32RegClassID
480          || regClass == AMDIL::GPRV2I8RegClassID
481          || regClass == AMDIL::GPRV2I16RegClassID) {
482        return (unsigned int)AMDIL::IEQ_v2i32;
483      } else if (regClass == AMDIL::GPRV4I32RegClassID
484          || regClass == AMDIL::GPRV4I8RegClassID
485          || regClass == AMDIL::GPRV4I16RegClassID) {
486        return (unsigned int)AMDIL::IEQ_v4i32;
487      } else {
488        assert(!"Unknown reg class!");
489      }
490    case AMDILCC::IL_CC_L_EQ:
491    case AMDILCC::IL_CC_UL_EQ:
492      return (unsigned int)AMDIL::LEQ;
493    case AMDILCC::IL_CC_I_GE:
494    case AMDILCC::IL_CC_I_LE:
495      if (regClass == AMDIL::GPRI32RegClassID
496          || regClass == AMDIL::GPRI8RegClassID
497          || regClass == AMDIL::GPRI16RegClassID) {
498        return (unsigned int)AMDIL::IGE;
499      } else if (regClass == AMDIL::GPRV2I32RegClassID
500          || regClass == AMDIL::GPRI8RegClassID
501          || regClass == AMDIL::GPRI16RegClassID) {
502        return (unsigned int)AMDIL::IGE_v2i32;
503      } else if (regClass == AMDIL::GPRV4I32RegClassID
504          || regClass == AMDIL::GPRI8RegClassID
505          || regClass == AMDIL::GPRI16RegClassID) {
506        return (unsigned int)AMDIL::IGE_v4i32;
507      } else {
508        assert(!"Unknown reg class!");
509      }
510    case AMDILCC::IL_CC_I_LT:
511    case AMDILCC::IL_CC_I_GT:
512      if (regClass == AMDIL::GPRI32RegClassID
513          || regClass == AMDIL::GPRI8RegClassID
514          || regClass == AMDIL::GPRI16RegClassID) {
515        return (unsigned int)AMDIL::ILT;
516      } else if (regClass == AMDIL::GPRV2I32RegClassID
517          || regClass == AMDIL::GPRI8RegClassID
518          || regClass == AMDIL::GPRI16RegClassID) {
519        return (unsigned int)AMDIL::ILT_v2i32;
520      } else if (regClass == AMDIL::GPRV4I32RegClassID
521          || regClass == AMDIL::GPRI8RegClassID
522          || regClass == AMDIL::GPRI16RegClassID) {
523        return (unsigned int)AMDIL::ILT_v4i32;
524      } else {
525        assert(!"Unknown reg class!");
526      }
527    case AMDILCC::IL_CC_L_GE:
528      return (unsigned int)AMDIL::LGE;
529    case AMDILCC::IL_CC_L_LE:
530      return (unsigned int)AMDIL::LLE;
531    case AMDILCC::IL_CC_L_LT:
532      return (unsigned int)AMDIL::LLT;
533    case AMDILCC::IL_CC_L_GT:
534      return (unsigned int)AMDIL::LGT;
535    case AMDILCC::IL_CC_I_NE:
536    case AMDILCC::IL_CC_U_NE:
537      if (regClass == AMDIL::GPRI32RegClassID
538          || regClass == AMDIL::GPRI8RegClassID
539          || regClass == AMDIL::GPRI16RegClassID) {
540        return (unsigned int)AMDIL::INE;
541      } else if (regClass == AMDIL::GPRV2I32RegClassID
542          || regClass == AMDIL::GPRI8RegClassID
543          || regClass == AMDIL::GPRI16RegClassID) {
544        return (unsigned int)AMDIL::INE_v2i32;
545      } else if (regClass == AMDIL::GPRV4I32RegClassID
546          || regClass == AMDIL::GPRI8RegClassID
547          || regClass == AMDIL::GPRI16RegClassID) {
548        return (unsigned int)AMDIL::INE_v4i32;
549      } else {
550        assert(!"Unknown reg class!");
551      }
552    case AMDILCC::IL_CC_U_GE:
553    case AMDILCC::IL_CC_U_LE:
554      if (regClass == AMDIL::GPRI32RegClassID
555          || regClass == AMDIL::GPRI8RegClassID
556          || regClass == AMDIL::GPRI16RegClassID) {
557        return (unsigned int)AMDIL::UGE;
558      } else if (regClass == AMDIL::GPRV2I32RegClassID
559          || regClass == AMDIL::GPRI8RegClassID
560          || regClass == AMDIL::GPRI16RegClassID) {
561        return (unsigned int)AMDIL::UGE_v2i32;
562      } else if (regClass == AMDIL::GPRV4I32RegClassID
563          || regClass == AMDIL::GPRI8RegClassID
564          || regClass == AMDIL::GPRI16RegClassID) {
565        return (unsigned int)AMDIL::UGE_v4i32;
566      } else {
567        assert(!"Unknown reg class!");
568      }
569    case AMDILCC::IL_CC_L_NE:
570    case AMDILCC::IL_CC_UL_NE:
571      return (unsigned int)AMDIL::LNE;
572    case AMDILCC::IL_CC_UL_GE:
573      return (unsigned int)AMDIL::ULGE;
574    case AMDILCC::IL_CC_UL_LE:
575      return (unsigned int)AMDIL::ULLE;
576    case AMDILCC::IL_CC_U_LT:
577      if (regClass == AMDIL::GPRI32RegClassID
578          || regClass == AMDIL::GPRI8RegClassID
579          || regClass == AMDIL::GPRI16RegClassID) {
580        return (unsigned int)AMDIL::ULT;
581      } else if (regClass == AMDIL::GPRV2I32RegClassID
582          || regClass == AMDIL::GPRI8RegClassID
583          || regClass == AMDIL::GPRI16RegClassID) {
584        return (unsigned int)AMDIL::ULT_v2i32;
585      } else if (regClass == AMDIL::GPRV4I32RegClassID
586          || regClass == AMDIL::GPRI8RegClassID
587          || regClass == AMDIL::GPRI16RegClassID) {
588        return (unsigned int)AMDIL::ULT_v4i32;
589      } else {
590        assert(!"Unknown reg class!");
591      }
592    case AMDILCC::IL_CC_U_GT:
593      if (regClass == AMDIL::GPRI32RegClassID
594          || regClass == AMDIL::GPRI8RegClassID
595          || regClass == AMDIL::GPRI16RegClassID) {
596        return (unsigned int)AMDIL::UGT;
597      } else if (regClass == AMDIL::GPRV2I32RegClassID
598          || regClass == AMDIL::GPRI8RegClassID
599          || regClass == AMDIL::GPRI16RegClassID) {
600        return (unsigned int)AMDIL::UGT_v2i32;
601      } else if (regClass == AMDIL::GPRV4I32RegClassID
602          || regClass == AMDIL::GPRI8RegClassID
603          || regClass == AMDIL::GPRI16RegClassID) {
604        return (unsigned int)AMDIL::UGT_v4i32;
605      } else {
606        assert(!"Unknown reg class!");
607      }
608    case AMDILCC::IL_CC_UL_LT:
609      return (unsigned int)AMDIL::ULLT;
610    case AMDILCC::IL_CC_UL_GT:
611      return (unsigned int)AMDIL::ULGT;
612    case AMDILCC::IL_CC_F_UEQ:
613    case AMDILCC::IL_CC_D_UEQ:
614    case AMDILCC::IL_CC_F_ONE:
615    case AMDILCC::IL_CC_D_ONE:
616    case AMDILCC::IL_CC_F_O:
617    case AMDILCC::IL_CC_F_UO:
618    case AMDILCC::IL_CC_D_O:
619    case AMDILCC::IL_CC_D_UO:
620      // we don't care
621      return 0;
622
623  }
624  errs()<<"Opcode: "<<CCCode<<"\n";
625  assert(0 && "Unknown opcode retrieved");
626  return 0;
627}
628SDValue
629AMDILTargetLowering::LowerMemArgument(
630    SDValue Chain,
631    CallingConv::ID CallConv,
632    const SmallVectorImpl<ISD::InputArg> &Ins,
633    DebugLoc dl, SelectionDAG &DAG,
634    const CCValAssign &VA,
635    MachineFrameInfo *MFI,
636    unsigned i) const
637{
638  // Create the nodes corresponding to a load from this parameter slot.
639  ISD::ArgFlagsTy Flags = Ins[i].Flags;
640
641  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
642    getTargetMachine().Options.GuaranteedTailCallOpt;
643  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
644
645  // FIXME: For now, all byval parameter objects are marked mutable. This can
646  // be changed with more analysis.
647  // In case of tail call optimization mark all arguments mutable. Since they
648  // could be overwritten by lowering of arguments in case of a tail call.
649  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
650      VA.getLocMemOffset(), isImmutable);
651  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
652
653  if (Flags.isByVal())
654    return FIN;
655  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
656      MachinePointerInfo::getFixedStack(FI),
657      false, false, false, 0);
658}
659//===----------------------------------------------------------------------===//
660// TargetLowering Implementation Help Functions End
661//===----------------------------------------------------------------------===//
662//===----------------------------------------------------------------------===//
663// Instruction generation functions
664//===----------------------------------------------------------------------===//
665uint32_t
666AMDILTargetLowering::addExtensionInstructions(
667    uint32_t reg, bool signedShift,
668    unsigned int simpleVT) const
669{
670  int shiftSize = 0;
671  uint32_t LShift, RShift;
672  switch(simpleVT)
673  {
674    default:
675      return reg;
676    case AMDIL::GPRI8RegClassID:
677      shiftSize = 24;
678      LShift = AMDIL::SHL_i8;
679      if (signedShift) {
680        RShift = AMDIL::SHR_i8;
681      } else {
682        RShift = AMDIL::USHR_i8;
683      }
684      break;
685    case AMDIL::GPRV2I8RegClassID:
686      shiftSize = 24;
687      LShift = AMDIL::SHL_v2i8;
688      if (signedShift) {
689        RShift = AMDIL::SHR_v2i8;
690      } else {
691        RShift = AMDIL::USHR_v2i8;
692      }
693      break;
694    case AMDIL::GPRV4I8RegClassID:
695      shiftSize = 24;
696      LShift = AMDIL::SHL_v4i8;
697      if (signedShift) {
698        RShift = AMDIL::SHR_v4i8;
699      } else {
700        RShift = AMDIL::USHR_v4i8;
701      }
702      break;
703    case AMDIL::GPRI16RegClassID:
704      shiftSize = 16;
705      LShift = AMDIL::SHL_i16;
706      if (signedShift) {
707        RShift = AMDIL::SHR_i16;
708      } else {
709        RShift = AMDIL::USHR_i16;
710      }
711      break;
712    case AMDIL::GPRV2I16RegClassID:
713      shiftSize = 16;
714      LShift = AMDIL::SHL_v2i16;
715      if (signedShift) {
716        RShift = AMDIL::SHR_v2i16;
717      } else {
718        RShift = AMDIL::USHR_v2i16;
719      }
720      break;
721    case AMDIL::GPRV4I16RegClassID:
722      shiftSize = 16;
723      LShift = AMDIL::SHL_v4i16;
724      if (signedShift) {
725        RShift = AMDIL::SHR_v4i16;
726      } else {
727        RShift = AMDIL::USHR_v4i16;
728      }
729      break;
730  };
731  uint32_t LoadReg = genVReg(simpleVT);
732  uint32_t tmp1 = genVReg(simpleVT);
733  uint32_t tmp2 = genVReg(simpleVT);
734  generateMachineInst(AMDIL::LOADCONST_i32, LoadReg).addImm(shiftSize);
735  generateMachineInst(LShift, tmp1, reg, LoadReg);
736  generateMachineInst(RShift, tmp2, tmp1, LoadReg);
737  return tmp2;
738}
739
740MachineOperand
741AMDILTargetLowering::convertToReg(MachineOperand op) const
742{
743  if (op.isReg()) {
744    return op;
745  } else if (op.isImm()) {
746    uint32_t loadReg
747      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
748    generateMachineInst(AMDIL::LOADCONST_i32, loadReg)
749      .addImm(op.getImm());
750    op.ChangeToRegister(loadReg, false);
751  } else if (op.isFPImm()) {
752    uint32_t loadReg
753      = genVReg(op.getParent()->getDesc().OpInfo[0].RegClass);
754    generateMachineInst(AMDIL::LOADCONST_f32, loadReg)
755      .addFPImm(op.getFPImm());
756    op.ChangeToRegister(loadReg, false);
757  } else if (op.isMBB()) {
758    op.ChangeToRegister(0, false);
759  } else if (op.isFI()) {
760    op.ChangeToRegister(0, false);
761  } else if (op.isCPI()) {
762    op.ChangeToRegister(0, false);
763  } else if (op.isJTI()) {
764    op.ChangeToRegister(0, false);
765  } else if (op.isGlobal()) {
766    op.ChangeToRegister(0, false);
767  } else if (op.isSymbol()) {
768    op.ChangeToRegister(0, false);
769  }/* else if (op.isMetadata()) {
770      op.ChangeToRegister(0, false);
771      }*/
772  return op;
773}
774
775void
776AMDILTargetLowering::generateCMPInstr(
777    MachineInstr *MI,
778    MachineBasicBlock *BB,
779    const TargetInstrInfo& TII)
780const
781{
782  MachineOperand DST = MI->getOperand(0);
783  MachineOperand CC = MI->getOperand(1);
784  MachineOperand LHS = MI->getOperand(2);
785  MachineOperand RHS = MI->getOperand(3);
786  int64_t ccCode = CC.getImm();
787  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
788  unsigned int opCode = translateToOpcode(ccCode, simpleVT);
789  DebugLoc DL = MI->getDebugLoc();
790  MachineBasicBlock::iterator BBI = MI;
791  setPrivateData(BB, BBI, &DL, &TII);
792  if (!LHS.isReg()) {
793    LHS = convertToReg(LHS);
794  }
795  if (!RHS.isReg()) {
796    RHS = convertToReg(RHS);
797  }
798  switch (ccCode) {
799    case AMDILCC::IL_CC_I_EQ:
800    case AMDILCC::IL_CC_I_NE:
801    case AMDILCC::IL_CC_I_GE:
802    case AMDILCC::IL_CC_I_LT:
803      {
804        uint32_t lhsreg = addExtensionInstructions(
805            LHS.getReg(), true, simpleVT);
806        uint32_t rhsreg = addExtensionInstructions(
807            RHS.getReg(), true, simpleVT);
808        generateMachineInst(opCode, DST.getReg(), lhsreg, rhsreg);
809      }
810      break;
811    case AMDILCC::IL_CC_U_EQ:
812    case AMDILCC::IL_CC_U_NE:
813    case AMDILCC::IL_CC_U_GE:
814    case AMDILCC::IL_CC_U_LT:
815    case AMDILCC::IL_CC_D_EQ:
816    case AMDILCC::IL_CC_F_EQ:
817    case AMDILCC::IL_CC_F_OEQ:
818    case AMDILCC::IL_CC_D_OEQ:
819    case AMDILCC::IL_CC_D_NE:
820    case AMDILCC::IL_CC_F_NE:
821    case AMDILCC::IL_CC_F_UNE:
822    case AMDILCC::IL_CC_D_UNE:
823    case AMDILCC::IL_CC_D_GE:
824    case AMDILCC::IL_CC_F_GE:
825    case AMDILCC::IL_CC_D_OGE:
826    case AMDILCC::IL_CC_F_OGE:
827    case AMDILCC::IL_CC_D_LT:
828    case AMDILCC::IL_CC_F_LT:
829    case AMDILCC::IL_CC_F_OLT:
830    case AMDILCC::IL_CC_D_OLT:
831      generateMachineInst(opCode, DST.getReg(),
832          LHS.getReg(), RHS.getReg());
833      break;
834    case AMDILCC::IL_CC_I_GT:
835    case AMDILCC::IL_CC_I_LE:
836      {
837        uint32_t lhsreg = addExtensionInstructions(
838            LHS.getReg(), true, simpleVT);
839        uint32_t rhsreg = addExtensionInstructions(
840            RHS.getReg(), true, simpleVT);
841        generateMachineInst(opCode, DST.getReg(), rhsreg, lhsreg);
842      }
843      break;
844    case AMDILCC::IL_CC_U_GT:
845    case AMDILCC::IL_CC_U_LE:
846    case AMDILCC::IL_CC_F_GT:
847    case AMDILCC::IL_CC_D_GT:
848    case AMDILCC::IL_CC_F_OGT:
849    case AMDILCC::IL_CC_D_OGT:
850    case AMDILCC::IL_CC_F_LE:
851    case AMDILCC::IL_CC_D_LE:
852    case AMDILCC::IL_CC_D_OLE:
853    case AMDILCC::IL_CC_F_OLE:
854      generateMachineInst(opCode, DST.getReg(),
855          RHS.getReg(), LHS.getReg());
856      break;
857    case AMDILCC::IL_CC_F_UGT:
858    case AMDILCC::IL_CC_F_ULE:
859      {
860        uint32_t VReg[4] = {
861          genVReg(simpleVT), genVReg(simpleVT),
862          genVReg(simpleVT), genVReg(simpleVT)
863        };
864        generateMachineInst(opCode, VReg[0],
865            RHS.getReg(), LHS.getReg());
866        generateMachineInst(AMDIL::FNE, VReg[1],
867            RHS.getReg(), RHS.getReg());
868        generateMachineInst(AMDIL::FNE, VReg[2],
869            LHS.getReg(), LHS.getReg());
870        generateMachineInst(AMDIL::BINARY_OR_f32,
871            VReg[3], VReg[0], VReg[1]);
872        generateMachineInst(AMDIL::BINARY_OR_f32,
873            DST.getReg(), VReg[2], VReg[3]);
874      }
875      break;
876    case AMDILCC::IL_CC_F_ULT:
877    case AMDILCC::IL_CC_F_UGE:
878      {
879        uint32_t VReg[4] = {
880          genVReg(simpleVT), genVReg(simpleVT),
881          genVReg(simpleVT), genVReg(simpleVT)
882        };
883        generateMachineInst(opCode, VReg[0],
884            LHS.getReg(), RHS.getReg());
885        generateMachineInst(AMDIL::FNE, VReg[1],
886            RHS.getReg(), RHS.getReg());
887        generateMachineInst(AMDIL::FNE, VReg[2],
888            LHS.getReg(), LHS.getReg());
889        generateMachineInst(AMDIL::BINARY_OR_f32,
890            VReg[3], VReg[0], VReg[1]);
891        generateMachineInst(AMDIL::BINARY_OR_f32,
892            DST.getReg(), VReg[2], VReg[3]);
893      }
894      break;
895    case AMDILCC::IL_CC_D_UGT:
896    case AMDILCC::IL_CC_D_ULE:
897      {
898        uint32_t regID = AMDIL::GPRF64RegClassID;
899        uint32_t VReg[4] = {
900          genVReg(regID), genVReg(regID),
901          genVReg(regID), genVReg(regID)
902        };
903        // The result of a double comparison is a 32bit result
904        generateMachineInst(opCode, VReg[0],
905            RHS.getReg(), LHS.getReg());
906        generateMachineInst(AMDIL::DNE, VReg[1],
907            RHS.getReg(), RHS.getReg());
908        generateMachineInst(AMDIL::DNE, VReg[2],
909            LHS.getReg(), LHS.getReg());
910        generateMachineInst(AMDIL::BINARY_OR_f32,
911            VReg[3], VReg[0], VReg[1]);
912        generateMachineInst(AMDIL::BINARY_OR_f32,
913            DST.getReg(), VReg[2], VReg[3]);
914      }
915      break;
916    case AMDILCC::IL_CC_D_UGE:
917    case AMDILCC::IL_CC_D_ULT:
918      {
919        uint32_t regID = AMDIL::GPRF64RegClassID;
920        uint32_t VReg[4] = {
921          genVReg(regID), genVReg(regID),
922          genVReg(regID), genVReg(regID)
923        };
924        // The result of a double comparison is a 32bit result
925        generateMachineInst(opCode, VReg[0],
926            LHS.getReg(), RHS.getReg());
927        generateMachineInst(AMDIL::DNE, VReg[1],
928            RHS.getReg(), RHS.getReg());
929        generateMachineInst(AMDIL::DNE, VReg[2],
930            LHS.getReg(), LHS.getReg());
931        generateMachineInst(AMDIL::BINARY_OR_f32,
932            VReg[3], VReg[0], VReg[1]);
933        generateMachineInst(AMDIL::BINARY_OR_f32,
934            DST.getReg(), VReg[2], VReg[3]);
935      }
936      break;
937    case AMDILCC::IL_CC_F_UEQ:
938      {
939        uint32_t VReg[4] = {
940          genVReg(simpleVT), genVReg(simpleVT),
941          genVReg(simpleVT), genVReg(simpleVT)
942        };
943        generateMachineInst(AMDIL::FEQ, VReg[0],
944            LHS.getReg(), RHS.getReg());
945        generateMachineInst(AMDIL::FNE, VReg[1],
946            LHS.getReg(), LHS.getReg());
947        generateMachineInst(AMDIL::FNE, VReg[2],
948            RHS.getReg(), RHS.getReg());
949        generateMachineInst(AMDIL::BINARY_OR_f32,
950            VReg[3], VReg[0], VReg[1]);
951        generateMachineInst(AMDIL::BINARY_OR_f32,
952            DST.getReg(), VReg[2], VReg[3]);
953      }
954      break;
955    case AMDILCC::IL_CC_F_ONE:
956      {
957        uint32_t VReg[4] = {
958          genVReg(simpleVT), genVReg(simpleVT),
959          genVReg(simpleVT), genVReg(simpleVT)
960        };
961        generateMachineInst(AMDIL::FNE, VReg[0],
962            LHS.getReg(), RHS.getReg());
963        generateMachineInst(AMDIL::FEQ, VReg[1],
964            LHS.getReg(), LHS.getReg());
965        generateMachineInst(AMDIL::FEQ, VReg[2],
966            RHS.getReg(), RHS.getReg());
967        generateMachineInst(AMDIL::BINARY_AND_f32,
968            VReg[3], VReg[0], VReg[1]);
969        generateMachineInst(AMDIL::BINARY_AND_f32,
970            DST.getReg(), VReg[2], VReg[3]);
971      }
972      break;
973    case AMDILCC::IL_CC_D_UEQ:
974      {
975        uint32_t regID = AMDIL::GPRF64RegClassID;
976        uint32_t VReg[4] = {
977          genVReg(regID), genVReg(regID),
978          genVReg(regID), genVReg(regID)
979        };
980        // The result of a double comparison is a 32bit result
981        generateMachineInst(AMDIL::DEQ, VReg[0],
982            LHS.getReg(), RHS.getReg());
983        generateMachineInst(AMDIL::DNE, VReg[1],
984            LHS.getReg(), LHS.getReg());
985        generateMachineInst(AMDIL::DNE, VReg[2],
986            RHS.getReg(), RHS.getReg());
987        generateMachineInst(AMDIL::BINARY_OR_f32,
988            VReg[3], VReg[0], VReg[1]);
989        generateMachineInst(AMDIL::BINARY_OR_f32,
990            DST.getReg(), VReg[2], VReg[3]);
991
992      }
993      break;
994    case AMDILCC::IL_CC_D_ONE:
995      {
996        uint32_t regID = AMDIL::GPRF64RegClassID;
997        uint32_t VReg[4] = {
998          genVReg(regID), genVReg(regID),
999          genVReg(regID), genVReg(regID)
1000        };
1001        // The result of a double comparison is a 32bit result
1002        generateMachineInst(AMDIL::DNE, VReg[0],
1003            LHS.getReg(), RHS.getReg());
1004        generateMachineInst(AMDIL::DEQ, VReg[1],
1005            LHS.getReg(), LHS.getReg());
1006        generateMachineInst(AMDIL::DEQ, VReg[2],
1007            RHS.getReg(), RHS.getReg());
1008        generateMachineInst(AMDIL::BINARY_AND_f32,
1009            VReg[3], VReg[0], VReg[1]);
1010        generateMachineInst(AMDIL::BINARY_AND_f32,
1011            DST.getReg(), VReg[2], VReg[3]);
1012
1013      }
1014      break;
1015    case AMDILCC::IL_CC_F_O:
1016      {
1017        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1018        generateMachineInst(AMDIL::FEQ, VReg[0],
1019            RHS.getReg(), RHS.getReg());
1020        generateMachineInst(AMDIL::FEQ, VReg[1],
1021            LHS.getReg(), LHS.getReg());
1022        generateMachineInst(AMDIL::BINARY_AND_f32,
1023            DST.getReg(), VReg[0], VReg[1]);
1024      }
1025      break;
1026    case AMDILCC::IL_CC_D_O:
1027      {
1028        uint32_t regID = AMDIL::GPRF64RegClassID;
1029        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1030        // The result of a double comparison is a 32bit result
1031        generateMachineInst(AMDIL::DEQ, VReg[0],
1032            RHS.getReg(), RHS.getReg());
1033        generateMachineInst(AMDIL::DEQ, VReg[1],
1034            LHS.getReg(), LHS.getReg());
1035        generateMachineInst(AMDIL::BINARY_AND_f32,
1036            DST.getReg(), VReg[0], VReg[1]);
1037      }
1038      break;
1039    case AMDILCC::IL_CC_F_UO:
1040      {
1041        uint32_t VReg[2] = { genVReg(simpleVT), genVReg(simpleVT) };
1042        generateMachineInst(AMDIL::FNE, VReg[0],
1043            RHS.getReg(), RHS.getReg());
1044        generateMachineInst(AMDIL::FNE, VReg[1],
1045            LHS.getReg(), LHS.getReg());
1046        generateMachineInst(AMDIL::BINARY_OR_f32,
1047            DST.getReg(), VReg[0], VReg[1]);
1048      }
1049      break;
1050    case AMDILCC::IL_CC_D_UO:
1051      {
1052        uint32_t regID = AMDIL::GPRF64RegClassID;
1053        uint32_t VReg[2] = { genVReg(regID), genVReg(regID) };
1054        // The result of a double comparison is a 32bit result
1055        generateMachineInst(AMDIL::DNE, VReg[0],
1056            RHS.getReg(), RHS.getReg());
1057        generateMachineInst(AMDIL::DNE, VReg[1],
1058            LHS.getReg(), LHS.getReg());
1059        generateMachineInst(AMDIL::BINARY_OR_f32,
1060            DST.getReg(), VReg[0], VReg[1]);
1061      }
1062      break;
1063    case AMDILCC::IL_CC_L_LE:
1064    case AMDILCC::IL_CC_L_GE:
1065    case AMDILCC::IL_CC_L_EQ:
1066    case AMDILCC::IL_CC_L_NE:
1067    case AMDILCC::IL_CC_L_LT:
1068    case AMDILCC::IL_CC_L_GT:
1069    case AMDILCC::IL_CC_UL_LE:
1070    case AMDILCC::IL_CC_UL_GE:
1071    case AMDILCC::IL_CC_UL_EQ:
1072    case AMDILCC::IL_CC_UL_NE:
1073    case AMDILCC::IL_CC_UL_LT:
1074    case AMDILCC::IL_CC_UL_GT:
1075      {
1076        const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1077            &this->getTargetMachine())->getSubtargetImpl();
1078        if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)) {
1079          generateMachineInst(opCode, DST.getReg(), LHS.getReg(), RHS.getReg());
1080        } else {
1081          generateLongRelational(MI, opCode);
1082        }
1083      }
1084      break;
1085    case AMDILCC::COND_ERROR:
1086      assert(0 && "Invalid CC code");
1087      break;
1088  };
1089}
1090
1091//===----------------------------------------------------------------------===//
1092// TargetLowering Class Implementation Begins
1093//===----------------------------------------------------------------------===//
1094  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
1095: TargetLowering(TM, new TargetLoweringObjectFileELF())
1096{
1097  int types[] =
1098  {
1099    (int)MVT::i8,
1100    (int)MVT::i16,
1101    (int)MVT::i32,
1102    (int)MVT::f32,
1103    (int)MVT::f64,
1104    (int)MVT::i64,
1105    (int)MVT::v2i8,
1106    (int)MVT::v4i8,
1107    (int)MVT::v2i16,
1108    (int)MVT::v4i16,
1109    (int)MVT::v4f32,
1110    (int)MVT::v4i32,
1111    (int)MVT::v2f32,
1112    (int)MVT::v2i32,
1113    (int)MVT::v2f64,
1114    (int)MVT::v2i64
1115  };
1116
1117  int IntTypes[] =
1118  {
1119    (int)MVT::i8,
1120    (int)MVT::i16,
1121    (int)MVT::i32,
1122    (int)MVT::i64
1123  };
1124
1125  int FloatTypes[] =
1126  {
1127    (int)MVT::f32,
1128    (int)MVT::f64
1129  };
1130
1131  int VectorTypes[] =
1132  {
1133    (int)MVT::v2i8,
1134    (int)MVT::v4i8,
1135    (int)MVT::v2i16,
1136    (int)MVT::v4i16,
1137    (int)MVT::v4f32,
1138    (int)MVT::v4i32,
1139    (int)MVT::v2f32,
1140    (int)MVT::v2i32,
1141    (int)MVT::v2f64,
1142    (int)MVT::v2i64
1143  };
1144  size_t numTypes = sizeof(types) / sizeof(*types);
1145  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
1146  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
1147  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
1148
1149  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
1150      &this->getTargetMachine())->getSubtargetImpl();
1151  // These are the current register classes that are
1152  // supported
1153
1154  addRegisterClass(MVT::i32, AMDIL::GPRI32RegisterClass);
1155  addRegisterClass(MVT::f32, AMDIL::GPRF32RegisterClass);
1156
1157  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1158    addRegisterClass(MVT::f64, AMDIL::GPRF64RegisterClass);
1159    addRegisterClass(MVT::v2f64, AMDIL::GPRV2F64RegisterClass);
1160  }
1161  if (stm->device()->isSupported(AMDILDeviceInfo::ByteOps)) {
1162    addRegisterClass(MVT::i8, AMDIL::GPRI8RegisterClass);
1163    addRegisterClass(MVT::v2i8, AMDIL::GPRV2I8RegisterClass);
1164    addRegisterClass(MVT::v4i8, AMDIL::GPRV4I8RegisterClass);
1165    setOperationAction(ISD::Constant          , MVT::i8   , Legal);
1166  }
1167  if (stm->device()->isSupported(AMDILDeviceInfo::ShortOps)) {
1168    addRegisterClass(MVT::i16, AMDIL::GPRI16RegisterClass);
1169    addRegisterClass(MVT::v2i16, AMDIL::GPRV2I16RegisterClass);
1170    addRegisterClass(MVT::v4i16, AMDIL::GPRV4I16RegisterClass);
1171    setOperationAction(ISD::Constant          , MVT::i16  , Legal);
1172  }
1173  addRegisterClass(MVT::v2f32, AMDIL::GPRV2F32RegisterClass);
1174  addRegisterClass(MVT::v4f32, AMDIL::GPRV4F32RegisterClass);
1175  addRegisterClass(MVT::v2i32, AMDIL::GPRV2I32RegisterClass);
1176  addRegisterClass(MVT::v4i32, AMDIL::GPRV4I32RegisterClass);
1177  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1178    addRegisterClass(MVT::i64, AMDIL::GPRI64RegisterClass);
1179    addRegisterClass(MVT::v2i64, AMDIL::GPRV2I64RegisterClass);
1180  }
1181
1182  for (unsigned int x  = 0; x < numTypes; ++x) {
1183    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
1184
1185    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
1186    // We cannot sextinreg, expand to shifts
1187    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
1188    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1189    setOperationAction(ISD::FP_ROUND, VT, Expand);
1190    setOperationAction(ISD::OR, VT, Custom);
1191    setOperationAction(ISD::SUBE, VT, Expand);
1192    setOperationAction(ISD::SUBC, VT, Expand);
1193    setOperationAction(ISD::ADD, VT, Custom);
1194    setOperationAction(ISD::ADDE, VT, Expand);
1195    setOperationAction(ISD::ADDC, VT, Expand);
1196    setOperationAction(ISD::SETCC, VT, Custom);
1197    setOperationAction(ISD::BRCOND, VT, Custom);
1198    setOperationAction(ISD::BR_CC, VT, Custom);
1199    setOperationAction(ISD::BR_JT, VT, Expand);
1200    setOperationAction(ISD::BRIND, VT, Expand);
1201    // TODO: Implement custom UREM/SREM routines
1202    setOperationAction(ISD::UREM, VT, Expand);
1203    setOperationAction(ISD::SREM, VT, Expand);
1204    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
1205    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
1206    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1207    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1208    setOperationAction(ISDBITCAST, VT, Custom);
1209    setOperationAction(ISD::GlobalAddress, VT, Custom);
1210    setOperationAction(ISD::JumpTable, VT, Custom);
1211    setOperationAction(ISD::ConstantPool, VT, Custom);
1212    setOperationAction(ISD::SELECT_CC, VT, Custom);
1213    setOperationAction(ISD::SELECT, VT, Custom);
1214    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1215    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1216    if (VT != MVT::i64 && VT != MVT::v2i64) {
1217      setOperationAction(ISD::SDIV, VT, Custom);
1218      setOperationAction(ISD::UDIV, VT, Custom);
1219    }
1220    setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1221    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1222  }
1223  for (unsigned int x = 0; x < numFloatTypes; ++x) {
1224    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
1225
1226    // IL does not have these operations for floating point types
1227    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
1228    setOperationAction(ISD::FP_ROUND, VT, Custom);
1229    setOperationAction(ISD::SETOLT, VT, Expand);
1230    setOperationAction(ISD::SETOGE, VT, Expand);
1231    setOperationAction(ISD::SETOGT, VT, Expand);
1232    setOperationAction(ISD::SETOLE, VT, Expand);
1233    setOperationAction(ISD::SETULT, VT, Expand);
1234    setOperationAction(ISD::SETUGE, VT, Expand);
1235    setOperationAction(ISD::SETUGT, VT, Expand);
1236    setOperationAction(ISD::SETULE, VT, Expand);
1237  }
1238
1239  for (unsigned int x = 0; x < numIntTypes; ++x) {
1240    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
1241
1242    // GPU also does not have divrem function for signed or unsigned
1243    setOperationAction(ISD::SDIVREM, VT, Expand);
1244    setOperationAction(ISD::UDIVREM, VT, Expand);
1245    setOperationAction(ISD::FP_ROUND, VT, Expand);
1246
1247    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
1248    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1249    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
1250
1251    // GPU doesn't have a rotl, rotr, or byteswap instruction
1252    setOperationAction(ISD::ROTR, VT, Expand);
1253    setOperationAction(ISD::ROTL, VT, Expand);
1254    setOperationAction(ISD::BSWAP, VT, Expand);
1255
1256    // GPU doesn't have any counting operators
1257    setOperationAction(ISD::CTPOP, VT, Expand);
1258    setOperationAction(ISD::CTTZ, VT, Expand);
1259    setOperationAction(ISD::CTLZ, VT, Expand);
1260  }
1261
1262  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
1263  {
1264    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
1265
1266    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1267    setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1268    setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1269    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
1270    setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1271    setOperationAction(ISD::FP_ROUND, VT, Expand);
1272    setOperationAction(ISD::SDIVREM, VT, Expand);
1273    setOperationAction(ISD::UDIVREM, VT, Expand);
1274    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
1275    // setOperationAction(ISD::VSETCC, VT, Expand);
1276    setOperationAction(ISD::SETCC, VT, Expand);
1277    setOperationAction(ISD::SELECT_CC, VT, Expand);
1278    setOperationAction(ISD::SELECT, VT, Expand);
1279
1280  }
1281  setOperationAction(ISD::FP_ROUND, MVT::Other, Expand);
1282  if (stm->device()->isSupported(AMDILDeviceInfo::LongOps)) {
1283    if (stm->calVersion() < CAL_VERSION_SC_139
1284        || stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
1285      setOperationAction(ISD::MUL, MVT::i64, Custom);
1286    }
1287    setOperationAction(ISD::SUB, MVT::i64, Custom);
1288    setOperationAction(ISD::ADD, MVT::i64, Custom);
1289    setOperationAction(ISD::MULHU, MVT::i64, Expand);
1290    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
1291    setOperationAction(ISD::MULHS, MVT::i64, Expand);
1292    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
1293    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
1294    setOperationAction(ISD::SUB, MVT::v2i64, Expand);
1295    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
1296    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
1297    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
1298    setOperationAction(ISD::UDIV, MVT::v2i64, Expand);
1299    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
1300    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Expand);
1301    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Expand);
1302    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Expand);
1303    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Expand);
1304    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
1305    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
1306    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
1307    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
1308  }
1309  if (stm->device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
1310    // we support loading/storing v2f64 but not operations on the type
1311    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
1312    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
1313    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
1314    setOperationAction(ISD::FP_ROUND, MVT::v2f64, Expand);
1315    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
1316    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
1317    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
1318    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
1319    // We want to expand vector conversions into their scalar
1320    // counterparts.
1321    setOperationAction(ISD::SINT_TO_FP, MVT::v2f64, Expand);
1322    setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Expand);
1323    setOperationAction(ISD::FP_TO_SINT, MVT::v2f64, Expand);
1324    setOperationAction(ISD::FP_TO_UINT, MVT::v2f64, Expand);
1325    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
1326    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
1327    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
1328    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
1329    setOperationAction(ISD::FABS, MVT::f64, Expand);
1330    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
1331  }
1332  // TODO: Fix the UDIV24 algorithm so it works for these
1333  // types correctly. This needs vector comparisons
1334  // for this to work correctly.
1335  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
1336  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
1337  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
1338  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
1339  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
1340  setOperationAction(ISD::SUBC, MVT::Other, Expand);
1341  setOperationAction(ISD::ADDE, MVT::Other, Expand);
1342  setOperationAction(ISD::ADDC, MVT::Other, Expand);
1343  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
1344  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
1345  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
1346  setOperationAction(ISD::BRIND, MVT::Other, Expand);
1347  setOperationAction(ISD::SETCC, MVT::Other, Custom);
1348  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
1349  setOperationAction(ISD::FDIV, MVT::f32, Custom);
1350  setOperationAction(ISD::FDIV, MVT::v2f32, Custom);
1351  setOperationAction(ISD::FDIV, MVT::v4f32, Custom);
1352
1353  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
1354  // Use the default implementation.
1355  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
1356  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
1357  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
1358  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
1359  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
1360  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
1361  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
1362  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
1363  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
1364
1365  setStackPointerRegisterToSaveRestore(AMDIL::SP);
1366  setSchedulingPreference(Sched::RegPressure);
1367  setPow2DivIsCheap(false);
1368  setPrefLoopAlignment(16);
1369  setSelectIsExpensive(true);
1370  setJumpIsExpensive(true);
1371  computeRegisterProperties();
1372
1373  maxStoresPerMemcpy  = 4096;
1374  maxStoresPerMemmove = 4096;
1375  maxStoresPerMemset  = 4096;
1376
1377#undef numTypes
1378#undef numIntTypes
1379#undef numVectorTypes
1380#undef numFloatTypes
1381}
1382
/// Return a static, human-readable name for an AMDIL-specific
/// SelectionDAG node opcode (used by SelectionDAG debug dumps), or 0
/// when the opcode is not one of the AMDILISD target nodes.
const char *
AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
{
  switch (Opcode) {
    default: return 0;
    case AMDILISD::INTTOANY: return "AMDILISD::INTTOANY";
    case AMDILISD::DP_TO_FP:  return "AMDILISD::DP_TO_FP";
    case AMDILISD::FP_TO_DP:  return "AMDILISD::FP_TO_DP";
    case AMDILISD::BITCONV: return "AMDILISD::BITCONV";
    case AMDILISD::CMOV:  return "AMDILISD::CMOV";
    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
    case AMDILISD::INEGATE:  return "AMDILISD::INEGATE";
    case AMDILISD::MAD:  return "AMDILISD::MAD";
    case AMDILISD::UMAD:  return "AMDILISD::UMAD";
    case AMDILISD::CALL:  return "AMDILISD::CALL";
    case AMDILISD::RET:   return "AMDILISD::RET";
    case AMDILISD::IFFB_HI: return "AMDILISD::IFFB_HI";
    case AMDILISD::IFFB_LO: return "AMDILISD::IFFB_LO";
    case AMDILISD::ADD: return "AMDILISD::ADD";
    case AMDILISD::UMUL: return "AMDILISD::UMUL";
    case AMDILISD::AND: return "AMDILISD::AND";
    case AMDILISD::OR: return "AMDILISD::OR";
    case AMDILISD::NOT: return "AMDILISD::NOT";
    case AMDILISD::XOR: return "AMDILISD::XOR";
    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
    case AMDILISD::SMAX: return "AMDILISD::SMAX";
    case AMDILISD::PHIMOVE: return "AMDILISD::PHIMOVE";
    case AMDILISD::MOVE: return "AMDILISD::MOVE";
    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
    case AMDILISD::VEXTRACT: return "AMDILISD::VEXTRACT";
    case AMDILISD::VINSERT: return "AMDILISD::VINSERT";
    case AMDILISD::VCONCAT: return "AMDILISD::VCONCAT";
    case AMDILISD::LCREATE: return "AMDILISD::LCREATE";
    case AMDILISD::LCOMPHI: return "AMDILISD::LCOMPHI";
    case AMDILISD::LCOMPLO: return "AMDILISD::LCOMPLO";
    case AMDILISD::DCREATE: return "AMDILISD::DCREATE";
    case AMDILISD::DCOMPHI: return "AMDILISD::DCOMPHI";
    case AMDILISD::DCOMPLO: return "AMDILISD::DCOMPLO";
    case AMDILISD::LCREATE2: return "AMDILISD::LCREATE2";
    case AMDILISD::LCOMPHI2: return "AMDILISD::LCOMPHI2";
    case AMDILISD::LCOMPLO2: return "AMDILISD::LCOMPLO2";
    case AMDILISD::DCREATE2: return "AMDILISD::DCREATE2";
    case AMDILISD::DCOMPHI2: return "AMDILISD::DCOMPHI2";
    case AMDILISD::DCOMPLO2: return "AMDILISD::DCOMPLO2";
    case AMDILISD::CMP: return "AMDILISD::CMP";
    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
    case AMDILISD::LOOP_NZERO: return "AMDILISD::LOOP_NZERO";
    case AMDILISD::LOOP_ZERO: return "AMDILISD::LOOP_ZERO";
    case AMDILISD::LOOP_CMP: return "AMDILISD::LOOP_CMP";
    case AMDILISD::ADDADDR: return "AMDILISD::ADDADDR";
    // Global (G), local (L), and region (R) atomic nodes, each with a
    // _NORET variant for when the old value is unused.
    case AMDILISD::ATOM_G_ADD: return "AMDILISD::ATOM_G_ADD";
    case AMDILISD::ATOM_G_AND: return "AMDILISD::ATOM_G_AND";
    case AMDILISD::ATOM_G_CMPXCHG: return "AMDILISD::ATOM_G_CMPXCHG";
    case AMDILISD::ATOM_G_DEC: return "AMDILISD::ATOM_G_DEC";
    case AMDILISD::ATOM_G_INC: return "AMDILISD::ATOM_G_INC";
    case AMDILISD::ATOM_G_MAX: return "AMDILISD::ATOM_G_MAX";
    case AMDILISD::ATOM_G_UMAX: return "AMDILISD::ATOM_G_UMAX";
    case AMDILISD::ATOM_G_MIN: return "AMDILISD::ATOM_G_MIN";
    case AMDILISD::ATOM_G_UMIN: return "AMDILISD::ATOM_G_UMIN";
    case AMDILISD::ATOM_G_OR: return "AMDILISD::ATOM_G_OR";
    case AMDILISD::ATOM_G_SUB: return "AMDILISD::ATOM_G_SUB";
    case AMDILISD::ATOM_G_RSUB: return "AMDILISD::ATOM_G_RSUB";
    case AMDILISD::ATOM_G_XCHG: return "AMDILISD::ATOM_G_XCHG";
    case AMDILISD::ATOM_G_XOR: return "AMDILISD::ATOM_G_XOR";
    case AMDILISD::ATOM_G_ADD_NORET: return "AMDILISD::ATOM_G_ADD_NORET";
    case AMDILISD::ATOM_G_AND_NORET: return "AMDILISD::ATOM_G_AND_NORET";
    case AMDILISD::ATOM_G_CMPXCHG_NORET: return "AMDILISD::ATOM_G_CMPXCHG_NORET";
    case AMDILISD::ATOM_G_DEC_NORET: return "AMDILISD::ATOM_G_DEC_NORET";
    case AMDILISD::ATOM_G_INC_NORET: return "AMDILISD::ATOM_G_INC_NORET";
    case AMDILISD::ATOM_G_MAX_NORET: return "AMDILISD::ATOM_G_MAX_NORET";
    case AMDILISD::ATOM_G_UMAX_NORET: return "AMDILISD::ATOM_G_UMAX_NORET";
    case AMDILISD::ATOM_G_MIN_NORET: return "AMDILISD::ATOM_G_MIN_NORET";
    case AMDILISD::ATOM_G_UMIN_NORET: return "AMDILISD::ATOM_G_UMIN_NORET";
    case AMDILISD::ATOM_G_OR_NORET: return "AMDILISD::ATOM_G_OR_NORET";
    case AMDILISD::ATOM_G_SUB_NORET: return "AMDILISD::ATOM_G_SUB_NORET";
    case AMDILISD::ATOM_G_RSUB_NORET: return "AMDILISD::ATOM_G_RSUB_NORET";
    case AMDILISD::ATOM_G_XCHG_NORET: return "AMDILISD::ATOM_G_XCHG_NORET";
    case AMDILISD::ATOM_G_XOR_NORET: return "AMDILISD::ATOM_G_XOR_NORET";
    case AMDILISD::ATOM_L_ADD: return "AMDILISD::ATOM_L_ADD";
    case AMDILISD::ATOM_L_AND: return "AMDILISD::ATOM_L_AND";
    case AMDILISD::ATOM_L_CMPXCHG: return "AMDILISD::ATOM_L_CMPXCHG";
    case AMDILISD::ATOM_L_DEC: return "AMDILISD::ATOM_L_DEC";
    case AMDILISD::ATOM_L_INC: return "AMDILISD::ATOM_L_INC";
    case AMDILISD::ATOM_L_MAX: return "AMDILISD::ATOM_L_MAX";
    case AMDILISD::ATOM_L_UMAX: return "AMDILISD::ATOM_L_UMAX";
    case AMDILISD::ATOM_L_MIN: return "AMDILISD::ATOM_L_MIN";
    case AMDILISD::ATOM_L_UMIN: return "AMDILISD::ATOM_L_UMIN";
    case AMDILISD::ATOM_L_OR: return "AMDILISD::ATOM_L_OR";
    case AMDILISD::ATOM_L_SUB: return "AMDILISD::ATOM_L_SUB";
    case AMDILISD::ATOM_L_RSUB: return "AMDILISD::ATOM_L_RSUB";
    case AMDILISD::ATOM_L_XCHG: return "AMDILISD::ATOM_L_XCHG";
    case AMDILISD::ATOM_L_XOR: return "AMDILISD::ATOM_L_XOR";
    case AMDILISD::ATOM_L_ADD_NORET: return "AMDILISD::ATOM_L_ADD_NORET";
    case AMDILISD::ATOM_L_AND_NORET: return "AMDILISD::ATOM_L_AND_NORET";
    case AMDILISD::ATOM_L_CMPXCHG_NORET: return "AMDILISD::ATOM_L_CMPXCHG_NORET";
    case AMDILISD::ATOM_L_DEC_NORET: return "AMDILISD::ATOM_L_DEC_NORET";
    case AMDILISD::ATOM_L_INC_NORET: return "AMDILISD::ATOM_L_INC_NORET";
    case AMDILISD::ATOM_L_MAX_NORET: return "AMDILISD::ATOM_L_MAX_NORET";
    case AMDILISD::ATOM_L_UMAX_NORET: return "AMDILISD::ATOM_L_UMAX_NORET";
    case AMDILISD::ATOM_L_MIN_NORET: return "AMDILISD::ATOM_L_MIN_NORET";
    case AMDILISD::ATOM_L_UMIN_NORET: return "AMDILISD::ATOM_L_UMIN_NORET";
    case AMDILISD::ATOM_L_OR_NORET: return "AMDILISD::ATOM_L_OR_NORET";
    case AMDILISD::ATOM_L_SUB_NORET: return "AMDILISD::ATOM_L_SUB_NORET";
    case AMDILISD::ATOM_L_RSUB_NORET: return "AMDILISD::ATOM_L_RSUB_NORET";
    case AMDILISD::ATOM_L_XCHG_NORET: return "AMDILISD::ATOM_L_XCHG_NORET";
    case AMDILISD::ATOM_R_ADD: return "AMDILISD::ATOM_R_ADD";
    case AMDILISD::ATOM_R_AND: return "AMDILISD::ATOM_R_AND";
    case AMDILISD::ATOM_R_CMPXCHG: return "AMDILISD::ATOM_R_CMPXCHG";
    case AMDILISD::ATOM_R_DEC: return "AMDILISD::ATOM_R_DEC";
    case AMDILISD::ATOM_R_INC: return "AMDILISD::ATOM_R_INC";
    case AMDILISD::ATOM_R_MAX: return "AMDILISD::ATOM_R_MAX";
    case AMDILISD::ATOM_R_UMAX: return "AMDILISD::ATOM_R_UMAX";
    case AMDILISD::ATOM_R_MIN: return "AMDILISD::ATOM_R_MIN";
    case AMDILISD::ATOM_R_UMIN: return "AMDILISD::ATOM_R_UMIN";
    case AMDILISD::ATOM_R_OR: return "AMDILISD::ATOM_R_OR";
    case AMDILISD::ATOM_R_MSKOR: return "AMDILISD::ATOM_R_MSKOR";
    case AMDILISD::ATOM_R_SUB: return "AMDILISD::ATOM_R_SUB";
    case AMDILISD::ATOM_R_RSUB: return "AMDILISD::ATOM_R_RSUB";
    case AMDILISD::ATOM_R_XCHG: return "AMDILISD::ATOM_R_XCHG";
    case AMDILISD::ATOM_R_XOR: return "AMDILISD::ATOM_R_XOR";
    case AMDILISD::ATOM_R_ADD_NORET: return "AMDILISD::ATOM_R_ADD_NORET";
    case AMDILISD::ATOM_R_AND_NORET: return "AMDILISD::ATOM_R_AND_NORET";
    case AMDILISD::ATOM_R_CMPXCHG_NORET: return "AMDILISD::ATOM_R_CMPXCHG_NORET";
    case AMDILISD::ATOM_R_DEC_NORET: return "AMDILISD::ATOM_R_DEC_NORET";
    case AMDILISD::ATOM_R_INC_NORET: return "AMDILISD::ATOM_R_INC_NORET";
    case AMDILISD::ATOM_R_MAX_NORET: return "AMDILISD::ATOM_R_MAX_NORET";
    case AMDILISD::ATOM_R_UMAX_NORET: return "AMDILISD::ATOM_R_UMAX_NORET";
    case AMDILISD::ATOM_R_MIN_NORET: return "AMDILISD::ATOM_R_MIN_NORET";
    case AMDILISD::ATOM_R_UMIN_NORET: return "AMDILISD::ATOM_R_UMIN_NORET";
    case AMDILISD::ATOM_R_OR_NORET: return "AMDILISD::ATOM_R_OR_NORET";
    case AMDILISD::ATOM_R_MSKOR_NORET: return "AMDILISD::ATOM_R_MSKOR_NORET";
    case AMDILISD::ATOM_R_SUB_NORET: return "AMDILISD::ATOM_R_SUB_NORET";
    case AMDILISD::ATOM_R_RSUB_NORET: return "AMDILISD::ATOM_R_RSUB_NORET";
    case AMDILISD::ATOM_R_XCHG_NORET: return "AMDILISD::ATOM_R_XCHG_NORET";
    case AMDILISD::ATOM_R_XOR_NORET: return "AMDILISD::ATOM_R_XOR_NORET";
    case AMDILISD::APPEND_ALLOC: return "AMDILISD::APPEND_ALLOC";
    case AMDILISD::APPEND_ALLOC_NORET: return "AMDILISD::APPEND_ALLOC_NORET";
    case AMDILISD::APPEND_CONSUME: return "AMDILISD::APPEND_CONSUME";
    case AMDILISD::APPEND_CONSUME_NORET: return "AMDILISD::APPEND_CONSUME_NORET";
    case AMDILISD::IMAGE2D_READ: return "AMDILISD::IMAGE2D_READ";
    case AMDILISD::IMAGE2D_WRITE: return "AMDILISD::IMAGE2D_WRITE";
    case AMDILISD::IMAGE2D_INFO0: return "AMDILISD::IMAGE2D_INFO0";
    case AMDILISD::IMAGE2D_INFO1: return "AMDILISD::IMAGE2D_INFO1";
    case AMDILISD::IMAGE3D_READ: return "AMDILISD::IMAGE3D_READ";
    case AMDILISD::IMAGE3D_WRITE: return "AMDILISD::IMAGE3D_WRITE";
    case AMDILISD::IMAGE3D_INFO0: return "AMDILISD::IMAGE3D_INFO0";
    case AMDILISD::IMAGE3D_INFO1: return "AMDILISD::IMAGE3D_INFO1";

  };
}
1540bool
1541AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1542    const CallInst &I, unsigned Intrinsic) const
1543{
1544  if (Intrinsic <= AMDGPUIntrinsic::last_non_AMDIL_intrinsic
1545      || Intrinsic > AMDGPUIntrinsic::num_AMDIL_intrinsics) {
1546    return false;
1547  }
1548  bool bitCastToInt = false;
1549  unsigned IntNo;
1550  bool isRet = true;
1551  const AMDILSubtarget *STM = &this->getTargetMachine()
1552    .getSubtarget<AMDILSubtarget>();
1553  switch (Intrinsic) {
1554    default: return false; // Don't custom lower most intrinsics.
1555    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32:
1556    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32:
1557             IntNo = AMDILISD::ATOM_G_ADD; break;
1558    case AMDGPUIntrinsic::AMDIL_atomic_add_gi32_noret:
1559    case AMDGPUIntrinsic::AMDIL_atomic_add_gu32_noret:
1560             isRet = false;
1561             IntNo = AMDILISD::ATOM_G_ADD_NORET; break;
1562    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32:
1563    case AMDGPUIntrinsic::AMDIL_atomic_add_li32:
1564             IntNo = AMDILISD::ATOM_L_ADD; break;
1565    case AMDGPUIntrinsic::AMDIL_atomic_add_li32_noret:
1566    case AMDGPUIntrinsic::AMDIL_atomic_add_lu32_noret:
1567             isRet = false;
1568             IntNo = AMDILISD::ATOM_L_ADD_NORET; break;
1569    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32:
1570    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32:
1571             IntNo = AMDILISD::ATOM_R_ADD; break;
1572    case AMDGPUIntrinsic::AMDIL_atomic_add_ri32_noret:
1573    case AMDGPUIntrinsic::AMDIL_atomic_add_ru32_noret:
1574             isRet = false;
1575             IntNo = AMDILISD::ATOM_R_ADD_NORET; break;
1576    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32:
1577    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32:
1578             IntNo = AMDILISD::ATOM_G_AND; break;
1579    case AMDGPUIntrinsic::AMDIL_atomic_and_gi32_noret:
1580    case AMDGPUIntrinsic::AMDIL_atomic_and_gu32_noret:
1581             isRet = false;
1582             IntNo = AMDILISD::ATOM_G_AND_NORET; break;
1583    case AMDGPUIntrinsic::AMDIL_atomic_and_li32:
1584    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32:
1585             IntNo = AMDILISD::ATOM_L_AND; break;
1586    case AMDGPUIntrinsic::AMDIL_atomic_and_li32_noret:
1587    case AMDGPUIntrinsic::AMDIL_atomic_and_lu32_noret:
1588             isRet = false;
1589             IntNo = AMDILISD::ATOM_L_AND_NORET; break;
1590    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32:
1591    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32:
1592             IntNo = AMDILISD::ATOM_R_AND; break;
1593    case AMDGPUIntrinsic::AMDIL_atomic_and_ri32_noret:
1594    case AMDGPUIntrinsic::AMDIL_atomic_and_ru32_noret:
1595             isRet = false;
1596             IntNo = AMDILISD::ATOM_R_AND_NORET; break;
1597    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32:
1598    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32:
1599             IntNo = AMDILISD::ATOM_G_CMPXCHG; break;
1600    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gi32_noret:
1601    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_gu32_noret:
1602             isRet = false;
1603             IntNo = AMDILISD::ATOM_G_CMPXCHG_NORET; break;
1604    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32:
1605    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32:
1606             IntNo = AMDILISD::ATOM_L_CMPXCHG; break;
1607    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_li32_noret:
1608    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_lu32_noret:
1609             isRet = false;
1610             IntNo = AMDILISD::ATOM_L_CMPXCHG_NORET; break;
1611    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32:
1612    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32:
1613             IntNo = AMDILISD::ATOM_R_CMPXCHG; break;
1614    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ri32_noret:
1615    case AMDGPUIntrinsic::AMDIL_atomic_cmpxchg_ru32_noret:
1616             isRet = false;
1617             IntNo = AMDILISD::ATOM_R_CMPXCHG_NORET; break;
1618    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32:
1619    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32:
1620             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1621               IntNo = AMDILISD::ATOM_G_DEC;
1622             } else {
1623               IntNo = AMDILISD::ATOM_G_SUB;
1624             }
1625             break;
1626    case AMDGPUIntrinsic::AMDIL_atomic_dec_gi32_noret:
1627    case AMDGPUIntrinsic::AMDIL_atomic_dec_gu32_noret:
1628             isRet = false;
1629             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1630               IntNo = AMDILISD::ATOM_G_DEC_NORET;
1631             } else {
1632               IntNo = AMDILISD::ATOM_G_SUB_NORET;
1633             }
1634             break;
1635    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32:
1636    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32:
1637             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1638               IntNo = AMDILISD::ATOM_L_DEC;
1639             } else {
1640               IntNo = AMDILISD::ATOM_L_SUB;
1641             }
1642             break;
1643    case AMDGPUIntrinsic::AMDIL_atomic_dec_li32_noret:
1644    case AMDGPUIntrinsic::AMDIL_atomic_dec_lu32_noret:
1645             isRet = false;
1646             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1647               IntNo = AMDILISD::ATOM_L_DEC_NORET;
1648             } else {
1649               IntNo = AMDILISD::ATOM_L_SUB_NORET;
1650             }
1651             break;
1652    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32:
1653    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32:
1654             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1655               IntNo = AMDILISD::ATOM_R_DEC;
1656             } else {
1657               IntNo = AMDILISD::ATOM_R_SUB;
1658             }
1659             break;
1660    case AMDGPUIntrinsic::AMDIL_atomic_dec_ri32_noret:
1661    case AMDGPUIntrinsic::AMDIL_atomic_dec_ru32_noret:
1662             isRet = false;
1663             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1664               IntNo = AMDILISD::ATOM_R_DEC_NORET;
1665             } else {
1666               IntNo = AMDILISD::ATOM_R_SUB_NORET;
1667             }
1668             break;
1669    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32:
1670    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32:
1671             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1672               IntNo = AMDILISD::ATOM_G_INC;
1673             } else {
1674               IntNo = AMDILISD::ATOM_G_ADD;
1675             }
1676             break;
1677    case AMDGPUIntrinsic::AMDIL_atomic_inc_gi32_noret:
1678    case AMDGPUIntrinsic::AMDIL_atomic_inc_gu32_noret:
1679             isRet = false;
1680             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1681               IntNo = AMDILISD::ATOM_G_INC_NORET;
1682             } else {
1683               IntNo = AMDILISD::ATOM_G_ADD_NORET;
1684             }
1685             break;
1686    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32:
1687    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32:
1688             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1689               IntNo = AMDILISD::ATOM_L_INC;
1690             } else {
1691               IntNo = AMDILISD::ATOM_L_ADD;
1692             }
1693             break;
1694    case AMDGPUIntrinsic::AMDIL_atomic_inc_li32_noret:
1695    case AMDGPUIntrinsic::AMDIL_atomic_inc_lu32_noret:
1696             isRet = false;
1697             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1698               IntNo = AMDILISD::ATOM_L_INC_NORET;
1699             } else {
1700               IntNo = AMDILISD::ATOM_L_ADD_NORET;
1701             }
1702             break;
1703    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32:
1704    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32:
1705             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1706               IntNo = AMDILISD::ATOM_R_INC;
1707             } else {
1708               IntNo = AMDILISD::ATOM_R_ADD;
1709             }
1710             break;
1711    case AMDGPUIntrinsic::AMDIL_atomic_inc_ri32_noret:
1712    case AMDGPUIntrinsic::AMDIL_atomic_inc_ru32_noret:
1713             isRet = false;
1714             if (STM->calVersion() >= CAL_VERSION_SC_136) {
1715               IntNo = AMDILISD::ATOM_R_INC_NORET;
1716             } else {
1717               IntNo = AMDILISD::ATOM_R_ADD_NORET;
1718             }
1719             break;
1720    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32:
1721             IntNo = AMDILISD::ATOM_G_MAX; break;
1722    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32:
1723             IntNo = AMDILISD::ATOM_G_UMAX; break;
1724    case AMDGPUIntrinsic::AMDIL_atomic_max_gi32_noret:
1725             isRet = false;
1726             IntNo = AMDILISD::ATOM_G_MAX_NORET; break;
1727    case AMDGPUIntrinsic::AMDIL_atomic_max_gu32_noret:
1728             isRet = false;
1729             IntNo = AMDILISD::ATOM_G_UMAX_NORET; break;
1730    case AMDGPUIntrinsic::AMDIL_atomic_max_li32:
1731             IntNo = AMDILISD::ATOM_L_MAX; break;
1732    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32:
1733             IntNo = AMDILISD::ATOM_L_UMAX; break;
1734    case AMDGPUIntrinsic::AMDIL_atomic_max_li32_noret:
1735             isRet = false;
1736             IntNo = AMDILISD::ATOM_L_MAX_NORET; break;
1737    case AMDGPUIntrinsic::AMDIL_atomic_max_lu32_noret:
1738             isRet = false;
1739             IntNo = AMDILISD::ATOM_L_UMAX_NORET; break;
1740    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32:
1741             IntNo = AMDILISD::ATOM_R_MAX; break;
1742    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32:
1743             IntNo = AMDILISD::ATOM_R_UMAX; break;
1744    case AMDGPUIntrinsic::AMDIL_atomic_max_ri32_noret:
1745             isRet = false;
1746             IntNo = AMDILISD::ATOM_R_MAX_NORET; break;
1747    case AMDGPUIntrinsic::AMDIL_atomic_max_ru32_noret:
1748             isRet = false;
1749             IntNo = AMDILISD::ATOM_R_UMAX_NORET; break;
1750    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32:
1751             IntNo = AMDILISD::ATOM_G_MIN; break;
1752    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32:
1753             IntNo = AMDILISD::ATOM_G_UMIN; break;
1754    case AMDGPUIntrinsic::AMDIL_atomic_min_gi32_noret:
1755             isRet = false;
1756             IntNo = AMDILISD::ATOM_G_MIN_NORET; break;
1757    case AMDGPUIntrinsic::AMDIL_atomic_min_gu32_noret:
1758             isRet = false;
1759             IntNo = AMDILISD::ATOM_G_UMIN_NORET; break;
1760    case AMDGPUIntrinsic::AMDIL_atomic_min_li32:
1761             IntNo = AMDILISD::ATOM_L_MIN; break;
1762    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32:
1763             IntNo = AMDILISD::ATOM_L_UMIN; break;
1764    case AMDGPUIntrinsic::AMDIL_atomic_min_li32_noret:
1765             isRet = false;
1766             IntNo = AMDILISD::ATOM_L_MIN_NORET; break;
1767    case AMDGPUIntrinsic::AMDIL_atomic_min_lu32_noret:
1768             isRet = false;
1769             IntNo = AMDILISD::ATOM_L_UMIN_NORET; break;
1770    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32:
1771             IntNo = AMDILISD::ATOM_R_MIN; break;
1772    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32:
1773             IntNo = AMDILISD::ATOM_R_UMIN; break;
1774    case AMDGPUIntrinsic::AMDIL_atomic_min_ri32_noret:
1775             isRet = false;
1776             IntNo = AMDILISD::ATOM_R_MIN_NORET; break;
1777    case AMDGPUIntrinsic::AMDIL_atomic_min_ru32_noret:
1778             isRet = false;
1779             IntNo = AMDILISD::ATOM_R_UMIN_NORET; break;
1780    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32:
1781    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32:
1782             IntNo = AMDILISD::ATOM_G_OR; break;
1783    case AMDGPUIntrinsic::AMDIL_atomic_or_gi32_noret:
1784    case AMDGPUIntrinsic::AMDIL_atomic_or_gu32_noret:
1785             isRet = false;
1786             IntNo = AMDILISD::ATOM_G_OR_NORET; break;
1787    case AMDGPUIntrinsic::AMDIL_atomic_or_li32:
1788    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32:
1789             IntNo = AMDILISD::ATOM_L_OR; break;
1790    case AMDGPUIntrinsic::AMDIL_atomic_or_li32_noret:
1791    case AMDGPUIntrinsic::AMDIL_atomic_or_lu32_noret:
1792             isRet = false;
1793             IntNo = AMDILISD::ATOM_L_OR_NORET; break;
1794    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32:
1795    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32:
1796             IntNo = AMDILISD::ATOM_R_OR; break;
1797    case AMDGPUIntrinsic::AMDIL_atomic_or_ri32_noret:
1798    case AMDGPUIntrinsic::AMDIL_atomic_or_ru32_noret:
1799             isRet = false;
1800             IntNo = AMDILISD::ATOM_R_OR_NORET; break;
1801    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32:
1802    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32:
1803             IntNo = AMDILISD::ATOM_G_SUB; break;
1804    case AMDGPUIntrinsic::AMDIL_atomic_sub_gi32_noret:
1805    case AMDGPUIntrinsic::AMDIL_atomic_sub_gu32_noret:
1806             isRet = false;
1807             IntNo = AMDILISD::ATOM_G_SUB_NORET; break;
1808    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32:
1809    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32:
1810             IntNo = AMDILISD::ATOM_L_SUB; break;
1811    case AMDGPUIntrinsic::AMDIL_atomic_sub_li32_noret:
1812    case AMDGPUIntrinsic::AMDIL_atomic_sub_lu32_noret:
1813             isRet = false;
1814             IntNo = AMDILISD::ATOM_L_SUB_NORET; break;
1815    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32:
1816    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32:
1817             IntNo = AMDILISD::ATOM_R_SUB; break;
1818    case AMDGPUIntrinsic::AMDIL_atomic_sub_ri32_noret:
1819    case AMDGPUIntrinsic::AMDIL_atomic_sub_ru32_noret:
1820             isRet = false;
1821             IntNo = AMDILISD::ATOM_R_SUB_NORET; break;
1822    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32:
1823    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32:
1824             IntNo = AMDILISD::ATOM_G_RSUB; break;
1825    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gi32_noret:
1826    case AMDGPUIntrinsic::AMDIL_atomic_rsub_gu32_noret:
1827             isRet = false;
1828             IntNo = AMDILISD::ATOM_G_RSUB_NORET; break;
1829    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32:
1830    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32:
1831             IntNo = AMDILISD::ATOM_L_RSUB; break;
1832    case AMDGPUIntrinsic::AMDIL_atomic_rsub_li32_noret:
1833    case AMDGPUIntrinsic::AMDIL_atomic_rsub_lu32_noret:
1834             isRet = false;
1835             IntNo = AMDILISD::ATOM_L_RSUB_NORET; break;
1836    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32:
1837    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32:
1838             IntNo = AMDILISD::ATOM_R_RSUB; break;
1839    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ri32_noret:
1840    case AMDGPUIntrinsic::AMDIL_atomic_rsub_ru32_noret:
1841             isRet = false;
1842             IntNo = AMDILISD::ATOM_R_RSUB_NORET; break;
1843    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32:
1844             bitCastToInt = true;
1845    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32:
1846    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32:
1847             IntNo = AMDILISD::ATOM_G_XCHG; break;
1848    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gf32_noret:
1849             bitCastToInt = true;
1850    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gi32_noret:
1851    case AMDGPUIntrinsic::AMDIL_atomic_xchg_gu32_noret:
1852             isRet = false;
1853             IntNo = AMDILISD::ATOM_G_XCHG_NORET; break;
1854    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32:
1855             bitCastToInt = true;
1856    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32:
1857    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32:
1858             IntNo = AMDILISD::ATOM_L_XCHG; break;
1859    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lf32_noret:
1860             bitCastToInt = true;
1861    case AMDGPUIntrinsic::AMDIL_atomic_xchg_li32_noret:
1862    case AMDGPUIntrinsic::AMDIL_atomic_xchg_lu32_noret:
1863             isRet = false;
1864             IntNo = AMDILISD::ATOM_L_XCHG_NORET; break;
1865    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32:
1866             bitCastToInt = true;
1867    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32:
1868    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32:
1869             IntNo = AMDILISD::ATOM_R_XCHG; break;
1870    case AMDGPUIntrinsic::AMDIL_atomic_xchg_rf32_noret:
1871             bitCastToInt = true;
1872    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ri32_noret:
1873    case AMDGPUIntrinsic::AMDIL_atomic_xchg_ru32_noret:
1874             isRet = false;
1875             IntNo = AMDILISD::ATOM_R_XCHG_NORET; break;
1876    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32:
1877    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32:
1878             IntNo = AMDILISD::ATOM_G_XOR; break;
1879    case AMDGPUIntrinsic::AMDIL_atomic_xor_gi32_noret:
1880    case AMDGPUIntrinsic::AMDIL_atomic_xor_gu32_noret:
1881             isRet = false;
1882             IntNo = AMDILISD::ATOM_G_XOR_NORET; break;
1883    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32:
1884    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32:
1885             IntNo = AMDILISD::ATOM_L_XOR; break;
1886    case AMDGPUIntrinsic::AMDIL_atomic_xor_li32_noret:
1887    case AMDGPUIntrinsic::AMDIL_atomic_xor_lu32_noret:
1888             isRet = false;
1889             IntNo = AMDILISD::ATOM_L_XOR_NORET; break;
1890    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32:
1891    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32:
1892             IntNo = AMDILISD::ATOM_R_XOR; break;
1893    case AMDGPUIntrinsic::AMDIL_atomic_xor_ri32_noret:
1894    case AMDGPUIntrinsic::AMDIL_atomic_xor_ru32_noret:
1895             isRet = false;
1896             IntNo = AMDILISD::ATOM_R_XOR_NORET; break;
1897    case AMDGPUIntrinsic::AMDIL_append_alloc_i32:
1898             IntNo = AMDILISD::APPEND_ALLOC; break;
1899    case AMDGPUIntrinsic::AMDIL_append_alloc_i32_noret:
1900             isRet = false;
1901             IntNo = AMDILISD::APPEND_ALLOC_NORET; break;
1902    case AMDGPUIntrinsic::AMDIL_append_consume_i32:
1903             IntNo = AMDILISD::APPEND_CONSUME; break;
1904    case AMDGPUIntrinsic::AMDIL_append_consume_i32_noret:
1905             isRet = false;
1906             IntNo = AMDILISD::APPEND_CONSUME_NORET; break;
1907  };
1908  const AMDILSubtarget *stm = &this->getTargetMachine()
1909    .getSubtarget<AMDILSubtarget>();
1910  AMDILKernelManager *KM = const_cast<AMDILKernelManager*>(
1911      stm->getKernelManager());
1912  KM->setOutputInst();
1913
1914  Info.opc = IntNo;
1915  Info.memVT = (bitCastToInt) ? MVT::f32 : MVT::i32;
1916  Info.ptrVal = I.getOperand(0);
1917  Info.offset = 0;
1918  Info.align = 4;
1919  Info.vol = true;
1920  Info.readMem = isRet;
1921  Info.writeMem = true;
1922  return true;
1923}
1924// The backend supports 32 and 64 bit floating point immediates
1925bool
1926AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
1927{
1928  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1929      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1930    return true;
1931  } else {
1932    return false;
1933  }
1934}
1935
1936bool
1937AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
1938{
1939  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
1940      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
1941    return false;
1942  } else {
1943    return true;
1944  }
1945}
1946
1947
1948// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
1949// be zero. Op is expected to be a target specific node. Used by DAG
1950// combiner.
1951
1952void
1953AMDILTargetLowering::computeMaskedBitsForTargetNode(
1954    const SDValue Op,
1955    APInt &KnownZero,
1956    APInt &KnownOne,
1957    const SelectionDAG &DAG,
1958    unsigned Depth) const
1959{
1960  APInt KnownZero2;
1961  APInt KnownOne2;
1962  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
1963  switch (Op.getOpcode()) {
1964    default: break;
1965    case AMDILISD::SELECT_CC:
1966             DAG.ComputeMaskedBits(
1967                 Op.getOperand(1),
1968                 KnownZero,
1969                 KnownOne,
1970                 Depth + 1
1971                 );
1972             DAG.ComputeMaskedBits(
1973                 Op.getOperand(0),
1974                 KnownZero2,
1975                 KnownOne2
1976                 );
1977             assert((KnownZero & KnownOne) == 0
1978                 && "Bits known to be one AND zero?");
1979             assert((KnownZero2 & KnownOne2) == 0
1980                 && "Bits known to be one AND zero?");
1981             // Only known if known in both the LHS and RHS
1982             KnownOne &= KnownOne2;
1983             KnownZero &= KnownZero2;
1984             break;
1985  };
1986}
1987
// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  // Op is currently ignored: every call site uses the 32-bit convention.
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}
1996
1997// LowerCallResult - Lower the result values of an ISD::CALL into the
1998// appropriate copies out of appropriate physical registers.  This assumes that
1999// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
2000// being lowered.  The returns a SDNode with the same number of values as the
2001// ISD::CALL.
2002SDValue
2003AMDILTargetLowering::LowerCallResult(
2004    SDValue Chain,
2005    SDValue InFlag,
2006    CallingConv::ID CallConv,
2007    bool isVarArg,
2008    const SmallVectorImpl<ISD::InputArg> &Ins,
2009    DebugLoc dl,
2010    SelectionDAG &DAG,
2011    SmallVectorImpl<SDValue> &InVals) const
2012{
2013  // Assign locations to each value returned by this call
2014  SmallVector<CCValAssign, 16> RVLocs;
2015  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
2016                 getTargetMachine(), RVLocs, *DAG.getContext());
2017  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
2018
2019  // Copy all of the result registers out of their specified physreg.
2020  for (unsigned i = 0; i != RVLocs.size(); ++i) {
2021    EVT CopyVT = RVLocs[i].getValVT();
2022    if (RVLocs[i].isRegLoc()) {
2023      Chain = DAG.getCopyFromReg(
2024          Chain,
2025          dl,
2026          RVLocs[i].getLocReg(),
2027          CopyVT,
2028          InFlag
2029          ).getValue(1);
2030      SDValue Val = Chain.getValue(0);
2031      InFlag = Chain.getValue(2);
2032      InVals.push_back(Val);
2033    }
2034  }
2035
2036  return Chain;
2037
2038}
2039
2040//===----------------------------------------------------------------------===//
2041//                           Other Lowering Hooks
2042//===----------------------------------------------------------------------===//
2043
// Expand pseudo instructions that were marked usesCustomInserter.
// Currently only the typed CMP pseudos are handled; everything else is
// returned unchanged.
MachineBasicBlock *
AMDILTargetLowering::EmitInstrWithCustomInserter(
    MachineInstr *MI, MachineBasicBlock *BB) const
{
  const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
  switch (MI->getOpcode()) {
    // ExpandCaseToAllTypes emits a 'case' label for every typed variant
    // of the CMP pseudo; they all share the code below.
    ExpandCaseToAllTypes(AMDIL::CMP);
    generateCMPInstr(MI, BB, TII);
    // The pseudo has been replaced by real instructions; remove it.
    MI->eraseFromParent();
    break;
    default:
    break;
  }
  return BB;
}
2059
2060// Recursively assign SDNodeOrdering to any unordered nodes
2061// This is necessary to maintain source ordering of instructions
2062// under -O0 to avoid odd-looking "skipping around" issues.
2063  static const SDValue
2064Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
2065{
2066  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
2067    DAG.AssignOrdering( New.getNode(), order );
2068    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
2069      Ordered( DAG, order, New.getOperand(i) );
2070  }
2071  return New;
2072}
2073
// LOWER(A) expands to a case label for ISD::A that dispatches to the
// matching LowerA() method and stamps the result with the original
// node's SDNodeOrdering (see Ordered above).
#define LOWER(A) \
  case ISD:: A: \
return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )

// Central dispatch for all operations this target marked as Custom.
// Unhandled opcodes dump the node and assert.
SDValue
AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
  switch (Op.getOpcode()) {
    default:
      Op.getNode()->dump();
      assert(0 && "Custom lowering code for this"
          "instruction is not implemented yet!");
      break;
      LOWER(GlobalAddress);
      LOWER(JumpTable);
      LOWER(ConstantPool);
      LOWER(ExternalSymbol);
      LOWER(FP_TO_SINT);
      LOWER(FP_TO_UINT);
      LOWER(SINT_TO_FP);
      LOWER(UINT_TO_FP);
      LOWER(ADD);
      LOWER(MUL);
      LOWER(SUB);
      LOWER(FDIV);
      LOWER(SDIV);
      LOWER(SREM);
      LOWER(UDIV);
      LOWER(UREM);
      LOWER(BUILD_VECTOR);
      LOWER(INSERT_VECTOR_ELT);
      LOWER(EXTRACT_VECTOR_ELT);
      LOWER(EXTRACT_SUBVECTOR);
      LOWER(SCALAR_TO_VECTOR);
      LOWER(CONCAT_VECTORS);
      LOWER(AND);
      LOWER(OR);
      LOWER(SELECT);
      LOWER(SELECT_CC);
      LOWER(SETCC);
      LOWER(SIGN_EXTEND_INREG);
      LOWER(BITCAST);
      LOWER(DYNAMIC_STACKALLOC);
      LOWER(BRCOND);
      LOWER(BR_CC);
      LOWER(FP_ROUND);
  }
  return Op;
}
2123
// Accessor for the frame offset at which variadic arguments begin.
int
AMDILTargetLowering::getVarArgsFrameOffset() const
{
  return VarArgsFrameOffset;
}
2129#undef LOWER
2130
2131SDValue
2132AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
2133{
2134  SDValue DST = Op;
2135  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
2136  const GlobalValue *G = GADN->getGlobal();
2137  const AMDILSubtarget *stm = &this->getTargetMachine()
2138    .getSubtarget<AMDILSubtarget>();
2139  const AMDILGlobalManager *GM = stm->getGlobalManager();
2140  DebugLoc DL = Op.getDebugLoc();
2141  int64_t base_offset = GADN->getOffset();
2142  int32_t arrayoffset = GM->getArrayOffset(G->getName());
2143  int32_t constoffset = GM->getConstOffset(G->getName());
2144  if (arrayoffset != -1) {
2145    DST = DAG.getConstant(arrayoffset, MVT::i32);
2146    DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
2147        DST, DAG.getConstant(base_offset, MVT::i32));
2148  } else if (constoffset != -1) {
2149    if (GM->getConstHWBit(G->getName())) {
2150      DST = DAG.getConstant(constoffset, MVT::i32);
2151      DST = DAG.getNode(ISD::ADD, DL, MVT::i32,
2152          DST, DAG.getConstant(base_offset, MVT::i32));
2153    } else {
2154      SDValue addr = DAG.getTargetGlobalAddress(G, DL, MVT::i32);
2155      SDValue DPReg = DAG.getRegister(AMDIL::SDP, MVT::i32);
2156      DPReg = DAG.getNode(ISD::ADD, DL, MVT::i32, DPReg,
2157          DAG.getConstant(base_offset, MVT::i32));
2158      DST = DAG.getNode(AMDILISD::ADDADDR, DL, MVT::i32, addr, DPReg);
2159    }
2160  } else {
2161    const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
2162    if (!GV) {
2163      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2164    } else {
2165      if (GV->hasInitializer()) {
2166        const Constant *C = dyn_cast<Constant>(GV->getInitializer());
2167        if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
2168          DST = DAG.getConstant(CI->getValue(), Op.getValueType());
2169
2170        } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
2171          DST = DAG.getConstantFP(CF->getValueAPF(),
2172              Op.getValueType());
2173        } else if (dyn_cast<ConstantAggregateZero>(C)) {
2174          EVT VT = Op.getValueType();
2175          if (VT.isInteger()) {
2176            DST = DAG.getConstant(0, VT);
2177          } else {
2178            DST = DAG.getConstantFP(0, VT);
2179          }
2180        } else {
2181          assert(!"lowering this type of Global Address "
2182              "not implemented yet!");
2183          C->dump();
2184          DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2185        }
2186      } else {
2187        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
2188      }
2189    }
2190  }
2191  return DST;
2192}
2193
2194SDValue
2195AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
2196{
2197  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
2198  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
2199  return Result;
2200}
2201SDValue
2202AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
2203{
2204  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
2205  EVT PtrVT = Op.getValueType();
2206  SDValue Result;
2207  if (CP->isMachineConstantPoolEntry()) {
2208    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
2209        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2210  } else {
2211    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
2212        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
2213  }
2214  return Result;
2215}
2216
2217SDValue
2218AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
2219{
2220  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
2221  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
2222  return Result;
2223}
/// LowerFORMAL_ARGUMENTS - transform physical registers into
/// virtual registers and generate load operations for
/// arguments places on the stack.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  AMDILMachineFunctionInfo *FuncInfo
    = MF.getInfo<AMDILMachineFunctionInfo>();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      // Register-passed argument: mark the physreg live-in and copy it
      // into a fresh virtual register of the matching class.
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFromType(
          RegVT.getSimpleVT().SimpleTy);

      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-passed argument: materialize a load from the frame.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  unsigned int StackSize = CCInfo.getNextStackOffset();
  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  FuncInfo->setBytesToPopOnReturn(StackSize);
  return Chain;
}
2328/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
2329/// by "Src" to address "Dst" with size and alignment information specified by
2330/// the specific parameter attribute. The copy will be passed as a byval
2331/// function parameter.
2332static SDValue
2333CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
2334    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
2335  assert(0 && "MemCopy does not exist yet");
2336  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
2337
2338  return DAG.getMemcpy(Chain,
2339      Src.getDebugLoc(),
2340      Dst, Src, SizeNode, Flags.getByValAlign(),
2341      /*IsVol=*/false, /*AlwaysInline=*/true,
2342      MachinePointerInfo(), MachinePointerInfo());
2343}
2344
2345SDValue
2346AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
2347    SDValue StackPtr, SDValue Arg,
2348    DebugLoc dl, SelectionDAG &DAG,
2349    const CCValAssign &VA,
2350    ISD::ArgFlagsTy Flags) const
2351{
2352  unsigned int LocMemOffset = VA.getLocMemOffset();
2353  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
2354  PtrOff = DAG.getNode(ISD::ADD,
2355      dl,
2356      getPointerTy(), StackPtr, PtrOff);
2357  if (Flags.isByVal()) {
2358    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
2359  } else {
2360    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
2361        MachinePointerInfo::getStack(LocMemOffset),
2362        false, false, 0);
2363  }
2364  return PtrOff;
2365}
/// LowerCall - Function arguments are copied from virtual
/// registers to (physical regs)/(stack frame); CALLSEQ_START and
/// CALLSEQ_END nodes are emitted around the call.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are not supported: isTailCall is forced off here, so every
  // `if (isTailCall)` branch below is dead code kept as a placeholder.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands; this needs to change
  // if we ever support more than one calling convention.
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Total stack space the outgoing arguments occupy.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    // Unreachable: isTailCall was set false above.
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Open the call sequence, reserving NumBytes of stack.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr; // NOTE(review): declared but never used below.
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to the location's type if the CC requires it.
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register-assigned argument: record it; the CopyToReg nodes are
      // emitted after all stores so the glue chain stays contiguous.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this outgoing parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE which stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Merge all the argument stores into one chain token.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  // Copy register arguments into their physical registers, glueing the
  // copies together so they stay adjacent to the call.
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    // Unreachable: tail calls are disabled above.
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and a glue value.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    // Unreachable: tail calls are disabled above.
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    // Unreachable: tail calls are disabled above.
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    // Unreachable: tail calls are disabled above.
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
// checkMADType - Decide whether the ADD rooted at Op could be folded into a
// multiply-add and, if so, whether the 24-bit or 32-bit MAD form applies.
// The answer is reported through the two out-parameters.
//
// NOTE(review): the analysis is currently disabled -- the unconditional
// `return` below leaves both flags false, so callers never see a MAD
// candidate and everything past that point is dead code kept for future
// re-enabling.
static void checkMADType(
    SDValue Op, const AMDILSubtarget *STM, bool& is24bitMAD, bool& is32bitMAD)
{
  bool globalLoadStore = false;
  is24bitMAD = false;
  is32bitMAD = false;
  return;
  // ---- Everything below is unreachable (see note above). ----
  assert(Op.getOpcode() == ISD::ADD && "The opcode must be a add in order for "
      "this to work correctly!");
  if (Op.getNode()->use_empty()) {
    return;
  }
  // Inspect every user of the ADD: the transform is only valid when all of
  // them are load/store address computations.
  for (SDNode::use_iterator nBegin = Op.getNode()->use_begin(),
      nEnd = Op.getNode()->use_end(); nBegin != nEnd; ++nBegin) {
    SDNode *ptr = *nBegin;
    const LSBaseSDNode *lsNode = dyn_cast<LSBaseSDNode>(ptr);
    // If we are not a LSBaseSDNode then we don't do this
    // optimization.
    // If we are a LSBaseSDNode, but the op is not the offset
    // or base pointer, then we don't do this optimization
    // (i.e. we are the value being stored)
    if (!lsNode ||
        (lsNode->writeMem() && lsNode->getOperand(1) == Op)) {
      return;
    }
    // NOTE(review): PT is dereferenced below without a null check; if the
    // source value's type were ever not a PointerType this would crash.
    // Confirm before re-enabling this analysis.
    const PointerType *PT =
      dyn_cast<PointerType>(lsNode->getSrcValue()->getType());
    unsigned as = PT->getAddressSpace();
    // Classify the access: anything that ends up in emulated/global memory
    // forces the 32-bit MAD form.
    switch(as) {
      default:
        globalLoadStore = true;
        // NOTE(review): no `break` here -- `default` falls through into the
        // PRIVATE_ADDRESS case. Harmless today (it can only set the same
        // flag again), but verify the fallthrough is intentional.
      case AMDILAS::PRIVATE_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::PrivateMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::CONSTANT_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::ConstantMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::LOCAL_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::LocalMem)) {
          globalLoadStore = true;
        }
        break;
      case AMDILAS::REGION_ADDRESS:
        if (!STM->device()->usesHardware(AMDILDeviceInfo::RegionMem)) {
          globalLoadStore = true;
        }
        break;
    }
  }
  // Hardware-backed memory accesses can use the faster 24-bit MAD;
  // everything else needs the full 32-bit form.
  if (globalLoadStore) {
    is32bitMAD = true;
  } else {
    is24bitMAD = true;
  }
}
2603
2604SDValue
2605AMDILTargetLowering::LowerADD(SDValue Op, SelectionDAG &DAG) const
2606{
2607  SDValue LHS = Op.getOperand(0);
2608  SDValue RHS = Op.getOperand(1);
2609  DebugLoc DL = Op.getDebugLoc();
2610  EVT OVT = Op.getValueType();
2611  SDValue DST;
2612  const AMDILSubtarget *stm = &this->getTargetMachine()
2613    .getSubtarget<AMDILSubtarget>();
2614  bool isVec = OVT.isVector();
2615  if (OVT.getScalarType() == MVT::i64) {
2616    MVT INTTY = MVT::i32;
2617    if (OVT == MVT::v2i64) {
2618      INTTY = MVT::v2i32;
2619    }
2620    if (stm->device()->usesHardware(AMDILDeviceInfo::LongOps)
2621        && INTTY == MVT::i32) {
2622      DST = DAG.getNode(AMDILISD::ADD,
2623          DL,
2624          OVT,
2625          LHS, RHS);
2626    } else {
2627      SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
2628      // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
2629      LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
2630      RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
2631      LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
2632      RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
2633      INTLO = DAG.getNode(ISD::ADD, DL, INTTY, LHSLO, RHSLO);
2634      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, LHSHI, RHSHI);
2635      SDValue cmp;
2636      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2637          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
2638          INTLO, RHSLO);
2639      cmp = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, cmp);
2640      INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
2641      DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
2642          INTLO, INTHI);
2643    }
2644  } else {
2645    if (LHS.getOpcode() == ISD::FrameIndex ||
2646        RHS.getOpcode() == ISD::FrameIndex) {
2647      DST = DAG.getNode(AMDILISD::ADDADDR,
2648          DL,
2649          OVT,
2650          LHS, RHS);
2651    } else {
2652      if (stm->device()->usesHardware(AMDILDeviceInfo::LocalMem)
2653          && LHS.getNumOperands()
2654          && RHS.getNumOperands()) {
2655        bool is24bitMAD = false;
2656        bool is32bitMAD = false;
2657        const ConstantSDNode *LHSConstOpCode =
2658          dyn_cast<ConstantSDNode>(LHS.getOperand(LHS.getNumOperands()-1));
2659        const ConstantSDNode *RHSConstOpCode =
2660          dyn_cast<ConstantSDNode>(RHS.getOperand(RHS.getNumOperands()-1));
2661        if ((LHS.getOpcode() == ISD::SHL && LHSConstOpCode)
2662            || (RHS.getOpcode() == ISD::SHL && RHSConstOpCode)
2663            || LHS.getOpcode() == ISD::MUL
2664            || RHS.getOpcode() == ISD::MUL) {
2665          SDValue Op1, Op2, Op3;
2666          // FIXME: Fix this so that it works for unsigned 24bit ops.
2667          if (LHS.getOpcode() == ISD::MUL) {
2668            Op1 = LHS.getOperand(0);
2669            Op2 = LHS.getOperand(1);
2670            Op3 = RHS;
2671          } else if (RHS.getOpcode() == ISD::MUL) {
2672            Op1 = RHS.getOperand(0);
2673            Op2 = RHS.getOperand(1);
2674            Op3 = LHS;
2675          } else if (LHS.getOpcode() == ISD::SHL && LHSConstOpCode) {
2676            Op1 = LHS.getOperand(0);
2677            Op2 = DAG.getConstant(
2678                1 << LHSConstOpCode->getZExtValue(), MVT::i32);
2679            Op3 = RHS;
2680          } else if (RHS.getOpcode() == ISD::SHL && RHSConstOpCode) {
2681            Op1 = RHS.getOperand(0);
2682            Op2 = DAG.getConstant(
2683                1 << RHSConstOpCode->getZExtValue(), MVT::i32);
2684            Op3 = LHS;
2685          }
2686          checkMADType(Op, stm, is24bitMAD, is32bitMAD);
2687          // We can possibly do a MAD transform!
2688          if (is24bitMAD && stm->device()->usesHardware(AMDILDeviceInfo::Signed24BitOps)) {
2689            uint32_t opcode = AMDGPUIntrinsic::AMDIL_mad24_i32;
2690            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2691            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2692                DL, Tys, DAG.getEntryNode(), DAG.getConstant(opcode, MVT::i32),
2693                Op1, Op2, Op3);
2694          } else if(is32bitMAD) {
2695            SDVTList Tys = DAG.getVTList(OVT/*, MVT::Other*/);
2696            DST = DAG.getNode(ISD::INTRINSIC_W_CHAIN,
2697                DL, Tys, DAG.getEntryNode(),
2698                DAG.getConstant(
2699                  AMDGPUIntrinsic::AMDIL_mad_i32, MVT::i32),
2700                Op1, Op2, Op3);
2701          }
2702        }
2703      }
2704      DST = DAG.getNode(AMDILISD::ADD,
2705          DL,
2706          OVT,
2707          LHS, RHS);
2708    }
2709  }
2710  return DST;
2711}
2712SDValue
2713AMDILTargetLowering::genCLZuN(SDValue Op, SelectionDAG &DAG,
2714    uint32_t bits) const
2715{
2716  DebugLoc DL = Op.getDebugLoc();
2717  EVT INTTY = Op.getValueType();
2718  EVT FPTY;
2719  if (INTTY.isVector()) {
2720    FPTY = EVT(MVT::getVectorVT(MVT::f32,
2721          INTTY.getVectorNumElements()));
2722  } else {
2723    FPTY = EVT(MVT::f32);
2724  }
2725  /* static inline uint
2726     __clz_Nbit(uint x)
2727     {
2728     int xor = 0x3f800000U | x;
2729     float tp = as_float(xor);
2730     float t = tp + -1.0f;
2731     uint tint = as_uint(t);
2732     int cmp = (x != 0);
2733     uint tsrc = tint >> 23;
2734     uint tmask = tsrc & 0xffU;
2735     uint cst = (103 + N)U - tmask;
2736     return cmp ? cst : N;
2737     }
2738     */
2739  assert(INTTY.getScalarType().getSimpleVT().SimpleTy == MVT::i32
2740      && "genCLZu16 only works on 32bit types");
2741  // uint x = Op
2742  SDValue x = Op;
2743  // xornode = 0x3f800000 | x
2744  SDValue xornode = DAG.getNode(ISD::OR, DL, INTTY,
2745      DAG.getConstant(0x3f800000, INTTY), x);
2746  // float tp = as_float(xornode)
2747  SDValue tp = DAG.getNode(ISDBITCAST, DL, FPTY, xornode);
2748  // float t = tp + -1.0f
2749  SDValue t = DAG.getNode(ISD::FADD, DL, FPTY, tp,
2750      DAG.getConstantFP(-1.0f, FPTY));
2751  // uint tint = as_uint(t)
2752  SDValue tint = DAG.getNode(ISDBITCAST, DL, INTTY, t);
2753  // int cmp = (x != 0)
2754  SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2755      DAG.getConstant(CondCCodeToCC(ISD::SETNE, MVT::i32), MVT::i32), x,
2756      DAG.getConstant(0, INTTY));
2757  // uint tsrc = tint >> 23
2758  SDValue tsrc = DAG.getNode(ISD::SRL, DL, INTTY, tint,
2759      DAG.getConstant(23, INTTY));
2760  // uint tmask = tsrc & 0xFF
2761  SDValue tmask = DAG.getNode(ISD::AND, DL, INTTY, tsrc,
2762      DAG.getConstant(0xFFU, INTTY));
2763  // uint cst = (103 + bits) - tmask
2764  SDValue cst = DAG.getNode(ISD::SUB, DL, INTTY,
2765      DAG.getConstant((103U + bits), INTTY), tmask);
2766  // return cmp ? cst : N
2767  cst = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, cst,
2768      DAG.getConstant(bits, INTTY));
2769  return cst;
2770}
2771
2772SDValue
2773AMDILTargetLowering::genCLZu32(SDValue Op, SelectionDAG &DAG) const
2774{
2775  SDValue DST = SDValue();
2776  DebugLoc DL = Op.getDebugLoc();
2777  EVT INTTY = Op.getValueType();
2778  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
2779      &this->getTargetMachine())->getSubtargetImpl();
2780  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
2781    //__clz_32bit(uint u)
2782    //{
2783    // int z = __amdil_ffb_hi(u) ;
2784    // return z < 0 ? 32 : z;
2785    // }
2786    // uint u = op
2787    SDValue u = Op;
2788    // int z = __amdil_ffb_hi(u)
2789    SDValue z = DAG.getNode(AMDILISD::IFFB_HI, DL, INTTY, u);
2790    // int cmp = z < 0
2791    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2792        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
2793        z, DAG.getConstant(0, INTTY));
2794    // return cmp ? 32 : z
2795    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp,
2796        DAG.getConstant(32, INTTY), z);
2797  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
2798    //  static inline uint
2799    //__clz_32bit(uint x)
2800    //{
2801    //    uint zh = __clz_16bit(x >> 16);
2802    //    uint zl = __clz_16bit(x & 0xffffU);
2803    //   return zh == 16U ? 16U + zl : zh;
2804    //}
2805    // uint x = Op
2806    SDValue x = Op;
2807    // uint xs16 = x >> 16
2808    SDValue xs16 = DAG.getNode(ISD::SRL, DL, INTTY, x,
2809        DAG.getConstant(16, INTTY));
2810    // uint zh = __clz_16bit(xs16)
2811    SDValue zh = genCLZuN(xs16, DAG, 16);
2812    // uint xa16 = x & 0xFFFF
2813    SDValue xa16 = DAG.getNode(ISD::AND, DL, INTTY, x,
2814        DAG.getConstant(0xFFFFU, INTTY));
2815    // uint zl = __clz_16bit(xa16)
2816    SDValue zl = genCLZuN(xa16, DAG, 16);
2817    // uint cmp = zh == 16U
2818    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
2819        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
2820        zh, DAG.getConstant(16U, INTTY));
2821    // uint zl16 = zl + 16
2822    SDValue zl16 = DAG.getNode(ISD::ADD, DL, INTTY,
2823        DAG.getConstant(16, INTTY), zl);
2824    // return cmp ? zl16 : zh
2825    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
2826        cmp, zl16, zh);
2827  } else {
2828    assert(0 && "Attempting to generate a CLZ function with an"
2829        " unknown graphics card");
2830  }
2831  return DST;
2832}
// genCLZu64 - Count leading zeros of a 64-bit value (scalar or vector),
// built from the 32-bit / N-bit CLZ helpers by device generation.
SDValue
AMDILTargetLowering::genCLZu64(SDValue Op, SelectionDAG &DAG) const
{
  SDValue DST = SDValue();
  DebugLoc DL = Op.getDebugLoc();
  EVT INTTY;
  EVT LONGTY = Op.getValueType();
  bool isVec = LONGTY.isVector();
  if (isVec) {
    INTTY = EVT(MVT::getVectorVT(MVT::i32, Op.getValueType()
          .getVectorNumElements()));
  } else {
    INTTY = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() >= AMDILDeviceInfo::HD5XXX) {
    // Evergreen:
    // static inline uint
    // __clz_u64(ulong x)
    // {
    //uint zhi = __clz_32bit((uint)(x >> 32));
    //uint zlo = __clz_32bit((uint)(x & 0xffffffffUL));
    //return zhi == 32U ? 32U + zlo : zhi;
    //}
    //ulong x = op
    SDValue x = Op;
    // uint xlo = (uint)(x & 0xFFFFFFFF)
    SDValue xlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xhi = (uint)(x >> 32)
    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, x);
    // uint zhi = __clz_32bit(xhi)
    SDValue zhi = genCLZu32(xhi, DAG);
    // uint zlo = __clz_32bit(xlo)
    SDValue zlo = genCLZu32(xlo, DAG);
    // uint cmp = zhi == 32
    SDValue cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhi, DAG.getConstant(32U, INTTY));
    // uint zlop32 = 32 + zlo
    SDValue zlop32 = DAG.getNode(AMDILISD::ADD, DL, INTTY,
        DAG.getConstant(32U, INTTY), zlo);
    // return cmp ? zlop32: zhi
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp, zlop32, zhi);
  } else if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
    // HD4XXX: split the 64-bit value into three 23-bit chunks (the widest
    // width the float-based genCLZuN trick handles exactly).
    //  static inline uint
    //__clz_64bit(ulong x)
    //{
    //uint zh = __clz_23bit((uint)(x >> 46)) - 5U;
    //uint zm = __clz_23bit((uint)(x >> 23) & 0x7fffffU);
    //uint zl = __clz_23bit((uint)x & 0x7fffffU);
    //uint r = zh == 18U ? 18U + zm : zh;
    //return zh + zm == 41U ? 41U + zl : r;
    //}
    //ulong x = Op
    SDValue x = Op;
    // ulong xs46 = x >> 46
    SDValue xs46 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(46, LONGTY));
    // uint ixs46 = (uint)xs46
    SDValue ixs46 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs46);
    // ulong xs23 = x >> 23
    SDValue xs23 = DAG.getNode(ISD::SRL, DL, LONGTY, x,
        DAG.getConstant(23, LONGTY));
    // uint ixs23 = (uint)xs23
    SDValue ixs23 = DAG.getNode(ISD::TRUNCATE, DL, INTTY, xs23);
    // uint xs23m23 = ixs23 & 0x7FFFFF
    SDValue xs23m23 = DAG.getNode(ISD::AND, DL, INTTY, ixs23,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint ix = (uint)x
    SDValue ix = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, x);
    // uint xm23 = ix & 0x7FFFFF
    SDValue xm23 = DAG.getNode(ISD::AND, DL, INTTY, ix,
        DAG.getConstant(0x7fffffU, INTTY));
    // uint zh = __clz_23bit(ixs46)
    SDValue zh = genCLZuN(ixs46, DAG, 23);
    // uint zm = __clz_23bit(xs23m23)
    SDValue zm = genCLZuN(xs23m23, DAG, 23);
    // uint zl = __clz_23bit(xm23)
    SDValue zl = genCLZuN(xm23, DAG, 23);
    // uint zhm5 = zh - 5  (the top chunk only holds 64-46 = 18 real bits)
    SDValue zhm5 = DAG.getNode(ISD::ADD, DL, INTTY, zh,
        DAG.getConstant(-5U, INTTY));
    SDValue const18 = DAG.getConstant(18, INTTY);
    SDValue const41 = DAG.getConstant(41, INTTY);
    // uint cmp1 = (zhm5 == 18)
    SDValue cmp1 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5, const18);
    // uint zhm5zm = zhm5 + zm
    SDValue zhm5zm = DAG.getNode(ISD::ADD, DL, INTTY, zhm5, zm);
    // uint cmp2 = zhm5zm == 41
    SDValue cmp2 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
        zhm5zm, const41);
    // uint zmp18 = zm + 18
    SDValue zmp18 = DAG.getNode(ISD::ADD, DL, INTTY, zm, const18);
    // uint zlp41 = zl + 41
    SDValue zlp41 = DAG.getNode(ISD::ADD, DL, INTTY, zl, const41);
    // uint r = cmp1 ? zmp18 : zhm5
    SDValue r = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY,
        cmp1, zmp18, zhm5);
    // return cmp2 ? zlp41 : r
    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, cmp2, zlp41, r);
  } else {
    assert(0 && "Attempting to generate a CLZ function with an"
        " unknown graphics card");
  }
  return DST;
}
// genf64toi64 - Lower an f64 -> i64/u64 conversion (scalar or vector).
// `includeSign` selects the signed variant.  On post-HD6XXX devices the
// conversion is done with float arithmetic; on older devices the mantissa
// is extracted and shifted manually.
SDValue
AMDILTargetLowering::genf64toi64(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  SDValue DST;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64, RHSVT
          .getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32, RHSVT
          .getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
      &this->getTargetMachine())->getSubtargetImpl();
  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
    // unsigned version:
    // uint uhi = (uint)(d * 0x1.0p-32);
    // uint ulo = (uint)(mad((double)uhi, -0x1.0p+32, d));
    // return as_ulong2((uint2)(ulo, uhi));
    //
    // signed version:
    // double ad = fabs(d);
    // long l = unsigned_version(ad);
    // long nl = -l;
    // return d == ad ? l : nl;
    SDValue d = RHS;
    if (includeSign) {
      d = DAG.getNode(ISD::FABS, DL, RHSVT, d);
    }
    // NOTE(review): 0x2f800000 is the IEEE-754 *bit pattern* of 2^-32 as an
    // f32, but here it is passed to getConstantFP as a numeric value
    // (796917760.0), which does not match the 0x1.0p-32 in the pseudo-code
    // above.  Same concern for 0xcf800000 (bit pattern of -2^32) below --
    // confirm the intended getConstantFP semantics.
    SDValue uhid = DAG.getNode(ISD::FMUL, DL, RHSVT, d,
        DAG.getConstantFP(0x2f800000, RHSVT));
    SDValue uhi = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, uhid);
    SDValue ulod = DAG.getNode(ISD::UINT_TO_FP, DL, RHSVT, uhi);
    ulod = DAG.getNode(AMDILISD::MAD, DL, RHSVT, ulod,
        DAG.getConstantFP(0xcf800000, RHSVT), d);
    SDValue ulo = DAG.getNode(ISD::FP_TO_UINT, DL, INTVT, ulod);
    SDValue l = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, ulo, uhi);
    if (includeSign) {
      // Signed: negate the magnitude when the input was negative
      // (d == fabs(RHS) selects between l and -l).
      SDValue nl = DAG.getNode(AMDILISD::INEGATE, DL, LONGVT, l);
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, RHSVT,
          DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::f64), MVT::i32),
          RHS, d);
      l = DAG.getNode(AMDILISD::CMOVLOG, DL, LONGVT, c, l, nl);
    }
    DST = l;
  } else {
    /*
       __attribute__((always_inline)) long
       cast_f64_to_i64(double d)
       {
    // Convert d in to 32-bit components
    long x = as_long(d);
    xhi = LCOMPHI(x);
    xlo = LCOMPLO(x);

    // Generate 'normalized' mantissa
    mhi = xhi | 0x00100000; // hidden bit
    mhi <<= 11;
    temp = xlo >> (32 - 11);
    mhi |= temp
    mlo = xlo << 11;

    // Compute shift right count from exponent
    e = (xhi >> (52-32)) & 0x7ff;
    sr = 1023 + 63 - e;
    srge64 = sr >= 64;
    srge32 = sr >= 32;

    // Compute result for 0 <= sr < 32
    rhi0 = mhi >> (sr &31);
    rlo0 = mlo >> (sr &31);
    temp = mhi << (32 - sr);
    temp |= rlo0;
    rlo0 = sr ? temp : rlo0;

    // Compute result for 32 <= sr
    rhi1 = 0;
    rlo1 = srge64 ? 0 : rhi0;

    // Pick between the 2 results
    rhi = srge32 ? rhi1 : rhi0;
    rlo = srge32 ? rlo1 : rlo0;

    // Optional saturate on overflow
    srlt0 = sr < 0;
    rhi = srlt0 ? MAXVALUE : rhi;
    rlo = srlt0 ? MAXVALUE : rlo;

    // Create long
    res = LCREATE( rlo, rhi );

    // Deal with sign bit (ignoring whether result is signed or unsigned value)
    if (includeSign) {
    sign = ((signed int) xhi) >> 31; fill with sign bit
    sign = LCREATE( sign, sign );
    res += sign;
    res ^= sign;
    }

    return res;
    }
    */
    // NOTE(review): the pseudo-code's "saturate on overflow" (sr < 0) step
    // is not implemented in the DAG below.
    SDValue c11 = DAG.getConstant( 63 - 52, INTVT );
    SDValue c32 = DAG.getConstant( 32, INTVT );

    // Convert d in to 32-bit components
    SDValue d = RHS;
    SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

    // Generate 'normalized' mantissa: OR in the hidden bit and shift the
    // 52-bit mantissa up to the top of the 64-bit pair.
    SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
        xhi, DAG.getConstant( 0x00100000, INTVT ) );
    mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
    SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
        xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
    mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );
    SDValue mlo = DAG.getNode( ISD::SHL, DL, INTVT, xlo, c11 );

    // Compute shift right count from exponent
    SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
        xhi, DAG.getConstant( 52-32, INTVT ) );
    e = DAG.getNode( ISD::AND, DL, INTVT,
        e, DAG.getConstant( 0x7ff, INTVT ) );
    SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( 1023 + 63, INTVT ), e );
    SDValue srge64 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(64, INTVT));
    SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
        sr, DAG.getConstant(32, INTVT));

    // Compute result for 0 <= sr < 32
    SDValue rhi0 = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
    SDValue rlo0 = DAG.getNode( ISD::SRL, DL, INTVT, mlo, sr );
    temp = DAG.getNode( ISD::SUB, DL, INTVT, c32, sr );
    temp = DAG.getNode( ISD::SHL, DL, INTVT, mhi, temp );
    temp = DAG.getNode( ISD::OR,  DL, INTVT, rlo0, temp );
    rlo0 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, sr, temp, rlo0 );

    // Compute result for 32 <= sr
    SDValue rhi1 = DAG.getConstant( 0, INTVT );
    SDValue rlo1 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge64, rhi1, rhi0 );

    // Pick between the 2 results
    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rhi1, rhi0 );
    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
        srge32, rlo1, rlo0 );

    // Create long
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );

    // Deal with sign bit: add-then-xor with the replicated sign word is a
    // branch-free two's-complement negate when the input was negative.
    if (includeSign) {
      SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
          xhi, DAG.getConstant( 31, INTVT ) );
      sign = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, sign, sign );
      res = DAG.getNode( ISD::ADD, DL, LONGVT, res, sign );
      res = DAG.getNode( ISD::XOR, DL, LONGVT, res, sign );
    }
    DST = res;
  }
  return DST;
}
// Expand an f64 -> i32 conversion (scalar or per-lane for vectors) into
// 32-bit integer operations, for devices with no native f64->int support.
//   RHS         - the f64 value(s) to convert.
//   includeSign - true for a signed (FP_TO_SINT-style) conversion: the
//                 result is conditionally negated using the input's sign.
// Returns the converted i32 value(s) with the same vector width as RHS.
SDValue
AMDILTargetLowering::genf64toi32(SDValue RHS, SelectionDAG &DAG,
    bool includeSign) const
{
  EVT INTVT;
  EVT LONGVT;
  DebugLoc DL = RHS.getDebugLoc();
  EVT RHSVT = RHS.getValueType();
  bool isVec = RHSVT.isVector();
  // Pick i32/i64 types matching the input's vector width (scalar otherwise).
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  /*
     __attribute__((always_inline)) int
     cast_f64_to_[u|i]32(double d)
     {
  // Convert d in to 32-bit components
  long x = as_long(d);
  xhi = LCOMPHI(x);
  xlo = LCOMPLO(x);

  // Generate 'normalized' mantissa
  mhi = xhi | 0x00100000; // hidden bit
  mhi <<= 11;
  temp = xlo >> (32 - 11);
  mhi |= temp

  // Compute shift right count from exponent
  e = (xhi >> (52-32)) & 0x7ff;
  sr = 1023 + 31 - e;
  srge32 = sr >= 32;

  // Compute result for 0 <= sr < 32
  res = mhi >> (sr &31);
  res = srge32 ? 0 : res;

  // Optional saturate on overflow
  srlt0 = sr < 0;
  res = srlt0 ? MAXVALUE : res;

  // Deal with sign bit (ignoring whether result is signed or unsigned value)
  if (includeSign) {
  sign = ((signed int) xhi) >> 31; fill with sign bit
  res += sign;
  res ^= sign;
  }

  return res;
  }
  */
  // NOTE(review): the "saturate on overflow" step from the pseudo-code
  // above is NOT emitted by the DAG sequence below — confirm whether
  // overflow saturation is required here.
  // 11 == 63 - 52: shift that left-justifies the 52-bit mantissa in 64 bits.
  SDValue c11 = DAG.getConstant( 63 - 52, INTVT );

  // Convert d in to 32-bit components
  SDValue d = RHS;
  SDValue x = DAG.getNode(ISDBITCAST, DL, LONGVT, d);
  SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
  SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );

  // Generate 'normalized' mantissa
  // 0x00100000 sets the implicit (hidden) leading 1 of the mantissa.
  SDValue mhi = DAG.getNode( ISD::OR, DL, INTVT,
      xhi, DAG.getConstant( 0x00100000, INTVT ) );
  mhi = DAG.getNode( ISD::SHL, DL, INTVT, mhi, c11 );
  SDValue temp = DAG.getNode( ISD::SRL, DL, INTVT,
      xlo, DAG.getConstant( 32 - (63 - 52), INTVT ) );
  mhi = DAG.getNode( ISD::OR, DL, INTVT, mhi, temp );

  // Compute shift right count from exponent
  SDValue e = DAG.getNode( ISD::SRL, DL, INTVT,
      xhi, DAG.getConstant( 52-32, INTVT ) );
  e = DAG.getNode( ISD::AND, DL, INTVT,
      e, DAG.getConstant( 0x7ff, INTVT ) );
  // sr = (bias + 31) - e; if sr >= 32 the magnitude underflows to 0.
  SDValue sr = DAG.getNode( ISD::SUB, DL, INTVT,
      DAG.getConstant( 1023 + 31, INTVT ), e );
  SDValue srge32 = DAG.getNode( AMDILISD::CMP, DL, INTVT,
      DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
      sr, DAG.getConstant(32, INTVT));

  // Compute result for 0 <= sr < 32
  SDValue res = DAG.getNode( ISD::SRL, DL, INTVT, mhi, sr );
  res = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
      srge32, DAG.getConstant(0,INTVT), res );

  // Deal with sign bit
  if (includeSign) {
    // sign = all-ones when negative, 0 when positive; (res + sign) ^ sign
    // negates res exactly when the input was negative.
    SDValue sign = DAG.getNode( ISD::SRA, DL, INTVT,
        xhi, DAG.getConstant( 31, INTVT ) );
    res = DAG.getNode( ISD::ADD, DL, INTVT, res, sign );
    res = DAG.getNode( ISD::XOR, DL, INTVT, res, sign );
  }
  return res;
}
3216SDValue
3217AMDILTargetLowering::LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const
3218{
3219  SDValue RHS = Op.getOperand(0);
3220  EVT RHSVT = RHS.getValueType();
3221  MVT RST = RHSVT.getScalarType().getSimpleVT();
3222  EVT LHSVT = Op.getValueType();
3223  MVT LST = LHSVT.getScalarType().getSimpleVT();
3224  DebugLoc DL = Op.getDebugLoc();
3225  SDValue DST;
3226  const AMDILTargetMachine*
3227    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3228    (&this->getTargetMachine());
3229  const AMDILSubtarget*
3230    stm = dynamic_cast<const AMDILSubtarget*>(
3231        amdtm->getSubtargetImpl());
3232  if (RST == MVT::f64 && RHSVT.isVector()
3233      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3234    // We dont support vector 64bit floating point convertions.
3235    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3236      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3237          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3238      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3239      if (!x) {
3240        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3241      } else {
3242        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3243            DST, op, DAG.getTargetConstant(x, MVT::i32));
3244      }
3245    }
3246  } else {
3247    if (RST == MVT::f64
3248        && LST == MVT::i32) {
3249      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3250        DST = SDValue(Op.getNode(), 0);
3251      } else {
3252        DST = genf64toi32(RHS, DAG, true);
3253      }
3254    } else if (RST == MVT::f64
3255        && LST == MVT::i64) {
3256      DST = genf64toi64(RHS, DAG, true);
3257    } else if (RST == MVT::f64
3258        && (LST == MVT::i8 || LST == MVT::i16)) {
3259      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3260        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3261      } else {
3262        SDValue ToInt = genf64toi32(RHS, DAG, true);
3263        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3264      }
3265
3266    } else {
3267      DST = SDValue(Op.getNode(), 0);
3268    }
3269  }
3270  return DST;
3271}
3272
3273SDValue
3274AMDILTargetLowering::LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const
3275{
3276  SDValue DST;
3277  SDValue RHS = Op.getOperand(0);
3278  EVT RHSVT = RHS.getValueType();
3279  MVT RST = RHSVT.getScalarType().getSimpleVT();
3280  EVT LHSVT = Op.getValueType();
3281  MVT LST = LHSVT.getScalarType().getSimpleVT();
3282  DebugLoc DL = Op.getDebugLoc();
3283  const AMDILTargetMachine*
3284    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3285    (&this->getTargetMachine());
3286  const AMDILSubtarget*
3287    stm = dynamic_cast<const AMDILSubtarget*>(
3288        amdtm->getSubtargetImpl());
3289  if (RST == MVT::f64 && RHSVT.isVector()
3290      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3291    // We dont support vector 64bit floating point convertions.
3292    for (unsigned x = 0, y = RHSVT.getVectorNumElements(); x < y; ++x) {
3293      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3294          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3295      op = DAG.getNode(ISD::FP_TO_SINT, DL, LST, op);
3296      if (!x) {
3297        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3298      } else {
3299        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT,
3300            DST, op, DAG.getTargetConstant(x, MVT::i32));
3301      }
3302
3303    }
3304  } else {
3305    if (RST == MVT::f64
3306        && LST == MVT::i32) {
3307      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3308        DST = SDValue(Op.getNode(), 0);
3309      } else {
3310        DST = genf64toi32(RHS, DAG, false);
3311      }
3312    } else if (RST == MVT::f64
3313        && LST == MVT::i64) {
3314      DST = genf64toi64(RHS, DAG, false);
3315    } else if (RST == MVT::f64
3316        && (LST == MVT::i8 || LST == MVT::i16)) {
3317      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3318        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, SDValue(Op.getNode(), 0));
3319      } else {
3320        SDValue ToInt = genf64toi32(RHS, DAG, false);
3321        DST = DAG.getNode(ISD::TRUNCATE, DL, LHSVT, ToInt);
3322      }
3323
3324    } else {
3325      DST = SDValue(Op.getNode(), 0);
3326    }
3327  }
3328  return DST;
3329}
// Expand a u32 -> f64 conversion (scalar or per-lane for vectors) for
// devices without native support.
//   RHS   - the u32 value(s) to convert.
//   LHSVT - the f64 result type (same vector width as RHS).
// Newer CAL versions use the classic exponent-bias trick; older ones
// build the double's exponent/mantissa fields by hand.
SDValue
AMDILTargetLowering::genu32tof64(SDValue RHS, EVT LHSVT,
    SelectionDAG &DAG) const
{
  EVT RHSVT = RHS.getValueType();
  DebugLoc DL = RHS.getDebugLoc();
  EVT INTVT;
  EVT LONGVT;
  bool isVec = RHSVT.isVector();
  // Pick i32/i64 types matching the input's vector width (scalar otherwise).
  if (isVec) {
    LONGVT = EVT(MVT::getVectorVT(MVT::i64,
          RHSVT.getVectorNumElements()));
    INTVT = EVT(MVT::getVectorVT(MVT::i32,
          RHSVT.getVectorNumElements()));
  } else {
    LONGVT = EVT(MVT::i64);
    INTVT = EVT(MVT::i32);
  }
  SDValue x = RHS;
  const AMDILTargetMachine*
    amdtm = reinterpret_cast<const AMDILTargetMachine*>
    (&this->getTargetMachine());
  const AMDILSubtarget*
    stm = dynamic_cast<const AMDILSubtarget*>(
        amdtm->getSubtargetImpl());
  if (stm->calVersion() >= CAL_VERSION_SC_135) {
    // unsigned x = RHS;
    // ulong xd = (ulong)(0x4330_0000 << 32) | x;
    // double d = as_double( xd );
    // return d - 0x1.0p+52; // 0x1.0p+52 == 0x4330_0000_0000_0000
    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, x,
        DAG.getConstant( 0x43300000, INTVT ) );
    SDValue d = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
    SDValue offsetd = DAG.getNode( ISDBITCAST, DL, LHSVT,
        DAG.getConstant( 0x4330000000000000ULL, LONGVT ) );
    return DAG.getNode( ISD::FSUB, DL, LHSVT, d, offsetd );
  } else {
    // Manual construction: count leading zeros to normalize, then pack
    // the exponent and fraction fields of the IEEE double directly.
    SDValue clz = genCLZu32(x, DAG);

    // Compute the exponent. 1023 is the bias, 31-clz the actual power of 2
    // Except for an input 0... which requires a 0 exponent
    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
        DAG.getConstant( (1023+31), INTVT), clz );
    // exp = (x != 0) ? exp : x, i.e. force a zero exponent for input 0.
    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, x, exp, x );

    // Normalize frac
    SDValue rhi = DAG.getNode( ISD::SHL, DL, INTVT, x, clz );

    // Eliminate hidden bit
    rhi = DAG.getNode( ISD::AND, DL, INTVT,
        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );

    // Pack exponent and frac
    SDValue rlo = DAG.getNode( ISD::SHL, DL, INTVT,
        rhi, DAG.getConstant( (32 - 11), INTVT ) );
    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
        rhi, DAG.getConstant( 11, INTVT ) );
    exp = DAG.getNode( ISD::SHL, DL, INTVT,
        exp, DAG.getConstant( 20, INTVT ) );
    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );

    // Convert 2 x 32 in to 1 x 64, then to double precision float type
    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
  }
}
3396SDValue
3397AMDILTargetLowering::genu64tof64(SDValue RHS, EVT LHSVT,
3398    SelectionDAG &DAG) const
3399{
3400  EVT RHSVT = RHS.getValueType();
3401  DebugLoc DL = RHS.getDebugLoc();
3402  EVT INTVT;
3403  EVT LONGVT;
3404  bool isVec = RHSVT.isVector();
3405  if (isVec) {
3406    INTVT = EVT(MVT::getVectorVT(MVT::i32,
3407          RHSVT.getVectorNumElements()));
3408  } else {
3409    INTVT = EVT(MVT::i32);
3410  }
3411  LONGVT = RHSVT;
3412  SDValue x = RHS;
3413  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
3414      &this->getTargetMachine())->getSubtargetImpl();
3415  if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3416    // double dhi = (double)(as_uint2(x).y);
3417    // double dlo = (double)(as_uint2(x).x);
3418    // return mad(dhi, 0x1.0p+32, dlo)
3419    SDValue dhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x);
3420    dhi = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dhi);
3421    SDValue dlo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x);
3422    dlo = DAG.getNode(ISD::UINT_TO_FP, DL, LHSVT, dlo);
3423    return DAG.getNode(AMDILISD::MAD, DL, LHSVT, dhi,
3424        DAG.getConstantFP(0x4f800000, LHSVT), dlo);
3425  } else if (stm->calVersion() >= CAL_VERSION_SC_135) {
3426    // double lo = as_double( as_ulong( 0x1.0p+52) | (u & 0xffff_ffffUL));
3427    // double hi = as_double( as_ulong( 0x1.0p+84) | (u >> 32));
3428    // return (hi - (0x1.0p+84 + 0x1.0p+52)) + lo;
3429    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );  // x & 0xffff_ffffUL
3430    SDValue xd = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xlo, DAG.getConstant( 0x43300000, INTVT ) );
3431    SDValue lo = DAG.getNode( ISDBITCAST, DL, LHSVT, xd );
3432    SDValue xhi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 :  AMDILISD::LCOMPHI, DL, INTVT, x ); // x >> 32
3433    SDValue xe = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, xhi, DAG.getConstant( 0x45300000, INTVT ) );
3434    SDValue hi = DAG.getNode( ISDBITCAST, DL, LHSVT, xe );
3435    SDValue c = DAG.getNode( ISDBITCAST, DL, LHSVT,
3436        DAG.getConstant( 0x4530000000100000ULL, LONGVT ) );
3437    hi = DAG.getNode( ISD::FSUB, DL, LHSVT, hi, c );
3438    return DAG.getNode( ISD::FADD, DL, LHSVT, hi, lo );
3439
3440  } else {
3441    SDValue clz = genCLZu64(x, DAG);
3442    SDValue xhi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, x );
3443    SDValue xlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, x );
3444
3445    // Compute the exponent. 1023 is the bias, 63-clz the actual power of 2
3446    SDValue exp = DAG.getNode( ISD::SUB, DL, INTVT,
3447        DAG.getConstant( (1023+63), INTVT), clz );
3448    SDValue mash = DAG.getNode( ISD::OR, DL, INTVT, xhi, xlo );
3449    exp = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3450        mash, exp, mash );  // exp = exp, or 0 if input was 0
3451
3452    // Normalize frac
3453    SDValue clz31 = DAG.getNode( ISD::AND, DL, INTVT,
3454        clz, DAG.getConstant( 31, INTVT ) );
3455    SDValue rshift = DAG.getNode( ISD::SUB, DL, INTVT,
3456        DAG.getConstant( 32, INTVT ), clz31 );
3457    SDValue t1 = DAG.getNode( ISD::SHL, DL, INTVT, xhi, clz31 );
3458    SDValue t2 = DAG.getNode( ISD::SRL, DL, INTVT, xlo, rshift );
3459    t2 = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT, clz31, t2, t1 );
3460    SDValue rhi1 = DAG.getNode( ISD::OR, DL, INTVT, t1, t2 );
3461    SDValue rlo1 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3462    SDValue rhi2 = DAG.getNode( ISD::SHL, DL, INTVT, xlo, clz31 );
3463    SDValue rlo2 = DAG.getConstant( 0, INTVT );
3464    SDValue clz32 = DAG.getNode( ISD::AND, DL, INTVT,
3465        clz, DAG.getConstant( 32, INTVT ) );
3466    SDValue rhi = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3467        clz32, rhi2, rhi1 );
3468    SDValue rlo = DAG.getNode( AMDILISD::CMOVLOG, DL, INTVT,
3469        clz32, rlo2, rlo1 );
3470
3471    // Eliminate hidden bit
3472    rhi = DAG.getNode( ISD::AND, DL, INTVT,
3473        rhi, DAG.getConstant( 0x7fffffff, INTVT ) );
3474
3475    // Save bits needed to round properly
3476    SDValue round = DAG.getNode( ISD::AND, DL, INTVT,
3477        rlo, DAG.getConstant( 0x7ff, INTVT ) );
3478
3479    // Pack exponent and frac
3480    rlo = DAG.getNode( ISD::SRL, DL, INTVT,
3481        rlo, DAG.getConstant( 11, INTVT ) );
3482    SDValue temp = DAG.getNode( ISD::SHL, DL, INTVT,
3483        rhi, DAG.getConstant( (32 - 11), INTVT ) );
3484    rlo = DAG.getNode( ISD::OR, DL, INTVT, rlo, temp );
3485    rhi = DAG.getNode( ISD::SRL, DL, INTVT,
3486        rhi, DAG.getConstant( 11, INTVT ) );
3487    exp = DAG.getNode( ISD::SHL, DL, INTVT,
3488        exp, DAG.getConstant( 20, INTVT ) );
3489    rhi = DAG.getNode( ISD::OR, DL, INTVT, rhi, exp );
3490
3491    // Compute rounding bit
3492    SDValue even = DAG.getNode( ISD::AND, DL, INTVT,
3493        rlo, DAG.getConstant( 1, INTVT ) );
3494    SDValue grs = DAG.getNode( ISD::AND, DL, INTVT,
3495        round, DAG.getConstant( 0x3ff, INTVT ) );
3496    grs = DAG.getNode( AMDILISD::CMP, DL, INTVT,
3497        DAG.getConstant( CondCCodeToCC( ISD::SETNE, MVT::i32), MVT::i32),
3498        grs, DAG.getConstant( 0, INTVT ) ); // -1 if any GRS set, 0 if none
3499    grs = DAG.getNode( ISD::OR, DL, INTVT, grs, even );
3500    round = DAG.getNode( ISD::SRL, DL, INTVT,
3501        round, DAG.getConstant( 10, INTVT ) );
3502    round = DAG.getNode( ISD::AND, DL, INTVT, round, grs ); // 0 or 1
3503
3504    // Add rounding bit
3505    SDValue lround = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT,
3506        round, DAG.getConstant( 0, INTVT ) );
3507    SDValue res = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, rlo, rhi );
3508    res = DAG.getNode( ISD::ADD, DL, LONGVT, res, lround );
3509    return DAG.getNode(ISDBITCAST, DL, LHSVT, res);
3510  }
3511}
3512SDValue
3513AMDILTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3514{
3515  SDValue RHS = Op.getOperand(0);
3516  EVT RHSVT = RHS.getValueType();
3517  MVT RST = RHSVT.getScalarType().getSimpleVT();
3518  EVT LHSVT = Op.getValueType();
3519  MVT LST = LHSVT.getScalarType().getSimpleVT();
3520  DebugLoc DL = Op.getDebugLoc();
3521  SDValue DST;
3522  EVT INTVT;
3523  EVT LONGVT;
3524  const AMDILTargetMachine*
3525    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3526    (&this->getTargetMachine());
3527  const AMDILSubtarget*
3528    stm = dynamic_cast<const AMDILSubtarget*>(
3529        amdtm->getSubtargetImpl());
3530  if (LST == MVT::f64 && LHSVT.isVector()
3531      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3532    // We dont support vector 64bit floating point convertions.
3533    DST = Op;
3534    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3535      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3536          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3537      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3538      if (!x) {
3539        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3540      } else {
3541        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3542            op, DAG.getTargetConstant(x, MVT::i32));
3543      }
3544
3545    }
3546  } else {
3547
3548    if (RST == MVT::i32
3549        && LST == MVT::f64) {
3550      if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3551        DST = SDValue(Op.getNode(), 0);
3552      } else {
3553        DST = genu32tof64(RHS, LHSVT, DAG);
3554      }
3555    } else if (RST == MVT::i64
3556        && LST == MVT::f64) {
3557      DST = genu64tof64(RHS, LHSVT, DAG);
3558    } else {
3559      DST = SDValue(Op.getNode(), 0);
3560    }
3561  }
3562  return DST;
3563}
3564
3565SDValue
3566AMDILTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const
3567{
3568  SDValue RHS = Op.getOperand(0);
3569  EVT RHSVT = RHS.getValueType();
3570  MVT RST = RHSVT.getScalarType().getSimpleVT();
3571  EVT INTVT;
3572  EVT LONGVT;
3573  SDValue DST;
3574  bool isVec = RHSVT.isVector();
3575  DebugLoc DL = Op.getDebugLoc();
3576  EVT LHSVT = Op.getValueType();
3577  MVT LST = LHSVT.getScalarType().getSimpleVT();
3578  const AMDILTargetMachine*
3579    amdtm = reinterpret_cast<const AMDILTargetMachine*>
3580    (&this->getTargetMachine());
3581  const AMDILSubtarget*
3582    stm = dynamic_cast<const AMDILSubtarget*>(
3583        amdtm->getSubtargetImpl());
3584  if (LST == MVT::f64 && LHSVT.isVector()
3585      && stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX)  {
3586    // We dont support vector 64bit floating point convertions.
3587    for (unsigned x = 0, y = LHSVT.getVectorNumElements(); x < y; ++x) {
3588      SDValue op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
3589          DL, RST, RHS, DAG.getTargetConstant(x, MVT::i32));
3590      op = DAG.getNode(ISD::UINT_TO_FP, DL, LST, op);
3591      if (!x) {
3592        DST = DAG.getNode(AMDILISD::VBUILD, DL, LHSVT, op);
3593      } else {
3594        DST = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, LHSVT, DST,
3595            op, DAG.getTargetConstant(x, MVT::i32));
3596      }
3597
3598    }
3599  } else {
3600
3601    if (isVec) {
3602      LONGVT = EVT(MVT::getVectorVT(MVT::i64,
3603            RHSVT.getVectorNumElements()));
3604      INTVT = EVT(MVT::getVectorVT(MVT::i32,
3605            RHSVT.getVectorNumElements()));
3606    } else {
3607      LONGVT = EVT(MVT::i64);
3608      INTVT = EVT(MVT::i32);
3609    }
3610    MVT RST = RHSVT.getScalarType().getSimpleVT();
3611    if ((RST == MVT::i32 || RST == MVT::i64)
3612        && LST == MVT::f64) {
3613      if (RST == MVT::i32) {
3614        if (stm->device()->getGeneration() > AMDILDeviceInfo::HD6XXX) {
3615          DST = SDValue(Op.getNode(), 0);
3616          return DST;
3617        }
3618      }
3619      SDValue c31 = DAG.getConstant( 31, INTVT );
3620      SDValue cSbit = DAG.getConstant( 0x80000000, INTVT );
3621
3622      SDValue S;      // Sign, as 0 or -1
3623      SDValue Sbit;   // Sign bit, as one bit, MSB only.
3624      if (RST == MVT::i32) {
3625        Sbit = DAG.getNode( ISD::AND, DL, INTVT, RHS, cSbit );
3626        S = DAG.getNode(ISD::SRA, DL, RHSVT, RHS, c31 );
3627      } else { // 64-bit case... SRA of 64-bit values is slow
3628        SDValue hi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, RHS );
3629        Sbit = DAG.getNode( ISD::AND, DL, INTVT, hi, cSbit );
3630        SDValue temp = DAG.getNode( ISD::SRA, DL, INTVT, hi, c31 );
3631        S = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, RHSVT, temp, temp );
3632      }
3633
3634      // get abs() of input value, given sign as S (0 or -1)
3635      // SpI = RHS + S
3636      SDValue SpI = DAG.getNode(ISD::ADD, DL, RHSVT, RHS, S);
3637      // SpIxS = SpI ^ S
3638      SDValue SpIxS = DAG.getNode(ISD::XOR, DL, RHSVT, SpI, S);
3639
3640      // Convert unsigned value to double precision
3641      SDValue R;
3642      if (RST == MVT::i32) {
3643        // r = cast_u32_to_f64(SpIxS)
3644        R = genu32tof64(SpIxS, LHSVT, DAG);
3645      } else {
3646        // r = cast_u64_to_f64(SpIxS)
3647        R = genu64tof64(SpIxS, LHSVT, DAG);
3648      }
3649
3650      // drop in the sign bit
3651      SDValue t = DAG.getNode( AMDILISD::BITCONV, DL, LONGVT, R );
3652      SDValue thi = DAG.getNode( (isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTVT, t );
3653      SDValue tlo = DAG.getNode( (isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTVT, t );
3654      thi = DAG.getNode( ISD::OR, DL, INTVT, thi, Sbit );
3655      t = DAG.getNode( (isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, LONGVT, tlo, thi );
3656      DST = DAG.getNode( AMDILISD::BITCONV, DL, LHSVT, t );
3657    } else {
3658      DST = SDValue(Op.getNode(), 0);
3659    }
3660  }
3661  return DST;
3662}
// Lower 64-bit integer subtraction (i64 or v2i64) as two 32-bit
// subtractions with manual borrow propagation; other types pass through.
SDValue
AMDILTargetLowering::LowerSUB(SDValue Op, SelectionDAG &DAG) const
{
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = RHS.getValueType().isVector();
  if (OVT.getScalarType() == MVT::i64) {
    /*const AMDILTargetMachine*
      amdtm = reinterpret_cast<const AMDILTargetMachine*>
      (&this->getTargetMachine());
      const AMDILSubtarget*
      stm = dynamic_cast<const AMDILSubtarget*>(
      amdtm->getSubtargetImpl());*/
    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // Split both operands into 32-bit low/high halves.
    SDValue LHSLO, LHSHI, RHSLO, RHSHI, INTLO, INTHI;
    // TODO: need to turn this into a bitcast of i64/v2i64 to v2i32/v4i32
    LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, LHS);
    RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, INTTY, RHS);
    LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, LHS);
    RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, INTTY, RHS);
    INTLO = DAG.getNode(ISD::SUB, DL, INTTY, LHSLO, RHSLO);
    INTHI = DAG.getNode(ISD::SUB, DL, INTTY, LHSHI, RHSHI);
    //TODO: need to use IBORROW on HD5XXX and later hardware
    // cmp is the borrow mask: LHSLO u< RHSLO.  Per the CMP mask
    // convention used elsewhere in this file, the result is presumably
    // all-ones (-1) when true, so adding it to INTHI subtracts the
    // borrow.
    SDValue cmp;
    if (OVT == MVT::i64) {
      cmp = DAG.getNode(AMDILISD::CMP, DL, INTTY,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSLO, RHSLO);
    } else {
      // Vector case: compare each lane's low word individually, then
      // rebuild the v2i32 borrow mask.
      SDValue cmplo;
      SDValue cmphi;
      SDValue LHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue LHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, LHSLO, DAG.getTargetConstant(1, MVT::i32));
      SDValue RHSRLO = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(0, MVT::i32));
      SDValue RHSRHI = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
          DL, MVT::i32, RHSLO, DAG.getTargetConstant(1, MVT::i32));
      cmplo = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRLO, RHSRLO);
      cmphi = DAG.getNode(AMDILISD::CMP, DL, MVT::i32,
          DAG.getConstant(CondCCodeToCC(ISD::SETULT, MVT::i32), MVT::i32),
          LHSRHI, RHSRHI);
      cmp = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i32, cmplo);
      cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i32,
          cmp, cmphi, DAG.getTargetConstant(1, MVT::i32));
    }
    INTHI = DAG.getNode(ISD::ADD, DL, INTTY, INTHI, cmp);
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, OVT,
        INTLO, INTHI);
  } else {
    DST = SDValue(Op.getNode(), 0);
  }
  return DST;
}
3726SDValue
3727AMDILTargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const
3728{
3729  EVT OVT = Op.getValueType();
3730  SDValue DST;
3731  if (OVT.getScalarType() == MVT::f64) {
3732    DST = LowerFDIV64(Op, DAG);
3733  } else if (OVT.getScalarType() == MVT::f32) {
3734    DST = LowerFDIV32(Op, DAG);
3735  } else {
3736    DST = SDValue(Op.getNode(), 0);
3737  }
3738  return DST;
3739}
3740
3741SDValue
3742AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
3743{
3744  EVT OVT = Op.getValueType();
3745  SDValue DST;
3746  if (OVT.getScalarType() == MVT::i64) {
3747    DST = LowerSDIV64(Op, DAG);
3748  } else if (OVT.getScalarType() == MVT::i32) {
3749    DST = LowerSDIV32(Op, DAG);
3750  } else if (OVT.getScalarType() == MVT::i16
3751      || OVT.getScalarType() == MVT::i8) {
3752    DST = LowerSDIV24(Op, DAG);
3753  } else {
3754    DST = SDValue(Op.getNode(), 0);
3755  }
3756  return DST;
3757}
3758
3759SDValue
3760AMDILTargetLowering::LowerUDIV(SDValue Op, SelectionDAG &DAG) const
3761{
3762  EVT OVT = Op.getValueType();
3763  SDValue DST;
3764  if (OVT.getScalarType() == MVT::i64) {
3765    DST = LowerUDIV64(Op, DAG);
3766  } else if (OVT.getScalarType() == MVT::i32) {
3767    DST = LowerUDIV32(Op, DAG);
3768  } else if (OVT.getScalarType() == MVT::i16
3769      || OVT.getScalarType() == MVT::i8) {
3770    DST = LowerUDIV24(Op, DAG);
3771  } else {
3772    DST = SDValue(Op.getNode(), 0);
3773  }
3774  return DST;
3775}
3776
3777SDValue
3778AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
3779{
3780  EVT OVT = Op.getValueType();
3781  SDValue DST;
3782  if (OVT.getScalarType() == MVT::i64) {
3783    DST = LowerSREM64(Op, DAG);
3784  } else if (OVT.getScalarType() == MVT::i32) {
3785    DST = LowerSREM32(Op, DAG);
3786  } else if (OVT.getScalarType() == MVT::i16) {
3787    DST = LowerSREM16(Op, DAG);
3788  } else if (OVT.getScalarType() == MVT::i8) {
3789    DST = LowerSREM8(Op, DAG);
3790  } else {
3791    DST = SDValue(Op.getNode(), 0);
3792  }
3793  return DST;
3794}
3795
3796SDValue
3797AMDILTargetLowering::LowerUREM(SDValue Op, SelectionDAG &DAG) const
3798{
3799  EVT OVT = Op.getValueType();
3800  SDValue DST;
3801  if (OVT.getScalarType() == MVT::i64) {
3802    DST = LowerUREM64(Op, DAG);
3803  } else if (OVT.getScalarType() == MVT::i32) {
3804    DST = LowerUREM32(Op, DAG);
3805  } else if (OVT.getScalarType() == MVT::i16) {
3806    DST = LowerUREM16(Op, DAG);
3807  } else if (OVT.getScalarType() == MVT::i8) {
3808    DST = LowerUREM8(Op, DAG);
3809  } else {
3810    DST = SDValue(Op.getNode(), 0);
3811  }
3812  return DST;
3813}
3814
// Lower 64-bit integer multiply (i64 or v2i64) as 32-bit operations:
//   low  = lo(l1 * l0)
//   high = h0*l1 + h1*l0 + mulhi(l1, l0)
// (the h1*h0 term overflows past bit 63 and is dropped). Other types
// pass through unmodified.
SDValue
AMDILTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue DST;
  bool isVec = OVT.isVector();
  if (OVT.getScalarType() != MVT::i64)
  {
    DST = SDValue(Op.getNode(), 0);
  } else {
    assert(OVT.getScalarType() == MVT::i64 && "Only 64 bit mul should be lowered!");
    // TODO: This needs to be turned into a tablegen pattern
    SDValue LHS = Op.getOperand(0);
    SDValue RHS = Op.getOperand(1);

    MVT INTTY = MVT::i32;
    if (OVT == MVT::v2i64) {
      INTTY = MVT::v2i32;
    }
    // mul64(h1, l1, h0, l0)
    // Split both operands into 32-bit low/high halves.
    SDValue LHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, LHS);
    SDValue LHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, LHS);
    SDValue RHSLO = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO,
        DL,
        INTTY, RHS);
    SDValue RHSHI = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI,
        DL,
        INTTY, RHS);
    // MULLO_UINT_1 r1, h0, l1
    SDValue RHILLO = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSHI, LHSLO);
    // MULLO_UINT_1 r2, h1, l0
    SDValue RLOHHI = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, RHSLO, LHSHI);
    // ADD_INT hr, r1, r2
    SDValue ADDHI = DAG.getNode(ISD::ADD,
        DL,
        INTTY, RHILLO, RLOHHI);
    // MULHI_UINT_1 r3, l1, l0
    // Carry out of the low product: the high 32 bits of l1*l0.
    SDValue RLOLLO = DAG.getNode(ISD::MULHU,
        DL,
        INTTY, RHSLO, LHSLO);
    // ADD_INT hr, hr, r3
    SDValue HIGH = DAG.getNode(ISD::ADD,
        DL,
        INTTY, ADDHI, RLOLLO);
    // MULLO_UINT_1 l3, l1, l0
    SDValue LOW = DAG.getNode(AMDILISD::UMUL,
        DL,
        INTTY, LHSLO, RHSLO);
    // Recombine the 32-bit low/high halves into the 64-bit result.
    DST = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE,
        DL,
        OVT, LOW, HIGH);
  }
  return DST;
}
// Lower BUILD_VECTOR: splat element 0 with VBUILD, then, if the other
// operands differ, insert each non-undef element individually.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Start with a vector whose lanes are all operand 0.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  // All operands identical: the splat already is the answer.
  if (allEqual) {
    return Nodes1;
  }
  // Cases intentionally fall through (4 -> 3 -> 2) so that every
  // remaining element is inserted. Undef elements are skipped: the
  // splatted operand-0 value stays in those lanes.
  // NOTE(review): the insert indices 7/6/5 (rather than 3/2/1) look like
  // a target-specific swizzle encoding for INSERT_VECTOR_ELT — confirm
  // against how this target selects INSERT_VECTOR_ELT.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
3942
// Lower INSERT_VECTOR_ELT to the target VINSERT node. For a constant
// index, a single VINSERT with the right swizzle masks is emitted; for a
// dynamic index, a VINSERT candidate is built for every lane and the
// matching one is selected with CMP + CMOVLOG.
SDValue
AMDILTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  DebugLoc DL = Op.getDebugLoc();
  EVT VT = Op.getValueType();
  const SDValue *ptr = NULL;
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(2));
  uint32_t swizzleNum = 0;
  SDValue DST;
  // Scalar "vector": the inserted-into value is returned unchanged.
  if (!VT.isVector()) {
    SDValue Res = Op.getOperand(0);
    return Res;
  }

  // Insert the element (operand 1), unless it is undef, in which case
  // the original vector value is re-inserted instead.
  if (Op.getOperand(1).getOpcode() != ISD::UNDEF) {
    ptr = &Op.getOperand(1);
  } else {
    ptr = &Op.getOperand(0);
  }
  if (CSDN) {
    // Constant index: build the two per-byte swizzle masks selecting
    // which component the VINSERT writes.
    // NOTE(review): 0x04030201/0x01010101 appear to be byte-per-lane
    // swizzle encodings consumed by VINSERT — confirm against the
    // VINSERT instruction definition.
    swizzleNum = (uint32_t)CSDN->getZExtValue();
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    DST = DAG.getNode(AMDILISD::VINSERT,
        DL,
        VT,
        Op.getOperand(0),
        *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
  } else {
    // Dynamic index: start with the candidate for lane 0
    // (swizzleNum == 0), then for each other lane x build its candidate
    // and select it when the runtime index equals x.
    uint32_t mask2 = 0x04030201 & ~(0xFF << (swizzleNum * 8));
    uint32_t mask3 = 0x01010101 & (0xFF << (swizzleNum * 8));
    SDValue res = DAG.getNode(AMDILISD::VINSERT,
        DL, VT, Op.getOperand(0), *ptr,
        DAG.getTargetConstant(mask2, MVT::i32),
        DAG.getTargetConstant(mask3, MVT::i32));
    for (uint32_t x = 1; x < VT.getVectorNumElements(); ++x) {
      mask2 = 0x04030201 & ~(0xFF << (x * 8));
      mask3 = 0x01010101 & (0xFF << (x * 8));
      SDValue t = DAG.getNode(AMDILISD::VINSERT,
          DL, VT, Op.getOperand(0), *ptr,
          DAG.getTargetConstant(mask2, MVT::i32),
          DAG.getTargetConstant(mask3, MVT::i32));
      // c = (index == x), splatted across the vector for CMOVLOG.
      SDValue c = DAG.getNode(AMDILISD::CMP, DL, ptr->getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op.getOperand(2), DAG.getConstant(x, MVT::i32));
      c = DAG.getNode(AMDILISD::VBUILD, DL, Op.getValueType(), c);
      res = DAG.getNode(AMDILISD::CMOVLOG, DL, VT, c, t, res);
    }
    DST = res;
  }
  return DST;
}
3998
SDValue
// Lower EXTRACT_VECTOR_ELT to the AMDIL VEXTRACT node.  VEXTRACT's lane
// immediate is 1-based (note the +1 below and the 1..vecSize range in the
// dynamic path).
AMDILTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
    SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  //printSDValue(Op, 1);
  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  uint64_t swizzleNum = 0;
  DebugLoc DL = Op.getDebugLoc();
  SDValue Res;
  if (!Op.getOperand(0).getValueType().isVector()) {
    // Extracting from a non-vector: the result is the operand itself.
    Res = Op.getOperand(0);
    return Res;
  }
  if (CSDN) {
    // Static vector extraction
    swizzleNum = CSDN->getZExtValue() + 1;
    Res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT,
        Op.getOperand(0),
        DAG.getTargetConstant(swizzleNum, MVT::i32));
  } else {
    // Dynamic index: extract every lane and select the right one with a
    // compare + conditional-move chain, starting from lane 1 (the first).
    SDValue Op1 = Op.getOperand(1);
    uint32_t vecSize = 4;
    SDValue Op0 = Op.getOperand(0);
    SDValue res = DAG.getNode(AMDILISD::VEXTRACT,
        DL, VT, Op0,
        DAG.getTargetConstant(1, MVT::i32));
    if (Op0.getValueType().isVector()) {
      vecSize = Op0.getValueType().getVectorNumElements();
    }
    for (uint32_t x = 2; x <= vecSize; ++x) {
      SDValue t = DAG.getNode(AMDILISD::VEXTRACT,
          DL, VT, Op0,
          DAG.getTargetConstant(x, MVT::i32));
      // NOTE(review): the comparison is against the 1-based lane number x,
      // so the runtime index Op1 is presumably expected to be 1-based here
      // as well -- confirm against the callers/ISel patterns.
      SDValue c = DAG.getNode(AMDILISD::CMP,
          DL, Op1.getValueType(),
          DAG.getConstant(AMDILCC::IL_CC_I_EQ, MVT::i32),
          Op1, DAG.getConstant(x, MVT::i32));
      res = DAG.getNode(AMDILISD::CMOVLOG, DL,
          VT, c, t, res);

    }
    Res = res;
  }
  return Res;
}
4046
4047SDValue
4048AMDILTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
4049    SelectionDAG &DAG) const
4050{
4051  uint32_t vecSize = Op.getValueType().getVectorNumElements();
4052  SDValue src = Op.getOperand(0);
4053  const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(Op.getOperand(1));
4054  uint64_t offset = 0;
4055  EVT vecType = Op.getValueType().getVectorElementType();
4056  DebugLoc DL = Op.getDebugLoc();
4057  SDValue Result;
4058  if (CSDN) {
4059    offset = CSDN->getZExtValue();
4060    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4061        DL,vecType, src, DAG.getConstant(offset, MVT::i32));
4062    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4063        Op.getValueType(), Result);
4064    for (uint32_t x = 1; x < vecSize; ++x) {
4065      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4066          src, DAG.getConstant(offset + x, MVT::i32));
4067      if (elt.getOpcode() != ISD::UNDEF) {
4068        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4069            Op.getValueType(), Result, elt,
4070            DAG.getConstant(x, MVT::i32));
4071      }
4072    }
4073  } else {
4074    SDValue idx = Op.getOperand(1);
4075    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
4076        DL, vecType, src, idx);
4077    Result = DAG.getNode(AMDILISD::VBUILD, DL,
4078        Op.getValueType(), Result);
4079    for (uint32_t x = 1; x < vecSize; ++x) {
4080      idx = DAG.getNode(ISD::ADD, DL, vecType,
4081          idx, DAG.getConstant(1, MVT::i32));
4082      SDValue elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, vecType,
4083          src, idx);
4084      if (elt.getOpcode() != ISD::UNDEF) {
4085        Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
4086            Op.getValueType(), Result, elt, idx);
4087      }
4088    }
4089  }
4090  return Result;
4091}
4092SDValue
4093AMDILTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
4094    SelectionDAG &DAG) const
4095{
4096  SDValue Res = DAG.getNode(AMDILISD::VBUILD,
4097      Op.getDebugLoc(),
4098      Op.getValueType(),
4099      Op.getOperand(0));
4100  return Res;
4101}
4102SDValue
4103AMDILTargetLowering::LowerAND(SDValue Op, SelectionDAG &DAG) const
4104{
4105  SDValue andOp;
4106  andOp = DAG.getNode(
4107      AMDILISD::AND,
4108      Op.getDebugLoc(),
4109      Op.getValueType(),
4110      Op.getOperand(0),
4111      Op.getOperand(1));
4112  return andOp;
4113}
4114SDValue
4115AMDILTargetLowering::LowerOR(SDValue Op, SelectionDAG &DAG) const
4116{
4117  SDValue orOp;
4118  orOp = DAG.getNode(AMDILISD::OR,
4119      Op.getDebugLoc(),
4120      Op.getValueType(),
4121      Op.getOperand(0),
4122      Op.getOperand(1));
4123  return orOp;
4124}
4125SDValue
4126AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
4127{
4128  SDValue Cond = Op.getOperand(0);
4129  SDValue LHS = Op.getOperand(1);
4130  SDValue RHS = Op.getOperand(2);
4131  DebugLoc DL = Op.getDebugLoc();
4132  Cond = getConversionNode(DAG, Cond, Op, true);
4133  Cond = DAG.getNode(AMDILISD::CMOVLOG,
4134      DL,
4135      Op.getValueType(), Cond, LHS, RHS);
4136  return Cond;
4137}
SDValue
// Lower SELECT_CC into CMP + CMOVLOG.  When the two possible results are
// the i32 constants -1/0 (or 0/-1) the cmov can be dropped: the converted
// compare result (possibly inverted) is then used as the final value
// directly.
AMDILTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Cond;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TRUE = Op.getOperand(2);
  SDValue FALSE = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  DebugLoc DL = Op.getDebugLoc();
  bool skipCMov = false;
  bool genINot = false;
  EVT OVT = Op.getValueType();

  // Check for possible elimination of cmov
  if (TRUE.getValueType().getSimpleVT().SimpleTy == MVT::i32) {
    const ConstantSDNode *trueConst
      = dyn_cast<ConstantSDNode>( TRUE.getNode() );
    const ConstantSDNode *falseConst
      = dyn_cast<ConstantSDNode>( FALSE.getNode() );
    if (trueConst && falseConst) {
      // both possible result values are constants
      // NOTE(review): this presumes the converted CMP result is already
      // -1 on true / 0 on false, so select_cc x, y, -1, 0 is the compare
      // itself -- confirm against AMDILISD::CMP's semantics.
      if (trueConst->isAllOnesValue()
          && falseConst->isNullValue()) { // and convenient constants
        skipCMov = true;
      }
      else if (trueConst->isNullValue()
          && falseConst->isAllOnesValue()) { // less convenient
        skipCMov = true;
        genINot = true;
      }
    }
  }
  // Map the generic condition code onto the AMDIL compare opcode for the
  // operand type.
  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
  unsigned int AMDILCC = CondCCodeToCC(
      SetCCOpcode,
      LHS.getValueType().getSimpleVT().SimpleTy);
  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
  Cond = DAG.getNode(
      AMDILISD::CMP,
      DL,
      LHS.getValueType(),
      DAG.getConstant(AMDILCC, MVT::i32),
      LHS,
      RHS);
  // Convert the compare result to the select's result type.
  Cond = getConversionNode(DAG, Cond, Op, true);
  if (genINot) {
    Cond = DAG.getNode(AMDILISD::NOT, DL, OVT, Cond);
  }
  if (!skipCMov) {
    Cond = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, Cond, TRUE, FALSE);
  }
  return Cond;
}
4192SDValue
4193AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
4194{
4195  SDValue Cond;
4196  SDValue LHS = Op.getOperand(0);
4197  SDValue RHS = Op.getOperand(1);
4198  SDValue CC  = Op.getOperand(2);
4199  DebugLoc DL = Op.getDebugLoc();
4200  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
4201  unsigned int AMDILCC = CondCCodeToCC(
4202      SetCCOpcode,
4203      LHS.getValueType().getSimpleVT().SimpleTy);
4204  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
4205  Cond = DAG.getNode(
4206      AMDILISD::CMP,
4207      DL,
4208      LHS.getValueType(),
4209      DAG.getConstant(AMDILCC, MVT::i32),
4210      LHS,
4211      RHS);
4212  Cond = getConversionNode(DAG, Cond, Op, true);
4213  Cond = DAG.getNode(
4214      ISD::AND,
4215      DL,
4216      Cond.getValueType(),
4217      DAG.getConstant(1, Cond.getValueType()),
4218      Cond);
4219  return Cond;
4220}
4221
4222SDValue
4223AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
4224{
4225  SDValue Data = Op.getOperand(0);
4226  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
4227  DebugLoc DL = Op.getDebugLoc();
4228  EVT DVT = Data.getValueType();
4229  EVT BVT = BaseType->getVT();
4230  unsigned baseBits = BVT.getScalarType().getSizeInBits();
4231  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
4232  unsigned shiftBits = srcBits - baseBits;
4233  if (srcBits < 32) {
4234    // If the op is less than 32 bits, then it needs to extend to 32bits
4235    // so it can properly keep the upper bits valid.
4236    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
4237    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
4238    shiftBits = 32 - baseBits;
4239    DVT = IVT;
4240  }
4241  SDValue Shift = DAG.getConstant(shiftBits, DVT);
4242  // Shift left by 'Shift' bits.
4243  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
4244  // Signed shift Right by 'Shift' bits.
4245  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
4246  if (srcBits < 32) {
4247    // Once the sign extension is done, the op needs to be converted to
4248    // its original type.
4249    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
4250  }
4251  return Data;
4252}
4253EVT
4254AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
4255{
4256  int iSize = (size * numEle);
4257  int vEle = (iSize >> ((size == 64) ? 6 : 5));
4258  if (!vEle) {
4259    vEle = 1;
4260  }
4261  if (size == 64) {
4262    if (vEle == 1) {
4263      return EVT(MVT::i64);
4264    } else {
4265      return EVT(MVT::getVectorVT(MVT::i64, vEle));
4266    }
4267  } else {
4268    if (vEle == 1) {
4269      return EVT(MVT::i32);
4270    } else {
4271      return EVT(MVT::getVectorVT(MVT::i32, vEle));
4272    }
4273  }
4274}
4275
SDValue
// Lower BITCAST between integer/float scalar and short-vector types.
// Floating point sources are first bit-converted to an equivalent integer
// type; then the special i64 <-> <4 x i16> case and four sub-128-bit
// element-size-changing cases are expanded manually.  Anything not handled
// by those cases falls through to a plain AMDIL BITCONV at the bottom.
AMDILTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const
{
  SDValue Src = Op.getOperand(0);
  SDValue Dst = Op;
  SDValue Res;
  DebugLoc DL = Op.getDebugLoc();
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Dst.getValueType();
  // Lets bitcast the floating point types to an
  // equivalent integer type before converting to vectors.
  if (SrcVT.getScalarType().isFloatingPoint()) {
    Src = DAG.getNode(AMDILISD::BITCONV, DL, genIntType(
          SrcVT.getScalarType().getSimpleVT().getSizeInBits(),
          SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1),
        Src);
    SrcVT = Src.getValueType();
  }
  uint32_t ScalarSrcSize = SrcVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t ScalarDstSize = DstVT.getScalarType()
    .getSimpleVT().getSizeInBits();
  uint32_t SrcNumEle = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
  uint32_t DstNumEle = DstVT.isVector() ? DstVT.getVectorNumElements() : 1;
  bool isVec = SrcVT.isVector();
  if (DstVT.getScalarType().isInteger() &&
      (SrcVT.getScalarType().isInteger()
       || SrcVT.getScalarType().isFloatingPoint())) {
    if ((ScalarDstSize == 64 && SrcNumEle == 4 && ScalarSrcSize == 16)
        || (ScalarSrcSize == 64
          && DstNumEle == 4
          && ScalarDstSize == 16)) {
      // This is the problematic case when bitcasting i64 <-> <4 x i16>
      // This approach is a little different as we cannot generate a
      // <4 x i64> vector
      // as that is illegal in our backend and we are already past
      // the DAG legalizer.
      // So, in this case, we will do the following conversion.
      // Case 1:
      // %dst = <4 x i16> %src bitconvert i64 ==>
      // %tmp = <4 x i16> %src convert <4 x i32>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %tmp = <4 x i32> %tmp shift_left <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp or %tmp.xz %tmp.yw
      // %dst = <2 x i32> %tmp bitcast i64
      // case 2:
      // %dst = i64 %src bitconvert <4 x i16> ==>
      // %tmp = i64 %src bitcast <2 x i32>
      // %tmp = <4 x i32> %tmp vinsert %tmp.xxyy
      // %tmp = <4 x i32> %tmp shift_right <0, 16, 0, 16>
      // %tmp = <4 x i32> %tmp and 0xFFFF
      // %dst = <4 x i16> %tmp bitcast <4 x i32>
      SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v4i32,
          DAG.getConstant(0xFFFF, MVT::i32));
      SDValue const16 = DAG.getConstant(16, MVT::i32);
      if (ScalarDstSize == 64) {
        // case 1
        Op = DAG.getSExtOrTrunc(Src, DL, MVT::v4i32);
        Op = DAG.getNode(ISD::AND, DL, Op.getValueType(), Op, mask);
        SDValue x = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(0, MVT::i32));
        SDValue y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(1, MVT::i32));
        y = DAG.getNode(ISD::SHL, DL, MVT::i32, y, const16);
        SDValue z = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(2, MVT::i32));
        SDValue w = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32,
            Op, DAG.getConstant(3, MVT::i32));
        w = DAG.getNode(ISD::SHL, DL, MVT::i32, w, const16);
        // Pack (x | y<<16) and (z | w<<16) into the low/high halves.
        x = DAG.getNode(ISD::OR, DL, MVT::i32, x, y);
        y = DAG.getNode(ISD::OR, DL, MVT::i32, z, w);
        Res = DAG.getNode((isVec) ? AMDILISD::LCREATE2 : AMDILISD::LCREATE, DL, MVT::i64, x, y);
        return Res;
      } else {
        // case 2
        SDValue lo = DAG.getNode((isVec) ? AMDILISD::LCOMPLO2 : AMDILISD::LCOMPLO, DL, MVT::i32, Src);
        SDValue lor16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, lo, const16);
        SDValue hi = DAG.getNode((isVec) ? AMDILISD::LCOMPHI2 : AMDILISD::LCOMPHI, DL, MVT::i32, Src);
        SDValue hir16
          = DAG.getNode(ISD::SRL, DL, MVT::i32, hi, const16);
        // Assemble <lo, lo>>16, hi, hi>>16> and mask each lane to 16 bits.
        SDValue resVec = DAG.getNode(AMDILISD::VBUILD, DL,
            MVT::v4i32, lo);
        SDValue idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(1, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, lor16, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(2, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hi, idxVal);
        idxVal = DAG.getNode(ISD::ZERO_EXTEND, DL,
            getPointerTy(), DAG.getConstant(3, MVT::i32));
        resVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
            resVec, hir16, idxVal);
        resVec = DAG.getNode(ISD::AND, DL, MVT::v4i32, resVec, mask);
        Res = DAG.getSExtOrTrunc(resVec, DL, MVT::v4i16);
        return Res;
      }
    } else {
      // There are four cases we need to worry about for bitcasts
      // where the size of all
      // source, intermediates and result is <= 128 bits, unlike
      // the above case
      // 1) Sub32bit bitcast 32bitAlign
      // %dst = <4 x i8> bitcast i32
      // (also <[2|4] x i16> to <[2|4] x i32>)
      // 2) 32bitAlign bitcast Sub32bit
      // %dst = i32 bitcast <4 x i8>
      // 3) Sub32bit bitcast LargerSub32bit
      // %dst = <2 x i8> bitcast i16
      // (also <4 x i8> to <2 x i16>)
      // 4) Sub32bit bitcast SmallerSub32bit
      // %dst = i16 bitcast <2 x i8>
      // (also <2 x i16> to <4 x i8>)
      // This also only handles types that are powers of two
      if ((ScalarDstSize & (ScalarDstSize - 1))
          || (ScalarSrcSize & (ScalarSrcSize - 1))) {
        // Non-power-of-two element size: intentionally empty, falls
        // through to the generic BITCONV at the bottom of the function.
      } else if (ScalarDstSize >= 32 && ScalarSrcSize < 32) {
        // case 1:
        EVT IntTy = genIntType(ScalarDstSize, SrcNumEle);
#if 0 // TODO: LLVM does not like this for some reason, cannot SignExt vectors
        SDValue res = DAG.getSExtOrTrunc(Src, DL, IntTy);
#else
        // Manually widen each source element into a lane of IntTy.
        SDValue res = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          SDValue temp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              SrcVT.getScalarType(), Src,
              DAG.getConstant(x, MVT::i32));
          temp = DAG.getSExtOrTrunc(temp, DL, IntTy.getScalarType());
          res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntTy,
              res, temp, idx);
        }
#endif
        // Mask each widened element down to the source element width.
        SDValue mask = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getConstant((1 << ScalarSrcSize) - 1, MVT::i32));
        SDValue *newEle = new SDValue[SrcNumEle];
        res = DAG.getNode(ISD::AND, DL, IntTy, res, mask);
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          newEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              IntTy.getScalarType(), res,
              DAG.getConstant(x, MVT::i32));
        }
        // Shift each element to its bit position within its destination
        // lane, then OR groups of Ratio elements together.
        uint32_t Ratio = SrcNumEle / DstNumEle;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          if (x % Ratio) {
            newEle[x] = DAG.getNode(ISD::SHL, DL,
                IntTy.getScalarType(), newEle[x],
                DAG.getConstant(ScalarSrcSize * (x % Ratio),
                  MVT::i32));
          }
        }
        for (uint32_t x = 0; x < SrcNumEle; x += 2) {
          newEle[x] = DAG.getNode(ISD::OR, DL,
              IntTy.getScalarType(), newEle[x], newEle[x + 1]);
        }
        if (ScalarSrcSize == 8) {
          // Four i8s per destination lane: one more combining round, and
          // the merged values end up at every fourth slot.
          for (uint32_t x = 0; x < SrcNumEle; x += 4) {
            newEle[x] = DAG.getNode(ISD::OR, DL,
                IntTy.getScalarType(), newEle[x], newEle[x + 2]);
          }
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 4], idx);
            }
          }
        } else {
          // Two i16s per destination lane: merged values sit at every
          // second slot.
          if (DstNumEle == 1) {
            Dst = newEle[0];
          } else {
            Dst = DAG.getNode(AMDILISD::VBUILD, DL, DstVT,
                newEle[0]);
            for (uint32_t x = 1; x < DstNumEle; ++x) {
              SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                  getPointerTy(), DAG.getConstant(x, MVT::i32));
              Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
                  DstVT, Dst, newEle[x * 2], idx);
            }
          }
        }
        delete [] newEle;
        return Dst;
      } else if (ScalarDstSize < 32 && ScalarSrcSize >= 32) {
        // case 2:
        // Split each >=32-bit source element into 'mult' sub-32-bit
        // pieces by shifting, then truncate the assembled vector.
        EVT IntTy = genIntType(ScalarSrcSize, DstNumEle);
        SDValue vec = DAG.getNode(AMDILISD::VBUILD, DL, IntTy,
            DAG.getUNDEF(IntTy.getScalarType()));
        uint32_t mult = (ScalarDstSize == 8) ? 4 : 2;
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          for (uint32_t y = 0; y < mult; ++y) {
            SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
                getPointerTy(),
                DAG.getConstant(x * mult + y, MVT::i32));
            SDValue t;
            if (SrcNumEle > 1) {
              t = DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                  DL, SrcVT.getScalarType(), Src,
                  DAG.getConstant(x, MVT::i32));
            } else {
              t = Src;
            }
            if (y != 0) {
              t = DAG.getNode(ISD::SRL, DL, t.getValueType(),
                  t, DAG.getConstant(y * ScalarDstSize,
                    MVT::i32));
            }
            vec = DAG.getNode(ISD::INSERT_VECTOR_ELT,
                DL, IntTy, vec, t, idx);
          }
        }
        Dst = DAG.getSExtOrTrunc(vec, DL, DstVT);
        return Dst;
      } else if (ScalarDstSize == 16 && ScalarSrcSize == 8) {
        // case 3:
        // Widen each i8 to i16, mask to 8 bits, then pack pairs: after
        // the OR loop the combined values live at even indices.
        SDValue *numEle = new SDValue[SrcNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i8, Src, DAG.getConstant(x, MVT::i32));
          numEle[x] = DAG.getSExtOrTrunc(numEle[x], DL, MVT::i16);
          numEle[x] = DAG.getNode(ISD::AND, DL, MVT::i16, numEle[x],
              DAG.getConstant(0xFF, MVT::i16));
        }
        for (uint32_t x = 1; x < SrcNumEle; x += 2) {
          numEle[x] = DAG.getNode(ISD::SHL, DL, MVT::i16, numEle[x],
              DAG.getConstant(8, MVT::i16));
          numEle[x - 1] = DAG.getNode(ISD::OR, DL, MVT::i16,
              numEle[x-1], numEle[x]);
        }
        if (DstNumEle > 1) {
          // If we are not a scalar i16, the only other case is a
          // v2i16 since we can't have v8i8 at this point, v4i16
          // cannot be generated
          Dst = DAG.getNode(AMDILISD::VBUILD, DL, MVT::v2i16,
              numEle[0]);
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(1, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2i16,
              Dst, numEle[2], idx);
        } else {
          Dst = numEle[0];
        }
        delete [] numEle;
        return Dst;
      } else if (ScalarDstSize == 8 && ScalarSrcSize == 16) {
        // case 4:
        // Split each i16 into a low byte (x*2) and a shifted high byte
        // (x*2+1), build an i16 vector from the pieces, then truncate.
        SDValue *numEle = new SDValue[DstNumEle];
        for (uint32_t x = 0; x < SrcNumEle; ++x) {
          numEle[x * 2] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
              MVT::i16, Src, DAG.getConstant(x, MVT::i32));
          numEle[x * 2 + 1] = DAG.getNode(ISD::SRL, DL, MVT::i16,
              numEle[x * 2], DAG.getConstant(8, MVT::i16));
        }
        MVT ty = (SrcNumEle == 1) ? MVT::v2i16 : MVT::v4i16;
        Dst = DAG.getNode(AMDILISD::VBUILD, DL, ty, numEle[0]);
        for (uint32_t x = 1; x < DstNumEle; ++x) {
          SDValue idx = DAG.getNode(ISD::ZERO_EXTEND, DL,
              getPointerTy(), DAG.getConstant(x, MVT::i32));
          Dst = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ty,
              Dst, numEle[x], idx);
        }
        delete [] numEle;
        ty = (SrcNumEle == 1) ? MVT::v2i8 : MVT::v4i8;
        Res = DAG.getSExtOrTrunc(Dst, DL, ty);
        return Res;
      }
    }
  }
  // Default: same-size reinterpretation via the AMDIL BITCONV node.
  Res = DAG.getNode(AMDILISD::BITCONV,
      Dst.getDebugLoc(),
      Dst.getValueType(), Src);
  return Res;
}
4558
4559SDValue
4560AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4561    SelectionDAG &DAG) const
4562{
4563  SDValue Chain = Op.getOperand(0);
4564  SDValue Size = Op.getOperand(1);
4565  unsigned int SPReg = AMDIL::SP;
4566  DebugLoc DL = Op.getDebugLoc();
4567  SDValue SP = DAG.getCopyFromReg(Chain,
4568      DL,
4569      SPReg, MVT::i32);
4570  SDValue NewSP = DAG.getNode(ISD::ADD,
4571      DL,
4572      MVT::i32, SP, Size);
4573  Chain = DAG.getCopyToReg(SP.getValue(1),
4574      DL,
4575      SPReg, NewSP);
4576  SDValue Ops[2] = {NewSP, Chain};
4577  Chain = DAG.getMergeValues(Ops, 2 ,DL);
4578  return Chain;
4579}
4580SDValue
4581AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
4582{
4583  SDValue Chain = Op.getOperand(0);
4584  SDValue Cond  = Op.getOperand(1);
4585  SDValue Jump  = Op.getOperand(2);
4586  SDValue Result;
4587  Result = DAG.getNode(
4588      AMDILISD::BRANCH_COND,
4589      Op.getDebugLoc(),
4590      Op.getValueType(),
4591      Chain, Jump, Cond);
4592  return Result;
4593}
4594
4595SDValue
4596AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
4597{
4598  SDValue Chain = Op.getOperand(0);
4599  CondCodeSDNode *CCNode = cast<CondCodeSDNode>(Op.getOperand(1));
4600  SDValue LHS   = Op.getOperand(2);
4601  SDValue RHS   = Op.getOperand(3);
4602  SDValue JumpT  = Op.getOperand(4);
4603  SDValue CmpValue;
4604  ISD::CondCode CC = CCNode->get();
4605  SDValue Result;
4606  unsigned int cmpOpcode = CondCCodeToCC(
4607      CC,
4608      LHS.getValueType().getSimpleVT().SimpleTy);
4609  CmpValue = DAG.getNode(
4610      AMDILISD::CMP,
4611      Op.getDebugLoc(),
4612      LHS.getValueType(),
4613      DAG.getConstant(cmpOpcode, MVT::i32),
4614      LHS, RHS);
4615  Result = DAG.getNode(
4616      AMDILISD::BRANCH_COND,
4617      CmpValue.getDebugLoc(),
4618      MVT::Other, Chain,
4619      JumpT, CmpValue);
4620  return Result;
4621}
4622
4623SDValue
4624AMDILTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const
4625{
4626  SDValue Result = DAG.getNode(
4627      AMDILISD::DP_TO_FP,
4628      Op.getDebugLoc(),
4629      Op.getValueType(),
4630      Op.getOperand(0),
4631      Op.getOperand(1));
4632  return Result;
4633}
4634
4635SDValue
4636AMDILTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const
4637{
4638  SDValue Result = DAG.getNode(
4639      AMDILISD::VCONCAT,
4640      Op.getDebugLoc(),
4641      Op.getValueType(),
4642      Op.getOperand(0),
4643      Op.getOperand(1));
4644  return Result;
4645}
// LowerRET - Lower an ISD::RET node: assign the return values to registers
// per the AMDIL calling convention, emit the copies glued together, and
// terminate with RET_FLAG.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    // (each copy is glued to the previous one via the Flag value)
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  // Glue the final RET_FLAG to the last copy, if any copies were emitted.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
void
// Expand a 64-bit integer relational (MI with opcode opCode) into 32-bit
// compares on the high and low halves.  The combined result is written as
// a 64-bit value (low == high) into the destination register.
AMDILTargetLowering::generateLongRelational(MachineInstr *MI,
    unsigned int opCode) const
{
  MachineOperand DST = MI->getOperand(0);
  MachineOperand LHS = MI->getOperand(2);
  MachineOperand RHS = MI->getOperand(3);
  unsigned int opi32Code = 0, si32Code = 0;
  unsigned int simpleVT = MI->getDesc().OpInfo[0].RegClass;
  uint32_t REGS[12];
  // Allocate the scratch virtual registers used below (REGS[0..5] and
  // REGS[9..11]; the rest stay spare).
  for (int x = 0; x < 12; ++x) {
    REGS[x] = genVReg(simpleVT);
  }
  // Pull out the high and low components of each 64 bit register
  generateMachineInst(AMDIL::LHI, REGS[0], LHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[1], LHS.getReg());
  generateMachineInst(AMDIL::LHI, REGS[2], RHS.getReg());
  generateMachineInst(AMDIL::LLO, REGS[3], RHS.getReg());
  // Determine the correct opcode that we should use
  // opi32Code compares the high halves, si32Code the low halves.
  switch(opCode) {
    default:
      assert(!"comparison case not handled!");
      break;
    case AMDIL::LEQ:
      si32Code = opi32Code = AMDIL::IEQ;
      break;
    case AMDIL::LNE:
      si32Code = opi32Code = AMDIL::INE;
      break;
    case AMDIL::LLE:
    case AMDIL::ULLE:
    case AMDIL::LGE:
    case AMDIL::ULGE:
      // GE/LE are rewritten as LT by swapping one pair of operands.
      if (opCode == AMDIL::LGE || opCode == AMDIL::ULGE) {
        std::swap(REGS[0], REGS[2]);
      } else {
        std::swap(REGS[1], REGS[3]);
      }
      if (opCode == AMDIL::LLE || opCode == AMDIL::LGE) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::UGE;
      break;
    case AMDIL::LGT:
    case AMDIL::ULGT:
      // GT is LT with both operand pairs swapped; FALL THROUGH into the
      // LT handling below (intentional: no break).
      std::swap(REGS[0], REGS[2]);
      std::swap(REGS[1], REGS[3]);
    case AMDIL::LLT:
    case AMDIL::ULLT:
      if (opCode == AMDIL::LGT || opCode == AMDIL::LLT) {
        opi32Code = AMDIL::ILT;
      } else {
        opi32Code = AMDIL::ULT;
      }
      si32Code = AMDIL::ULT;
      break;
  };
  // Do the initial opcode on the high and low components.
  // This leaves the following:
  // REGS[4] = L_HI OP R_HI
  // REGS[5] = L_LO OP R_LO
  generateMachineInst(opi32Code, REGS[4], REGS[0], REGS[2]);
  generateMachineInst(si32Code, REGS[5], REGS[1], REGS[3]);
  switch(opi32Code) {
    case AMDIL::IEQ:
    case AMDIL::INE:
      {
        // combine the results with an and or or depending on if
        // we are eq or ne
        uint32_t combineOp = (opi32Code == AMDIL::IEQ)
          ? AMDIL::BINARY_AND_i32 : AMDIL::BINARY_OR_i32;
        generateMachineInst(combineOp, REGS[11], REGS[4], REGS[5]);
      }
      break;
    default:
      // this finishes codegen for the following pattern
      // REGS[4] || (REGS[5] && (L_HI == R_HI))
      generateMachineInst(AMDIL::IEQ, REGS[9], REGS[0], REGS[2]);
      generateMachineInst(AMDIL::BINARY_AND_i32, REGS[10], REGS[5],
          REGS[9]);
      generateMachineInst(AMDIL::BINARY_OR_i32, REGS[11], REGS[4],
          REGS[10]);
      break;
  }
  // Replicate the 32-bit result into both halves of the 64-bit destination.
  generateMachineInst(AMDIL::LCREATE, DST.getReg(), REGS[11], REGS[11]);
}
4800
4801unsigned int
4802AMDILTargetLowering::getFunctionAlignment(const Function *) const
4803{
4804  return 0;
4805}
4806
4807void
4808AMDILTargetLowering::setPrivateData(MachineBasicBlock *BB,
4809    MachineBasicBlock::iterator &BBI,
4810    DebugLoc *DL, const TargetInstrInfo *TII) const
4811{
4812  mBB = BB;
4813  mBBI = BBI;
4814  mDL = DL;
4815  mTII = TII;
4816}
4817uint32_t
4818AMDILTargetLowering::genVReg(uint32_t regType) const
4819{
4820  return mBB->getParent()->getRegInfo().createVirtualRegister(
4821      getRegClassFromID(regType));
4822}
4823
4824MachineInstrBuilder
4825AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst) const
4826{
4827  return BuildMI(*mBB, mBBI, *mDL, mTII->get(opcode), dst);
4828}
4829
4830MachineInstrBuilder
4831AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4832    uint32_t src1) const
4833{
4834  return generateMachineInst(opcode, dst).addReg(src1);
4835}
4836
4837MachineInstrBuilder
4838AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4839    uint32_t src1, uint32_t src2) const
4840{
4841  return generateMachineInst(opcode, dst, src1).addReg(src2);
4842}
4843
4844MachineInstrBuilder
4845AMDILTargetLowering::generateMachineInst(uint32_t opcode, uint32_t dst,
4846    uint32_t src1, uint32_t src2, uint32_t src3) const
4847{
4848  return generateMachineInst(opcode, dst, src1, src2).addReg(src3);
4849}
4850
4851
4852SDValue
4853AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
4854{
4855  DebugLoc DL = Op.getDebugLoc();
4856  EVT OVT = Op.getValueType();
4857  SDValue LHS = Op.getOperand(0);
4858  SDValue RHS = Op.getOperand(1);
4859  MVT INTTY;
4860  MVT FLTTY;
4861  if (!OVT.isVector()) {
4862    INTTY = MVT::i32;
4863    FLTTY = MVT::f32;
4864  } else if (OVT.getVectorNumElements() == 2) {
4865    INTTY = MVT::v2i32;
4866    FLTTY = MVT::v2f32;
4867  } else if (OVT.getVectorNumElements() == 4) {
4868    INTTY = MVT::v4i32;
4869    FLTTY = MVT::v4f32;
4870  }
4871  unsigned bitsize = OVT.getScalarType().getSizeInBits();
4872  // char|short jq = ia ^ ib;
4873  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
4874
4875  // jq = jq >> (bitsize - 2)
4876  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
4877
4878  // jq = jq | 0x1
4879  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
4880
4881  // jq = (int)jq
4882  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
4883
4884  // int ia = (int)LHS;
4885  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
4886
4887  // int ib, (int)RHS;
4888  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
4889
4890  // float fa = (float)ia;
4891  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
4892
4893  // float fb = (float)ib;
4894  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
4895
4896  // float fq = native_divide(fa, fb);
4897  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
4898
4899  // fq = trunc(fq);
4900  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
4901
4902  // float fqneg = -fq;
4903  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
4904
4905  // float fr = mad(fqneg, fb, fa);
4906  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
4907
4908  // int iq = (int)fq;
4909  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
4910
4911  // fr = fabs(fr);
4912  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
4913
4914  // fb = fabs(fb);
4915  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
4916
4917  // int cv = fr >= fb;
4918  SDValue cv;
4919  if (INTTY == MVT::i32) {
4920    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4921  } else {
4922    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
4923  }
4924  // jq = (cv ? jq : 0);
4925  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
4926      DAG.getConstant(0, OVT));
4927  // dst = iq + jq;
4928  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
4929  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
4930  return iq;
4931}
4932
4933SDValue
4934AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
4935{
4936  DebugLoc DL = Op.getDebugLoc();
4937  EVT OVT = Op.getValueType();
4938  SDValue LHS = Op.getOperand(0);
4939  SDValue RHS = Op.getOperand(1);
4940  // The LowerSDIV32 function generates equivalent to the following IL.
4941  // mov r0, LHS
4942  // mov r1, RHS
4943  // ilt r10, r0, 0
4944  // ilt r11, r1, 0
4945  // iadd r0, r0, r10
4946  // iadd r1, r1, r11
4947  // ixor r0, r0, r10
4948  // ixor r1, r1, r11
4949  // udiv r0, r0, r1
4950  // ixor r10, r10, r11
4951  // iadd r0, r0, r10
4952  // ixor DST, r0, r10
4953
4954  // mov r0, LHS
4955  SDValue r0 = LHS;
4956
4957  // mov r1, RHS
4958  SDValue r1 = RHS;
4959
4960  // ilt r10, r0, 0
4961  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4962      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4963      r0, DAG.getConstant(0, OVT));
4964
4965  // ilt r11, r1, 0
4966  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
4967      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
4968      r1, DAG.getConstant(0, OVT));
4969
4970  // iadd r0, r0, r10
4971  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4972
4973  // iadd r1, r1, r11
4974  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
4975
4976  // ixor r0, r0, r10
4977  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4978
4979  // ixor r1, r1, r11
4980  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
4981
4982  // udiv r0, r0, r1
4983  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
4984
4985  // ixor r10, r10, r11
4986  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
4987
4988  // iadd r0, r0, r10
4989  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
4990
4991  // ixor DST, r0, r10
4992  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
4993  return DST;
4994}
4995
4996SDValue
4997AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
4998{
4999  return SDValue(Op.getNode(), 0);
5000}
5001
5002SDValue
5003AMDILTargetLowering::LowerUDIV24(SDValue Op, SelectionDAG &DAG) const
5004{
5005  DebugLoc DL = Op.getDebugLoc();
5006  EVT OVT = Op.getValueType();
5007  SDValue LHS = Op.getOperand(0);
5008  SDValue RHS = Op.getOperand(1);
5009  MVT INTTY;
5010  MVT FLTTY;
5011  if (!OVT.isVector()) {
5012    INTTY = MVT::i32;
5013    FLTTY = MVT::f32;
5014  } else if (OVT.getVectorNumElements() == 2) {
5015    INTTY = MVT::v2i32;
5016    FLTTY = MVT::v2f32;
5017  } else if (OVT.getVectorNumElements() == 4) {
5018    INTTY = MVT::v4i32;
5019    FLTTY = MVT::v4f32;
5020  }
5021
5022  // The LowerUDIV24 function implements the following CL.
5023  // int ia = (int)LHS
5024  // float fa = (float)ia
5025  // int ib = (int)RHS
5026  // float fb = (float)ib
5027  // float fq = native_divide(fa, fb)
5028  // fq = trunc(fq)
5029  // float t = mad(fq, fb, fb)
5030  // int iq = (int)fq - (t <= fa)
5031  // return (type)iq
5032
5033  // int ia = (int)LHS
5034  SDValue ia = DAG.getZExtOrTrunc(LHS, DL, INTTY);
5035
5036  // float fa = (float)ia
5037  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
5038
5039  // int ib = (int)RHS
5040  SDValue ib = DAG.getZExtOrTrunc(RHS, DL, INTTY);
5041
5042  // float fb = (float)ib
5043  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
5044
5045  // float fq = native_divide(fa, fb)
5046  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
5047
5048  // fq = trunc(fq)
5049  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
5050
5051  // float t = mad(fq, fb, fb)
5052  SDValue t = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fq, fb, fb);
5053
5054  // int iq = (int)fq - (t <= fa) // This is sub and not add because GPU returns 0, -1
5055  SDValue iq;
5056  fq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
5057  if (INTTY == MVT::i32) {
5058    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5059  } else {
5060    iq = DAG.getSetCC(DL, INTTY, t, fa, ISD::SETOLE);
5061  }
5062  iq = DAG.getNode(ISD::ADD, DL, INTTY, fq, iq);
5063
5064
5065  // return (type)iq
5066  iq = DAG.getZExtOrTrunc(iq, DL, OVT);
5067  return iq;
5068
5069}
5070
5071SDValue
5072AMDILTargetLowering::LowerUDIV32(SDValue Op, SelectionDAG &DAG) const
5073{
5074  return SDValue(Op.getNode(), 0);
5075}
5076
5077SDValue
5078AMDILTargetLowering::LowerUDIV64(SDValue Op, SelectionDAG &DAG) const
5079{
5080  return SDValue(Op.getNode(), 0);
5081}
5082SDValue
5083AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
5084{
5085  DebugLoc DL = Op.getDebugLoc();
5086  EVT OVT = Op.getValueType();
5087  MVT INTTY = MVT::i32;
5088  if (OVT == MVT::v2i8) {
5089    INTTY = MVT::v2i32;
5090  } else if (OVT == MVT::v4i8) {
5091    INTTY = MVT::v4i32;
5092  }
5093  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5094  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5095  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5096  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5097  return LHS;
5098}
5099
5100SDValue
5101AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
5102{
5103  DebugLoc DL = Op.getDebugLoc();
5104  EVT OVT = Op.getValueType();
5105  MVT INTTY = MVT::i32;
5106  if (OVT == MVT::v2i16) {
5107    INTTY = MVT::v2i32;
5108  } else if (OVT == MVT::v4i16) {
5109    INTTY = MVT::v4i32;
5110  }
5111  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
5112  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
5113  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
5114  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
5115  return LHS;
5116}
5117
5118SDValue
5119AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
5120{
5121  DebugLoc DL = Op.getDebugLoc();
5122  EVT OVT = Op.getValueType();
5123  SDValue LHS = Op.getOperand(0);
5124  SDValue RHS = Op.getOperand(1);
5125  // The LowerSREM32 function generates equivalent to the following IL.
5126  // mov r0, LHS
5127  // mov r1, RHS
5128  // ilt r10, r0, 0
5129  // ilt r11, r1, 0
5130  // iadd r0, r0, r10
5131  // iadd r1, r1, r11
5132  // ixor r0, r0, r10
5133  // ixor r1, r1, r11
5134  // udiv r20, r0, r1
5135  // umul r20, r20, r1
5136  // sub r0, r0, r20
5137  // iadd r0, r0, r10
5138  // ixor DST, r0, r10
5139
5140  // mov r0, LHS
5141  SDValue r0 = LHS;
5142
5143  // mov r1, RHS
5144  SDValue r1 = RHS;
5145
5146  // ilt r10, r0, 0
5147  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5148      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5149      r0, DAG.getConstant(0, OVT));
5150
5151  // ilt r11, r1, 0
5152  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5153      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
5154      r1, DAG.getConstant(0, OVT));
5155
5156  // iadd r0, r0, r10
5157  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5158
5159  // iadd r1, r1, r11
5160  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
5161
5162  // ixor r0, r0, r10
5163  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5164
5165  // ixor r1, r1, r11
5166  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
5167
5168  // udiv r20, r0, r1
5169  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
5170
5171  // umul r20, r20, r1
5172  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
5173
5174  // sub r0, r0, r20
5175  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
5176
5177  // iadd r0, r0, r10
5178  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
5179
5180  // ixor DST, r0, r10
5181  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
5182  return DST;
5183}
5184
5185SDValue
5186AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
5187{
5188  return SDValue(Op.getNode(), 0);
5189}
5190
5191SDValue
5192AMDILTargetLowering::LowerUREM8(SDValue Op, SelectionDAG &DAG) const
5193{
5194  DebugLoc DL = Op.getDebugLoc();
5195  EVT OVT = Op.getValueType();
5196  MVT INTTY = MVT::i32;
5197  if (OVT == MVT::v2i8) {
5198    INTTY = MVT::v2i32;
5199  } else if (OVT == MVT::v4i8) {
5200    INTTY = MVT::v4i32;
5201  }
5202  SDValue LHS = Op.getOperand(0);
5203  SDValue RHS = Op.getOperand(1);
5204  // The LowerUREM8 function generates equivalent to the following IL.
5205  // mov r0, as_u32(LHS)
5206  // mov r1, as_u32(RHS)
5207  // and r10, r0, 0xFF
5208  // and r11, r1, 0xFF
5209  // cmov_logical r3, r11, r11, 0x1
5210  // udiv r3, r10, r3
5211  // cmov_logical r3, r11, r3, 0
5212  // umul r3, r3, r11
5213  // sub r3, r10, r3
5214  // and as_u8(DST), r3, 0xFF
5215
5216  // mov r0, as_u32(LHS)
5217  SDValue r0 = DAG.getSExtOrTrunc(LHS, DL, INTTY);
5218
5219  // mov r1, as_u32(RHS)
5220  SDValue r1 = DAG.getSExtOrTrunc(RHS, DL, INTTY);
5221
5222  // and r10, r0, 0xFF
5223  SDValue r10 = DAG.getNode(ISD::AND, DL, INTTY, r0,
5224      DAG.getConstant(0xFF, INTTY));
5225
5226  // and r11, r1, 0xFF
5227  SDValue r11 = DAG.getNode(ISD::AND, DL, INTTY, r1,
5228      DAG.getConstant(0xFF, INTTY));
5229
5230  // cmov_logical r3, r11, r11, 0x1
5231  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r11,
5232      DAG.getConstant(0x01, INTTY));
5233
5234  // udiv r3, r10, r3
5235  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5236
5237  // cmov_logical r3, r11, r3, 0
5238  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, r11, r3,
5239      DAG.getConstant(0, INTTY));
5240
5241  // umul r3, r3, r11
5242  r3 = DAG.getNode(AMDILISD::UMUL, DL, INTTY, r3, r11);
5243
5244  // sub r3, r10, r3
5245  r3 = DAG.getNode(ISD::SUB, DL, INTTY, r10, r3);
5246
5247  // and as_u8(DST), r3, 0xFF
5248  SDValue DST = DAG.getNode(ISD::AND, DL, INTTY, r3,
5249      DAG.getConstant(0xFF, INTTY));
5250  DST = DAG.getZExtOrTrunc(DST, DL, OVT);
5251  return DST;
5252}
5253
5254SDValue
5255AMDILTargetLowering::LowerUREM16(SDValue Op, SelectionDAG &DAG) const
5256{
5257  DebugLoc DL = Op.getDebugLoc();
5258  EVT OVT = Op.getValueType();
5259  MVT INTTY = MVT::i32;
5260  if (OVT == MVT::v2i16) {
5261    INTTY = MVT::v2i32;
5262  } else if (OVT == MVT::v4i16) {
5263    INTTY = MVT::v4i32;
5264  }
5265  SDValue LHS = Op.getOperand(0);
5266  SDValue RHS = Op.getOperand(1);
5267  // The LowerUREM16 function generatest equivalent to the following IL.
5268  // mov r0, LHS
5269  // mov r1, RHS
5270  // DIV = LowerUDIV16(LHS, RHS)
5271  // and r10, r0, 0xFFFF
5272  // and r11, r1, 0xFFFF
5273  // cmov_logical r3, r11, r11, 0x1
5274  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5275  // and r3, r3, 0xFFFF
5276  // cmov_logical r3, r11, r3, 0
5277  // umul r3, r3, r11
5278  // sub r3, r10, r3
5279  // and DST, r3, 0xFFFF
5280
5281  // mov r0, LHS
5282  SDValue r0 = LHS;
5283
5284  // mov r1, RHS
5285  SDValue r1 = RHS;
5286
5287  // and r10, r0, 0xFFFF
5288  SDValue r10 = DAG.getNode(ISD::AND, DL, OVT, r0,
5289      DAG.getConstant(0xFFFF, OVT));
5290
5291  // and r11, r1, 0xFFFF
5292  SDValue r11 = DAG.getNode(ISD::AND, DL, OVT, r1,
5293      DAG.getConstant(0xFFFF, OVT));
5294
5295  // cmov_logical r3, r11, r11, 0x1
5296  SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r11,
5297      DAG.getConstant(0x01, OVT));
5298
5299  // udiv as_u16(r3), as_u32(r10), as_u32(r3)
5300  r10 = DAG.getZExtOrTrunc(r10, DL, INTTY);
5301  r3 = DAG.getZExtOrTrunc(r3, DL, INTTY);
5302  r3 = DAG.getNode(ISD::UREM, DL, INTTY, r10, r3);
5303  r3 = DAG.getZExtOrTrunc(r3, DL, OVT);
5304  r10 = DAG.getZExtOrTrunc(r10, DL, OVT);
5305
5306  // and r3, r3, 0xFFFF
5307  r3 = DAG.getNode(ISD::AND, DL, OVT, r3,
5308      DAG.getConstant(0xFFFF, OVT));
5309
5310  // cmov_logical r3, r11, r3, 0
5311  r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r11, r3,
5312      DAG.getConstant(0, OVT));
5313  // umul r3, r3, r11
5314  r3 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r3, r11);
5315
5316  // sub r3, r10, r3
5317  r3 = DAG.getNode(ISD::SUB, DL, OVT, r10, r3);
5318
5319  // and DST, r3, 0xFFFF
5320  SDValue DST = DAG.getNode(ISD::AND, DL, OVT, r3,
5321      DAG.getConstant(0xFFFF, OVT));
5322  return DST;
5323}
5324
5325SDValue
5326AMDILTargetLowering::LowerUREM32(SDValue Op, SelectionDAG &DAG) const
5327{
5328  DebugLoc DL = Op.getDebugLoc();
5329  EVT OVT = Op.getValueType();
5330  SDValue LHS = Op.getOperand(0);
5331  SDValue RHS = Op.getOperand(1);
5332  // The LowerUREM32 function generates equivalent to the following IL.
5333  // udiv r20, LHS, RHS
5334  // umul r20, r20, RHS
5335  // sub DST, LHS, r20
5336
5337  // udiv r20, LHS, RHS
5338  SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, LHS, RHS);
5339
5340  // umul r20, r20, RHS
5341  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, RHS);
5342
5343  // sub DST, LHS, r20
5344  SDValue DST = DAG.getNode(ISD::SUB, DL, OVT, LHS, r20);
5345  return DST;
5346}
5347
5348SDValue
5349AMDILTargetLowering::LowerUREM64(SDValue Op, SelectionDAG &DAG) const
5350{
5351  return SDValue(Op.getNode(), 0);
5352}
5353
5354
5355SDValue
5356AMDILTargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const
5357{
5358  DebugLoc DL = Op.getDebugLoc();
5359  EVT OVT = Op.getValueType();
5360  MVT INTTY = MVT::i32;
5361  if (OVT == MVT::v2f32) {
5362    INTTY = MVT::v2i32;
5363  } else if (OVT == MVT::v4f32) {
5364    INTTY = MVT::v4i32;
5365  }
5366  SDValue LHS = Op.getOperand(0);
5367  SDValue RHS = Op.getOperand(1);
5368  SDValue DST;
5369  const AMDILSubtarget *stm = reinterpret_cast<const AMDILTargetMachine*>(
5370      &this->getTargetMachine())->getSubtargetImpl();
5371  if (stm->device()->getGeneration() == AMDILDeviceInfo::HD4XXX) {
5372    // TODO: This doesn't work for vector types yet
5373    // The LowerFDIV32 function generates equivalent to the following
5374    // IL:
5375    // mov r20, as_int(LHS)
5376    // mov r21, as_int(RHS)
5377    // and r30, r20, 0x7f800000
5378    // and r31, r20, 0x807FFFFF
5379    // and r32, r21, 0x7f800000
5380    // and r33, r21, 0x807FFFFF
5381    // ieq r40, r30, 0x7F800000
5382    // ieq r41, r31, 0x7F800000
5383    // ieq r42, r32, 0
5384    // ieq r43, r33, 0
5385    // and r50, r20, 0x80000000
5386    // and r51, r21, 0x80000000
5387    // ior r32, r32, 0x3f800000
5388    // ior r33, r33, 0x3f800000
5389    // cmov_logical r32, r42, r50, r32
5390    // cmov_logical r33, r43, r51, r33
5391    // cmov_logical r32, r40, r20, r32
5392    // cmov_logical r33, r41, r21, r33
5393    // ior r50, r40, r41
5394    // ior r51, r42, r43
5395    // ior r50, r50, r51
5396    // inegate r52, r31
5397    // iadd r30, r30, r52
5398    // cmov_logical r30, r50, 0, r30
5399    // div_zeroop(infinity) r21, 1.0, r33
5400    // mul_ieee r20, r32, r21
5401    // and r22, r20, 0x7FFFFFFF
5402    // and r23, r20, 0x80000000
5403    // ishr r60, r22, 0x00000017
5404    // ishr r61, r30, 0x00000017
5405    // iadd r20, r20, r30
5406    // iadd r21, r22, r30
5407    // iadd r60, r60, r61
5408    // ige r42, 0, R60
5409    // ior r41, r23, 0x7F800000
5410    // ige r40, r60, 0x000000FF
5411    // cmov_logical r40, r50, 0, r40
5412    // cmov_logical r20, r42, r23, r20
5413    // cmov_logical DST, r40, r41, r20
5414    // as_float(DST)
5415
5416    // mov r20, as_int(LHS)
5417    SDValue R20 = DAG.getNode(ISDBITCAST, DL, INTTY, LHS);
5418
5419    // mov r21, as_int(RHS)
5420    SDValue R21 = DAG.getNode(ISDBITCAST, DL, INTTY, RHS);
5421
5422    // and r30, r20, 0x7f800000
5423    SDValue R30 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5424        DAG.getConstant(0x7F800000, INTTY));
5425
5426    // and r31, r21, 0x7f800000
5427    SDValue R31 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5428        DAG.getConstant(0x7f800000, INTTY));
5429
5430    // and r32, r20, 0x807FFFFF
5431    SDValue R32 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5432        DAG.getConstant(0x807FFFFF, INTTY));
5433
5434    // and r33, r21, 0x807FFFFF
5435    SDValue R33 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5436        DAG.getConstant(0x807FFFFF, INTTY));
5437
5438    // ieq r40, r30, 0x7F800000
5439    SDValue R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5440        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5441        R30, DAG.getConstant(0x7F800000, INTTY));
5442
5443    // ieq r41, r31, 0x7F800000
5444    SDValue R41 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5445        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5446        R31, DAG.getConstant(0x7F800000, INTTY));
5447
5448    // ieq r42, r30, 0
5449    SDValue R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5450        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5451        R30, DAG.getConstant(0, INTTY));
5452
5453    // ieq r43, r31, 0
5454    SDValue R43 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5455        DAG.getConstant(CondCCodeToCC(ISD::SETEQ, MVT::i32), MVT::i32),
5456        R31, DAG.getConstant(0, INTTY));
5457
5458    // and r50, r20, 0x80000000
5459    SDValue R50 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5460        DAG.getConstant(0x80000000, INTTY));
5461
5462    // and r51, r21, 0x80000000
5463    SDValue R51 = DAG.getNode(ISD::AND, DL, INTTY, R21,
5464        DAG.getConstant(0x80000000, INTTY));
5465
5466    // ior r32, r32, 0x3f800000
5467    R32 = DAG.getNode(ISD::OR, DL, INTTY, R32,
5468        DAG.getConstant(0x3F800000, INTTY));
5469
5470    // ior r33, r33, 0x3f800000
5471    R33 = DAG.getNode(ISD::OR, DL, INTTY, R33,
5472        DAG.getConstant(0x3F800000, INTTY));
5473
5474    // cmov_logical r32, r42, r50, r32
5475    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R50, R32);
5476
5477    // cmov_logical r33, r43, r51, r33
5478    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R43, R51, R33);
5479
5480    // cmov_logical r32, r40, r20, r32
5481    R32 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R20, R32);
5482
5483    // cmov_logical r33, r41, r21, r33
5484    R33 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R41, R21, R33);
5485
5486    // ior r50, r40, r41
5487    R50 = DAG.getNode(ISD::OR, DL, INTTY, R40, R41);
5488
5489    // ior r51, r42, r43
5490    R51 = DAG.getNode(ISD::OR, DL, INTTY, R42, R43);
5491
5492    // ior r50, r50, r51
5493    R50 = DAG.getNode(ISD::OR, DL, INTTY, R50, R51);
5494
5495    // inegate r52, r31
5496    SDValue R52 = DAG.getNode(AMDILISD::INEGATE, DL, INTTY, R31);
5497
5498    // iadd r30, r30, r52
5499    R30 = DAG.getNode(ISD::ADD, DL, INTTY, R30, R52);
5500
5501    // cmov_logical r30, r50, 0, r30
5502    R30 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5503        DAG.getConstant(0, INTTY), R30);
5504
5505    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5506    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5507    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5508        DAG.getConstantFP(1.0f, OVT), R33);
5509
5510    // mul_ieee as_int(r20), as_float(r32), r21
5511    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5512    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5513    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5514
5515    // div_zeroop(infinity) r21, 1.0, as_float(r33)
5516    R33 = DAG.getNode(ISDBITCAST, DL, OVT, R33);
5517    R21 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT,
5518        DAG.getConstantFP(1.0f, OVT), R33);
5519
5520    // mul_ieee as_int(r20), as_float(r32), r21
5521    R32 = DAG.getNode(ISDBITCAST, DL, OVT, R32);
5522    R20 = DAG.getNode(ISD::FMUL, DL, OVT, R32, R21);
5523    R20 = DAG.getNode(ISDBITCAST, DL, INTTY, R20);
5524
5525    // and r22, r20, 0x7FFFFFFF
5526    SDValue R22 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5527        DAG.getConstant(0x7FFFFFFF, INTTY));
5528
5529    // and r23, r20, 0x80000000
5530    SDValue R23 = DAG.getNode(ISD::AND, DL, INTTY, R20,
5531        DAG.getConstant(0x80000000, INTTY));
5532
5533    // ishr r60, r22, 0x00000017
5534    SDValue R60 = DAG.getNode(ISD::SRA, DL, INTTY, R22,
5535        DAG.getConstant(0x00000017, INTTY));
5536
5537    // ishr r61, r30, 0x00000017
5538    SDValue R61 = DAG.getNode(ISD::SRA, DL, INTTY, R30,
5539        DAG.getConstant(0x00000017, INTTY));
5540
5541    // iadd r20, r20, r30
5542    R20 = DAG.getNode(ISD::ADD, DL, INTTY, R20, R30);
5543
5544    // iadd r21, r22, r30
5545    R21 = DAG.getNode(ISD::ADD, DL, INTTY, R22, R30);
5546
5547    // iadd r60, r60, r61
5548    R60 = DAG.getNode(ISD::ADD, DL, INTTY, R60, R61);
5549
5550    // ige r42, 0, R60
5551    R42 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5552        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5553        DAG.getConstant(0, INTTY),
5554        R60);
5555
5556    // ior r41, r23, 0x7F800000
5557    R41 = DAG.getNode(ISD::OR, DL, INTTY, R23,
5558        DAG.getConstant(0x7F800000, INTTY));
5559
5560    // ige r40, r60, 0x000000FF
5561    R40 = DAG.getNode(AMDILISD::CMP, DL, INTTY,
5562        DAG.getConstant(CondCCodeToCC(ISD::SETGE, MVT::i32), MVT::i32),
5563        R60,
5564        DAG.getConstant(0x0000000FF, INTTY));
5565
5566    // cmov_logical r40, r50, 0, r40
5567    R40 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R50,
5568        DAG.getConstant(0, INTTY),
5569        R40);
5570
5571    // cmov_logical r20, r42, r23, r20
5572    R20 = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R42, R23, R20);
5573
5574    // cmov_logical DST, r40, r41, r20
5575    DST = DAG.getNode(AMDILISD::CMOVLOG, DL, INTTY, R40, R41, R20);
5576
5577    // as_float(DST)
5578    DST = DAG.getNode(ISDBITCAST, DL, OVT, DST);
5579  } else {
5580    // The following sequence of DAG nodes produce the following IL:
5581    // fabs r1, RHS
5582    // lt r2, 0x1.0p+96f, r1
5583    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5584    // mul_ieee r1, RHS, r3
5585    // div_zeroop(infinity) r0, LHS, r1
5586    // mul_ieee DST, r0, r3
5587
5588    // fabs r1, RHS
5589    SDValue r1 = DAG.getNode(ISD::FABS, DL, OVT, RHS);
5590    // lt r2, 0x1.0p+96f, r1
5591    SDValue r2 = DAG.getNode(AMDILISD::CMP, DL, OVT,
5592        DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::f32), MVT::i32),
5593        DAG.getConstant(0x6f800000, INTTY), r1);
5594    // cmov_logical r3, r2, 0x1.0p-23f, 1.0f
5595    SDValue r3 = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, r2,
5596        DAG.getConstant(0x2f800000, INTTY),
5597        DAG.getConstant(0x3f800000, INTTY));
5598    // mul_ieee r1, RHS, r3
5599    r1 = DAG.getNode(ISD::FMUL, DL, OVT, RHS, r3);
5600    // div_zeroop(infinity) r0, LHS, r1
5601    SDValue r0 = DAG.getNode(AMDILISD::DIV_INF, DL, OVT, LHS, r1);
5602    // mul_ieee DST, r0, r3
5603    DST = DAG.getNode(ISD::FMUL, DL, OVT, r0, r3);
5604  }
5605  return DST;
5606}
5607
5608SDValue
5609AMDILTargetLowering::LowerFDIV64(SDValue Op, SelectionDAG &DAG) const
5610{
5611  return SDValue(Op.getNode(), 0);
5612}
5613