// AMDILISelLowering.cpp revision 76b44034b9b234d3db4012342f0fae677d4f10f6
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILRegisterInfo.h"
19#include "AMDILSubtarget.h"
20#include "AMDILUtilityFunctions.h"
21#include "llvm/CallingConv.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/PseudoSourceValue.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/DerivedTypes.h"
29#include "llvm/Instructions.h"
30#include "llvm/Intrinsics.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetInstrInfo.h"
33#include "llvm/Target/TargetOptions.h"
34
35using namespace llvm;
36#define ISDBITCAST  ISD::BITCAST
37#define MVTGLUE     MVT::Glue
38//===----------------------------------------------------------------------===//
39// Calling Convention Implementation
40//===----------------------------------------------------------------------===//
41#include "AMDGPUGenCallingConv.inc"
42
43//===----------------------------------------------------------------------===//
44// TargetLowering Implementation Help Functions Begin
45//===----------------------------------------------------------------------===//
46  static SDValue
47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
48{
49  DebugLoc DL = Src.getDebugLoc();
50  EVT svt = Src.getValueType().getScalarType();
51  EVT dvt = Dst.getValueType().getScalarType();
52  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
53    if (dvt.bitsGT(svt)) {
54      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
55    } else if (svt.bitsLT(svt)) {
56      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
57          DAG.getConstant(1, MVT::i32));
58    }
59  } else if (svt.isInteger() && dvt.isInteger()) {
60    if (!svt.bitsEq(dvt)) {
61      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
62    }
63  } else if (svt.isInteger()) {
64    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
65    if (!svt.bitsEq(dvt)) {
66      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
67        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
68      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
69        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
70      } else {
71        assert(0 && "We only support 32 and 64bit fp types");
72      }
73    }
74    Src = DAG.getNode(opcode, DL, dvt, Src);
75  } else if (dvt.isInteger()) {
76    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
77    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
78      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
79    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
80      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
81    } else {
82      assert(0 && "We only support 32 and 64bit fp types");
83    }
84    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
85  }
86  return Src;
87}
88// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
89// condition.
90  static AMDILCC::CondCodes
91CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
92{
93  switch (CC) {
94    default:
95      {
96        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
97        assert(0 && "Unknown condition code!");
98      }
99    case ISD::SETO:
100      switch(type) {
101        case MVT::f32:
102          return AMDILCC::IL_CC_F_O;
103        case MVT::f64:
104          return AMDILCC::IL_CC_D_O;
105        default:
106          assert(0 && "Opcode combination not generated correctly!");
107          return AMDILCC::COND_ERROR;
108      };
109    case ISD::SETUO:
110      switch(type) {
111        case MVT::f32:
112          return AMDILCC::IL_CC_F_UO;
113        case MVT::f64:
114          return AMDILCC::IL_CC_D_UO;
115        default:
116          assert(0 && "Opcode combination not generated correctly!");
117          return AMDILCC::COND_ERROR;
118      };
119    case ISD::SETGT:
120      switch (type) {
121        case MVT::i1:
122        case MVT::i8:
123        case MVT::i16:
124        case MVT::i32:
125          return AMDILCC::IL_CC_I_GT;
126        case MVT::f32:
127          return AMDILCC::IL_CC_F_GT;
128        case MVT::f64:
129          return AMDILCC::IL_CC_D_GT;
130        case MVT::i64:
131          return AMDILCC::IL_CC_L_GT;
132        default:
133          assert(0 && "Opcode combination not generated correctly!");
134          return AMDILCC::COND_ERROR;
135      };
136    case ISD::SETGE:
137      switch (type) {
138        case MVT::i1:
139        case MVT::i8:
140        case MVT::i16:
141        case MVT::i32:
142          return AMDILCC::IL_CC_I_GE;
143        case MVT::f32:
144          return AMDILCC::IL_CC_F_GE;
145        case MVT::f64:
146          return AMDILCC::IL_CC_D_GE;
147        case MVT::i64:
148          return AMDILCC::IL_CC_L_GE;
149        default:
150          assert(0 && "Opcode combination not generated correctly!");
151          return AMDILCC::COND_ERROR;
152      };
153    case ISD::SETLT:
154      switch (type) {
155        case MVT::i1:
156        case MVT::i8:
157        case MVT::i16:
158        case MVT::i32:
159          return AMDILCC::IL_CC_I_LT;
160        case MVT::f32:
161          return AMDILCC::IL_CC_F_LT;
162        case MVT::f64:
163          return AMDILCC::IL_CC_D_LT;
164        case MVT::i64:
165          return AMDILCC::IL_CC_L_LT;
166        default:
167          assert(0 && "Opcode combination not generated correctly!");
168          return AMDILCC::COND_ERROR;
169      };
170    case ISD::SETLE:
171      switch (type) {
172        case MVT::i1:
173        case MVT::i8:
174        case MVT::i16:
175        case MVT::i32:
176          return AMDILCC::IL_CC_I_LE;
177        case MVT::f32:
178          return AMDILCC::IL_CC_F_LE;
179        case MVT::f64:
180          return AMDILCC::IL_CC_D_LE;
181        case MVT::i64:
182          return AMDILCC::IL_CC_L_LE;
183        default:
184          assert(0 && "Opcode combination not generated correctly!");
185          return AMDILCC::COND_ERROR;
186      };
187    case ISD::SETNE:
188      switch (type) {
189        case MVT::i1:
190        case MVT::i8:
191        case MVT::i16:
192        case MVT::i32:
193          return AMDILCC::IL_CC_I_NE;
194        case MVT::f32:
195          return AMDILCC::IL_CC_F_NE;
196        case MVT::f64:
197          return AMDILCC::IL_CC_D_NE;
198        case MVT::i64:
199          return AMDILCC::IL_CC_L_NE;
200        default:
201          assert(0 && "Opcode combination not generated correctly!");
202          return AMDILCC::COND_ERROR;
203      };
204    case ISD::SETEQ:
205      switch (type) {
206        case MVT::i1:
207        case MVT::i8:
208        case MVT::i16:
209        case MVT::i32:
210          return AMDILCC::IL_CC_I_EQ;
211        case MVT::f32:
212          return AMDILCC::IL_CC_F_EQ;
213        case MVT::f64:
214          return AMDILCC::IL_CC_D_EQ;
215        case MVT::i64:
216          return AMDILCC::IL_CC_L_EQ;
217        default:
218          assert(0 && "Opcode combination not generated correctly!");
219          return AMDILCC::COND_ERROR;
220      };
221    case ISD::SETUGT:
222      switch (type) {
223        case MVT::i1:
224        case MVT::i8:
225        case MVT::i16:
226        case MVT::i32:
227          return AMDILCC::IL_CC_U_GT;
228        case MVT::f32:
229          return AMDILCC::IL_CC_F_UGT;
230        case MVT::f64:
231          return AMDILCC::IL_CC_D_UGT;
232        case MVT::i64:
233          return AMDILCC::IL_CC_UL_GT;
234        default:
235          assert(0 && "Opcode combination not generated correctly!");
236          return AMDILCC::COND_ERROR;
237      };
238    case ISD::SETUGE:
239      switch (type) {
240        case MVT::i1:
241        case MVT::i8:
242        case MVT::i16:
243        case MVT::i32:
244          return AMDILCC::IL_CC_U_GE;
245        case MVT::f32:
246          return AMDILCC::IL_CC_F_UGE;
247        case MVT::f64:
248          return AMDILCC::IL_CC_D_UGE;
249        case MVT::i64:
250          return AMDILCC::IL_CC_UL_GE;
251        default:
252          assert(0 && "Opcode combination not generated correctly!");
253          return AMDILCC::COND_ERROR;
254      };
255    case ISD::SETULT:
256      switch (type) {
257        case MVT::i1:
258        case MVT::i8:
259        case MVT::i16:
260        case MVT::i32:
261          return AMDILCC::IL_CC_U_LT;
262        case MVT::f32:
263          return AMDILCC::IL_CC_F_ULT;
264        case MVT::f64:
265          return AMDILCC::IL_CC_D_ULT;
266        case MVT::i64:
267          return AMDILCC::IL_CC_UL_LT;
268        default:
269          assert(0 && "Opcode combination not generated correctly!");
270          return AMDILCC::COND_ERROR;
271      };
272    case ISD::SETULE:
273      switch (type) {
274        case MVT::i1:
275        case MVT::i8:
276        case MVT::i16:
277        case MVT::i32:
278          return AMDILCC::IL_CC_U_LE;
279        case MVT::f32:
280          return AMDILCC::IL_CC_F_ULE;
281        case MVT::f64:
282          return AMDILCC::IL_CC_D_ULE;
283        case MVT::i64:
284          return AMDILCC::IL_CC_UL_LE;
285        default:
286          assert(0 && "Opcode combination not generated correctly!");
287          return AMDILCC::COND_ERROR;
288      };
289    case ISD::SETUNE:
290      switch (type) {
291        case MVT::i1:
292        case MVT::i8:
293        case MVT::i16:
294        case MVT::i32:
295          return AMDILCC::IL_CC_U_NE;
296        case MVT::f32:
297          return AMDILCC::IL_CC_F_UNE;
298        case MVT::f64:
299          return AMDILCC::IL_CC_D_UNE;
300        case MVT::i64:
301          return AMDILCC::IL_CC_UL_NE;
302        default:
303          assert(0 && "Opcode combination not generated correctly!");
304          return AMDILCC::COND_ERROR;
305      };
306    case ISD::SETUEQ:
307      switch (type) {
308        case MVT::i1:
309        case MVT::i8:
310        case MVT::i16:
311        case MVT::i32:
312          return AMDILCC::IL_CC_U_EQ;
313        case MVT::f32:
314          return AMDILCC::IL_CC_F_UEQ;
315        case MVT::f64:
316          return AMDILCC::IL_CC_D_UEQ;
317        case MVT::i64:
318          return AMDILCC::IL_CC_UL_EQ;
319        default:
320          assert(0 && "Opcode combination not generated correctly!");
321          return AMDILCC::COND_ERROR;
322      };
323    case ISD::SETOGT:
324      switch (type) {
325        case MVT::f32:
326          return AMDILCC::IL_CC_F_OGT;
327        case MVT::f64:
328          return AMDILCC::IL_CC_D_OGT;
329        case MVT::i1:
330        case MVT::i8:
331        case MVT::i16:
332        case MVT::i32:
333        case MVT::i64:
334        default:
335          assert(0 && "Opcode combination not generated correctly!");
336          return AMDILCC::COND_ERROR;
337      };
338    case ISD::SETOGE:
339      switch (type) {
340        case MVT::f32:
341          return AMDILCC::IL_CC_F_OGE;
342        case MVT::f64:
343          return AMDILCC::IL_CC_D_OGE;
344        case MVT::i1:
345        case MVT::i8:
346        case MVT::i16:
347        case MVT::i32:
348        case MVT::i64:
349        default:
350          assert(0 && "Opcode combination not generated correctly!");
351          return AMDILCC::COND_ERROR;
352      };
353    case ISD::SETOLT:
354      switch (type) {
355        case MVT::f32:
356          return AMDILCC::IL_CC_F_OLT;
357        case MVT::f64:
358          return AMDILCC::IL_CC_D_OLT;
359        case MVT::i1:
360        case MVT::i8:
361        case MVT::i16:
362        case MVT::i32:
363        case MVT::i64:
364        default:
365          assert(0 && "Opcode combination not generated correctly!");
366          return AMDILCC::COND_ERROR;
367      };
368    case ISD::SETOLE:
369      switch (type) {
370        case MVT::f32:
371          return AMDILCC::IL_CC_F_OLE;
372        case MVT::f64:
373          return AMDILCC::IL_CC_D_OLE;
374        case MVT::i1:
375        case MVT::i8:
376        case MVT::i16:
377        case MVT::i32:
378        case MVT::i64:
379        default:
380          assert(0 && "Opcode combination not generated correctly!");
381          return AMDILCC::COND_ERROR;
382      };
383    case ISD::SETONE:
384      switch (type) {
385        case MVT::f32:
386          return AMDILCC::IL_CC_F_ONE;
387        case MVT::f64:
388          return AMDILCC::IL_CC_D_ONE;
389        case MVT::i1:
390        case MVT::i8:
391        case MVT::i16:
392        case MVT::i32:
393        case MVT::i64:
394        default:
395          assert(0 && "Opcode combination not generated correctly!");
396          return AMDILCC::COND_ERROR;
397      };
398    case ISD::SETOEQ:
399      switch (type) {
400        case MVT::f32:
401          return AMDILCC::IL_CC_F_OEQ;
402        case MVT::f64:
403          return AMDILCC::IL_CC_D_OEQ;
404        case MVT::i1:
405        case MVT::i8:
406        case MVT::i16:
407        case MVT::i32:
408        case MVT::i64:
409        default:
410          assert(0 && "Opcode combination not generated correctly!");
411          return AMDILCC::COND_ERROR;
412      };
413  };
414}
415
416SDValue
417AMDILTargetLowering::LowerMemArgument(
418    SDValue Chain,
419    CallingConv::ID CallConv,
420    const SmallVectorImpl<ISD::InputArg> &Ins,
421    DebugLoc dl, SelectionDAG &DAG,
422    const CCValAssign &VA,
423    MachineFrameInfo *MFI,
424    unsigned i) const
425{
426  // Create the nodes corresponding to a load from this parameter slot.
427  ISD::ArgFlagsTy Flags = Ins[i].Flags;
428
429  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
430    getTargetMachine().Options.GuaranteedTailCallOpt;
431  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
432
433  // FIXME: For now, all byval parameter objects are marked mutable. This can
434  // be changed with more analysis.
435  // In case of tail call optimization mark all arguments mutable. Since they
436  // could be overwritten by lowering of arguments in case of a tail call.
437  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
438      VA.getLocMemOffset(), isImmutable);
439  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
440
441  if (Flags.isByVal())
442    return FIN;
443  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
444      MachinePointerInfo::getFixedStack(FI),
445      false, false, false, 0);
446}
447//===----------------------------------------------------------------------===//
448// TargetLowering Implementation Help Functions End
449//===----------------------------------------------------------------------===//
450
451//===----------------------------------------------------------------------===//
452// TargetLowering Class Implementation Begins
453//===----------------------------------------------------------------------===//
454  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
455: TargetLowering(TM, new TargetLoweringObjectFileELF())
456{
457  int types[] =
458  {
459    (int)MVT::i8,
460    (int)MVT::i16,
461    (int)MVT::i32,
462    (int)MVT::f32,
463    (int)MVT::f64,
464    (int)MVT::i64,
465    (int)MVT::v2i8,
466    (int)MVT::v4i8,
467    (int)MVT::v2i16,
468    (int)MVT::v4i16,
469    (int)MVT::v4f32,
470    (int)MVT::v4i32,
471    (int)MVT::v2f32,
472    (int)MVT::v2i32,
473    (int)MVT::v2f64,
474    (int)MVT::v2i64
475  };
476
477  int IntTypes[] =
478  {
479    (int)MVT::i8,
480    (int)MVT::i16,
481    (int)MVT::i32,
482    (int)MVT::i64
483  };
484
485  int FloatTypes[] =
486  {
487    (int)MVT::f32,
488    (int)MVT::f64
489  };
490
491  int VectorTypes[] =
492  {
493    (int)MVT::v2i8,
494    (int)MVT::v4i8,
495    (int)MVT::v2i16,
496    (int)MVT::v4i16,
497    (int)MVT::v4f32,
498    (int)MVT::v4i32,
499    (int)MVT::v2f32,
500    (int)MVT::v2i32,
501    (int)MVT::v2f64,
502    (int)MVT::v2i64
503  };
504  size_t numTypes = sizeof(types) / sizeof(*types);
505  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
506  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
507  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);
508
509  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
510  // These are the current register classes that are
511  // supported
512
513  for (unsigned int x  = 0; x < numTypes; ++x) {
514    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];
515
516    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
517    // We cannot sextinreg, expand to shifts
518    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
519    setOperationAction(ISD::SUBE, VT, Expand);
520    setOperationAction(ISD::SUBC, VT, Expand);
521    setOperationAction(ISD::ADDE, VT, Expand);
522    setOperationAction(ISD::ADDC, VT, Expand);
523    setOperationAction(ISD::SETCC, VT, Custom);
524    setOperationAction(ISD::BRCOND, VT, Custom);
525    setOperationAction(ISD::BR_CC, VT, Custom);
526    setOperationAction(ISD::BR_JT, VT, Expand);
527    setOperationAction(ISD::BRIND, VT, Expand);
528    // TODO: Implement custom UREM/SREM routines
529    setOperationAction(ISD::SREM, VT, Expand);
530    setOperationAction(ISD::GlobalAddress, VT, Custom);
531    setOperationAction(ISD::JumpTable, VT, Custom);
532    setOperationAction(ISD::ConstantPool, VT, Custom);
533    setOperationAction(ISD::SELECT, VT, Custom);
534    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
535    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
536    if (VT != MVT::i64 && VT != MVT::v2i64) {
537      setOperationAction(ISD::SDIV, VT, Custom);
538    }
539  }
540  for (unsigned int x = 0; x < numFloatTypes; ++x) {
541    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];
542
543    // IL does not have these operations for floating point types
544    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
545    setOperationAction(ISD::SETOLT, VT, Expand);
546    setOperationAction(ISD::SETOGE, VT, Expand);
547    setOperationAction(ISD::SETOGT, VT, Expand);
548    setOperationAction(ISD::SETOLE, VT, Expand);
549    setOperationAction(ISD::SETULT, VT, Expand);
550    setOperationAction(ISD::SETUGE, VT, Expand);
551    setOperationAction(ISD::SETUGT, VT, Expand);
552    setOperationAction(ISD::SETULE, VT, Expand);
553  }
554
555  for (unsigned int x = 0; x < numIntTypes; ++x) {
556    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];
557
558    // GPU also does not have divrem function for signed or unsigned
559    setOperationAction(ISD::SDIVREM, VT, Expand);
560
561    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
562    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
563    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
564
565    // GPU doesn't have a rotl, rotr, or byteswap instruction
566    setOperationAction(ISD::ROTR, VT, Expand);
567    setOperationAction(ISD::BSWAP, VT, Expand);
568
569    // GPU doesn't have any counting operators
570    setOperationAction(ISD::CTPOP, VT, Expand);
571    setOperationAction(ISD::CTTZ, VT, Expand);
572    setOperationAction(ISD::CTLZ, VT, Expand);
573  }
574
575  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
576  {
577    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];
578
579    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
580    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
581    setOperationAction(ISD::SDIVREM, VT, Expand);
582    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
583    // setOperationAction(ISD::VSETCC, VT, Expand);
584    setOperationAction(ISD::SETCC, VT, Expand);
585    setOperationAction(ISD::SELECT_CC, VT, Expand);
586    setOperationAction(ISD::SELECT, VT, Expand);
587
588  }
589  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
590    setOperationAction(ISD::MULHU, MVT::i64, Expand);
591    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
592    setOperationAction(ISD::MULHS, MVT::i64, Expand);
593    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
594    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
595    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
596    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
597    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
598    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
599    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
600    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
601    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
602  }
603  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
604    // we support loading/storing v2f64 but not operations on the type
605    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
606    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
607    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
608    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
609    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
610    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
611    // We want to expand vector conversions into their scalar
612    // counterparts.
613    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
614    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
615    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
616    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
617    setOperationAction(ISD::FABS, MVT::f64, Expand);
618    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
619  }
620  // TODO: Fix the UDIV24 algorithm so it works for these
621  // types correctly. This needs vector comparisons
622  // for this to work correctly.
623  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
624  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
625  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
626  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
627  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
628  setOperationAction(ISD::SUBC, MVT::Other, Expand);
629  setOperationAction(ISD::ADDE, MVT::Other, Expand);
630  setOperationAction(ISD::ADDC, MVT::Other, Expand);
631  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
632  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
633  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
634  setOperationAction(ISD::BRIND, MVT::Other, Expand);
635  setOperationAction(ISD::SETCC, MVT::Other, Custom);
636  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);
637
638  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
639  // Use the default implementation.
640  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
641  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
642  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
643  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
644  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
645  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
646  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
647  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
648  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);
649
650  setStackPointerRegisterToSaveRestore(AMDGPU::SP);
651  setSchedulingPreference(Sched::RegPressure);
652  setPow2DivIsCheap(false);
653  setPrefLoopAlignment(16);
654  setSelectIsExpensive(true);
655  setJumpIsExpensive(true);
656
657  maxStoresPerMemcpy  = 4096;
658  maxStoresPerMemmove = 4096;
659  maxStoresPerMemset  = 4096;
660
661#undef numTypes
662#undef numIntTypes
663#undef numVectorTypes
664#undef numFloatTypes
665}
666
667const char *
668AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
669{
670  switch (Opcode) {
671    default: return 0;
672    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
673    case AMDILISD::MAD:  return "AMDILISD::MAD";
674    case AMDILISD::CALL:  return "AMDILISD::CALL";
675    case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
676    case AMDILISD::UMUL: return "AMDILISD::UMUL";
677    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
678    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
679    case AMDILISD::CMP: return "AMDILISD::CMP";
680    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
681    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
682    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
683    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
684    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
685    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
686    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
687    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
688
689  };
690}
// getTgtMemIntrinsic - No AMDIL intrinsic is modeled as a target memory
// intrinsic; Info is left untouched and false is always returned.
bool
AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
    const CallInst &I, unsigned Intrinsic) const
{
  return false;
}
697// The backend supports 32 and 64 bit floating point immediates
698bool
699AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
700{
701  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
702      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
703    return true;
704  } else {
705    return false;
706  }
707}
708
709bool
710AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
711{
712  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
713      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
714    return false;
715  } else {
716    return true;
717  }
718}
719
720
721// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
722// be zero. Op is expected to be a target specific node. Used by DAG
723// combiner.
724
725void
726AMDILTargetLowering::computeMaskedBitsForTargetNode(
727    const SDValue Op,
728    APInt &KnownZero,
729    APInt &KnownOne,
730    const SelectionDAG &DAG,
731    unsigned Depth) const
732{
733  APInt KnownZero2;
734  APInt KnownOne2;
735  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
736  switch (Op.getOpcode()) {
737    default: break;
738    case AMDILISD::SELECT_CC:
739             DAG.ComputeMaskedBits(
740                 Op.getOperand(1),
741                 KnownZero,
742                 KnownOne,
743                 Depth + 1
744                 );
745             DAG.ComputeMaskedBits(
746                 Op.getOperand(0),
747                 KnownZero2,
748                 KnownOne2
749                 );
750             assert((KnownZero & KnownOne) == 0
751                 && "Bits known to be one AND zero?");
752             assert((KnownZero2 & KnownOne2) == 0
753                 && "Bits known to be one AND zero?");
754             // Only known if known in both the LHS and RHS
755             KnownOne &= KnownOne2;
756             KnownZero &= KnownZero2;
757             break;
758  };
759}
760
761// This is the function that determines which calling convention should
762// be used. Currently there is only one calling convention
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  // Op is currently ignored: every call site uses the single 32-bit
  // convention generated into AMDGPUGenCallingConv.inc.
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}
769
770// LowerCallResult - Lower the result values of an ISD::CALL into the
771// appropriate copies out of appropriate physical registers.  This assumes that
772// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
773// being lowered.  The returns a SDNode with the same number of values as the
774// ISD::CALL.
775SDValue
776AMDILTargetLowering::LowerCallResult(
777    SDValue Chain,
778    SDValue InFlag,
779    CallingConv::ID CallConv,
780    bool isVarArg,
781    const SmallVectorImpl<ISD::InputArg> &Ins,
782    DebugLoc dl,
783    SelectionDAG &DAG,
784    SmallVectorImpl<SDValue> &InVals) const
785{
786  // Assign locations to each value returned by this call
787  SmallVector<CCValAssign, 16> RVLocs;
788  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
789                 getTargetMachine(), RVLocs, *DAG.getContext());
790  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
791
792  // Copy all of the result registers out of their specified physreg.
793  for (unsigned i = 0; i != RVLocs.size(); ++i) {
794    EVT CopyVT = RVLocs[i].getValVT();
795    if (RVLocs[i].isRegLoc()) {
796      Chain = DAG.getCopyFromReg(
797          Chain,
798          dl,
799          RVLocs[i].getLocReg(),
800          CopyVT,
801          InFlag
802          ).getValue(1);
803      SDValue Val = Chain.getValue(0);
804      InFlag = Chain.getValue(2);
805      InVals.push_back(Val);
806    }
807  }
808
809  return Chain;
810
811}
812
813//===----------------------------------------------------------------------===//
814//                           Other Lowering Hooks
815//===----------------------------------------------------------------------===//
816
817// Recursively assign SDNodeOrdering to any unordered nodes
818// This is necessary to maintain source ordering of instructions
819// under -O0 to avoid odd-looking "skipping around" issues.
820  static const SDValue
821Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
822{
823  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
824    DAG.AssignOrdering( New.getNode(), order );
825    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
826      Ordered( DAG, order, New.getOperand(i) );
827  }
828  return New;
829}
830
831#define LOWER(A) \
832  case ISD:: A: \
833return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
834
835SDValue
836AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
837{
838  switch (Op.getOpcode()) {
839    default:
840      Op.getNode()->dump();
841      assert(0 && "Custom lowering code for this"
842          "instruction is not implemented yet!");
843      break;
844      LOWER(GlobalAddress);
845      LOWER(JumpTable);
846      LOWER(ConstantPool);
847      LOWER(ExternalSymbol);
848      LOWER(SDIV);
849      LOWER(SREM);
850      LOWER(BUILD_VECTOR);
851      LOWER(SELECT);
852      LOWER(SETCC);
853      LOWER(SIGN_EXTEND_INREG);
854      LOWER(DYNAMIC_STACKALLOC);
855      LOWER(BRCOND);
856      LOWER(BR_CC);
857  }
858  return Op;
859}
860
861#undef LOWER
862
863SDValue
864AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
865{
866  SDValue DST = Op;
867  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
868  const GlobalValue *G = GADN->getGlobal();
869  DebugLoc DL = Op.getDebugLoc();
870  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
871  if (!GV) {
872    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
873  } else {
874    if (GV->hasInitializer()) {
875      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
876      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
877        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
878      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
879        DST = DAG.getConstantFP(CF->getValueAPF(),
880            Op.getValueType());
881      } else if (dyn_cast<ConstantAggregateZero>(C)) {
882        EVT VT = Op.getValueType();
883        if (VT.isInteger()) {
884          DST = DAG.getConstant(0, VT);
885        } else {
886          DST = DAG.getConstantFP(0, VT);
887        }
888      } else {
889        assert(!"lowering this type of Global Address "
890            "not implemented yet!");
891        C->dump();
892        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
893      }
894    } else {
895      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
896    }
897  }
898  return DST;
899}
900
901SDValue
902AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
903{
904  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
905  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
906  return Result;
907}
908SDValue
909AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
910{
911  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
912  EVT PtrVT = Op.getValueType();
913  SDValue Result;
914  if (CP->isMachineConstantPoolEntry()) {
915    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
916        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
917  } else {
918    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
919        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
920  }
921  return Result;
922}
923
924SDValue
925AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
926{
927  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
928  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
929  return Result;
930}
931
/// LowerFormalArguments - transform physical registers into
/// virtual registers and generate load operations for
/// arguments placed on the stack.
/// Register arguments are copied out with CopyFromReg (adding an
/// Assert[SZ]ext plus TRUNCATE when the value was promoted to 32 bits);
/// memory arguments are delegated to LowerMemArgument.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  // One CCValAssign per incoming argument, in argument order.
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      // Mark the physical register live-in and copy it into a vreg.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any promotion (SExt/ZExt/AExt) means the declared type is
      // narrower than the location type: truncate back down.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-passed argument: emit the load via the shared helper.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
1032/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1033/// by "Src" to address "Dst" with size and alignment information specified by
1034/// the specific parameter attribute. The copy will be passed as a byval
1035/// function parameter.
1036static SDValue
1037CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1038    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1039  assert(0 && "MemCopy does not exist yet");
1040  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1041
1042  return DAG.getMemcpy(Chain,
1043      Src.getDebugLoc(),
1044      Dst, Src, SizeNode, Flags.getByValAlign(),
1045      /*IsVol=*/false, /*AlwaysInline=*/true,
1046      MachinePointerInfo(), MachinePointerInfo());
1047}
1048
1049SDValue
1050AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1051    SDValue StackPtr, SDValue Arg,
1052    DebugLoc dl, SelectionDAG &DAG,
1053    const CCValAssign &VA,
1054    ISD::ArgFlagsTy Flags) const
1055{
1056  unsigned int LocMemOffset = VA.getLocMemOffset();
1057  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1058  PtrOff = DAG.getNode(ISD::ADD,
1059      dl,
1060      getPointerTy(), StackPtr, PtrOff);
1061  if (Flags.isByVal()) {
1062    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1063  } else {
1064    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1065        MachinePointerInfo::getStack(LocMemOffset),
1066        false, false, 0);
1067  }
1068  return PtrOff;
1069}
/// LowerCall - functions arguments are copied from virtual
/// regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unsupported: force the flag off for the caller.
  // The isTailCall branches below are therefore dead.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyze the calling operands, but need to change
  // if we have more than one calling convention
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  // Mark the start of the call sequence (reserves NumBytes of stack).
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    // Promote the value to its location type if the CC says so.
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Register argument: recorded now, copied into its physreg below.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Tie all argument stores together so they are ordered before the call.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  SDValue InFlag;
  if (!isTailCall) {
    // Copy register args into their physregs; the glue (InFlag) chain
    // keeps the copies adjacent to the call node during scheduling.
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  // The call produces a chain and glue.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  // Emit the call node itself.
  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
1248
1249SDValue
1250AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
1251{
1252  EVT OVT = Op.getValueType();
1253  SDValue DST;
1254  if (OVT.getScalarType() == MVT::i64) {
1255    DST = LowerSDIV64(Op, DAG);
1256  } else if (OVT.getScalarType() == MVT::i32) {
1257    DST = LowerSDIV32(Op, DAG);
1258  } else if (OVT.getScalarType() == MVT::i16
1259      || OVT.getScalarType() == MVT::i8) {
1260    DST = LowerSDIV24(Op, DAG);
1261  } else {
1262    DST = SDValue(Op.getNode(), 0);
1263  }
1264  return DST;
1265}
1266
1267SDValue
1268AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
1269{
1270  EVT OVT = Op.getValueType();
1271  SDValue DST;
1272  if (OVT.getScalarType() == MVT::i64) {
1273    DST = LowerSREM64(Op, DAG);
1274  } else if (OVT.getScalarType() == MVT::i32) {
1275    DST = LowerSREM32(Op, DAG);
1276  } else if (OVT.getScalarType() == MVT::i16) {
1277    DST = LowerSREM16(Op, DAG);
1278  } else if (OVT.getScalarType() == MVT::i8) {
1279    DST = LowerSREM8(Op, DAG);
1280  } else {
1281    DST = SDValue(Op.getNode(), 0);
1282  }
1283  return DST;
1284}
1285
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  // LowerBUILD_VECTOR - splat operand 0 across the vector with VBUILD,
  // then overwrite the remaining lanes with INSERT_VECTOR_ELT, skipping
  // undef operands.  The switch cases fall through intentionally
  // (4 -> 3 -> 2) so the highest lane is inserted first.
  // NOTE(review): the insert indices 7/6/5 do not match the 0-based lane
  // numbers 3/2/1 - presumably a target-specific index encoding; confirm
  // against the AMDIL INSERT_VECTOR_ELT handling before changing them.
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Splat the first operand into every lane.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // FALLTHROUGH: lane 2 still needs inserting.
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // FALLTHROUGH: lane 1 still needs inserting.
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
1351
1352SDValue
1353AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
1354{
1355  SDValue Cond = Op.getOperand(0);
1356  SDValue LHS = Op.getOperand(1);
1357  SDValue RHS = Op.getOperand(2);
1358  DebugLoc DL = Op.getDebugLoc();
1359  Cond = getConversionNode(DAG, Cond, Op, true);
1360  Cond = DAG.getNode(AMDILISD::CMOVLOG,
1361      DL,
1362      Op.getValueType(), Cond, LHS, RHS);
1363  return Cond;
1364}
1365SDValue
1366AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
1367{
1368  SDValue Cond;
1369  SDValue LHS = Op.getOperand(0);
1370  SDValue RHS = Op.getOperand(1);
1371  SDValue CC  = Op.getOperand(2);
1372  DebugLoc DL = Op.getDebugLoc();
1373  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
1374  unsigned int AMDILCC = CondCCodeToCC(
1375      SetCCOpcode,
1376      LHS.getValueType().getSimpleVT().SimpleTy);
1377  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
1378  Cond = DAG.getNode(
1379      ISD::SELECT_CC,
1380      Op.getDebugLoc(),
1381      LHS.getValueType(),
1382      LHS, RHS,
1383      DAG.getConstant(-1, MVT::i32),
1384      DAG.getConstant(0, MVT::i32),
1385      CC);
1386  Cond = getConversionNode(DAG, Cond, Op, true);
1387  Cond = DAG.getNode(
1388      ISD::AND,
1389      DL,
1390      Cond.getValueType(),
1391      DAG.getConstant(1, Cond.getValueType()),
1392      Cond);
1393  return Cond;
1394}
1395
SDValue
AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
{
  // LowerSIGN_EXTEND_INREG - expand as SHL then SRA by (srcBits -
  // baseBits).  Sub-32-bit values are zero-extended to 32 bits first and
  // truncated back afterwards so the shifts operate on a full register.
  SDValue Data = Op.getOperand(0);
  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
  DebugLoc DL = Op.getDebugLoc();
  EVT DVT = Data.getValueType();
  EVT BVT = BaseType->getVT();
  unsigned baseBits = BVT.getScalarType().getSizeInBits();
  // NOTE(review): srcBits falls back to 1 for non-simple types; shiftBits
  // would underflow there for baseBits > 1 - confirm that path is never
  // reached in practice.
  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
  unsigned shiftBits = srcBits - baseBits;
  if (srcBits < 32) {
    // If the op is less than 32 bits, then it needs to extend to 32bits
    // so it can properly keep the upper bits valid.
    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
    shiftBits = 32 - baseBits;
    DVT = IVT;
  }
  SDValue Shift = DAG.getConstant(shiftBits, DVT);
  // Shift left by 'Shift' bits.
  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
  // Signed shift Right by 'Shift' bits.
  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
  if (srcBits < 32) {
    // Once the sign extension is done, the op needs to be converted to
    // its original type.
    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
  }
  return Data;
}
1427EVT
1428AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
1429{
1430  int iSize = (size * numEle);
1431  int vEle = (iSize >> ((size == 64) ? 6 : 5));
1432  if (!vEle) {
1433    vEle = 1;
1434  }
1435  if (size == 64) {
1436    if (vEle == 1) {
1437      return EVT(MVT::i64);
1438    } else {
1439      return EVT(MVT::getVectorVT(MVT::i64, vEle));
1440    }
1441  } else {
1442    if (vEle == 1) {
1443      return EVT(MVT::i32);
1444    } else {
1445      return EVT(MVT::getVectorVT(MVT::i32, vEle));
1446    }
1447  }
1448}
1449
1450SDValue
1451AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1452    SelectionDAG &DAG) const
1453{
1454  SDValue Chain = Op.getOperand(0);
1455  SDValue Size = Op.getOperand(1);
1456  unsigned int SPReg = AMDGPU::SP;
1457  DebugLoc DL = Op.getDebugLoc();
1458  SDValue SP = DAG.getCopyFromReg(Chain,
1459      DL,
1460      SPReg, MVT::i32);
1461  SDValue NewSP = DAG.getNode(ISD::ADD,
1462      DL,
1463      MVT::i32, SP, Size);
1464  Chain = DAG.getCopyToReg(SP.getValue(1),
1465      DL,
1466      SPReg, NewSP);
1467  SDValue Ops[2] = {NewSP, Chain};
1468  Chain = DAG.getMergeValues(Ops, 2 ,DL);
1469  return Chain;
1470}
1471SDValue
1472AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
1473{
1474  SDValue Chain = Op.getOperand(0);
1475  SDValue Cond  = Op.getOperand(1);
1476  SDValue Jump  = Op.getOperand(2);
1477  SDValue Result;
1478  Result = DAG.getNode(
1479      AMDILISD::BRANCH_COND,
1480      Op.getDebugLoc(),
1481      Op.getValueType(),
1482      Chain, Jump, Cond);
1483  return Result;
1484}
1485
1486SDValue
1487AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
1488{
1489  SDValue Chain = Op.getOperand(0);
1490  SDValue CC = Op.getOperand(1);
1491  SDValue LHS   = Op.getOperand(2);
1492  SDValue RHS   = Op.getOperand(3);
1493  SDValue JumpT  = Op.getOperand(4);
1494  SDValue CmpValue;
1495  SDValue Result;
1496  CmpValue = DAG.getNode(
1497      ISD::SELECT_CC,
1498      Op.getDebugLoc(),
1499      LHS.getValueType(),
1500      LHS, RHS,
1501      DAG.getConstant(-1, MVT::i32),
1502      DAG.getConstant(0, MVT::i32),
1503      CC);
1504  Result = DAG.getNode(
1505      AMDILISD::BRANCH_COND,
1506      CmpValue.getDebugLoc(),
1507      MVT::Other, Chain,
1508      JumpT, CmpValue);
1509  return Result;
1510}
1511
// LowerReturn - Lower an ISD::RET node: copy each return value into its
// CC-assigned physical register (marking it live-out), glue the copies
// together, and terminate with the target RET_FLAG node.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // Emit the return node; the glue keeps the copies attached to it.
  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
1577
// getFunctionAlignment - this target requests no extra alignment for
// functions (presumably a log2 alignment, per the TargetLowering
// convention - confirm against the base-class documentation).
unsigned int
AMDILTargetLowering::getFunctionAlignment(const Function *) const
{
  return 0;
}
1583
1584SDValue
1585AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
1586{
1587  DebugLoc DL = Op.getDebugLoc();
1588  EVT OVT = Op.getValueType();
1589  SDValue LHS = Op.getOperand(0);
1590  SDValue RHS = Op.getOperand(1);
1591  MVT INTTY;
1592  MVT FLTTY;
1593  if (!OVT.isVector()) {
1594    INTTY = MVT::i32;
1595    FLTTY = MVT::f32;
1596  } else if (OVT.getVectorNumElements() == 2) {
1597    INTTY = MVT::v2i32;
1598    FLTTY = MVT::v2f32;
1599  } else if (OVT.getVectorNumElements() == 4) {
1600    INTTY = MVT::v4i32;
1601    FLTTY = MVT::v4f32;
1602  }
1603  unsigned bitsize = OVT.getScalarType().getSizeInBits();
1604  // char|short jq = ia ^ ib;
1605  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
1606
1607  // jq = jq >> (bitsize - 2)
1608  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
1609
1610  // jq = jq | 0x1
1611  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
1612
1613  // jq = (int)jq
1614  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
1615
1616  // int ia = (int)LHS;
1617  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
1618
1619  // int ib, (int)RHS;
1620  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
1621
1622  // float fa = (float)ia;
1623  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
1624
1625  // float fb = (float)ib;
1626  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
1627
1628  // float fq = native_divide(fa, fb);
1629  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
1630
1631  // fq = trunc(fq);
1632  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
1633
1634  // float fqneg = -fq;
1635  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
1636
1637  // float fr = mad(fqneg, fb, fa);
1638  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
1639
1640  // int iq = (int)fq;
1641  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
1642
1643  // fr = fabs(fr);
1644  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
1645
1646  // fb = fabs(fb);
1647  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
1648
1649  // int cv = fr >= fb;
1650  SDValue cv;
1651  if (INTTY == MVT::i32) {
1652    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1653  } else {
1654    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1655  }
1656  // jq = (cv ? jq : 0);
1657  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
1658      DAG.getConstant(0, OVT));
1659  // dst = iq + jq;
1660  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
1661  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
1662  return iq;
1663}
1664
SDValue
AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
{
  // LowerSDIV32 - expand 32-bit signed division as an unsigned division of
  // absolute values followed by a conditional negate.  r10/r11 are
  // all-ones sign masks of LHS/RHS; (x + mask) ^ mask conditionally
  // negates x, and the quotient is negated when exactly one operand was
  // negative (r10 ^ r11).
  // NOTE(review): the select_cc true/false constants use MVT::i32 even
  // when OVT is a vector type - confirm legalization accepts this.
  DebugLoc DL = Op.getDebugLoc();
  EVT OVT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  // The LowerSDIV32 function generates equivalent to the following IL.
  // mov r0, LHS
  // mov r1, RHS
  // ilt r10, r0, 0
  // ilt r11, r1, 0
  // iadd r0, r0, r10
  // iadd r1, r1, r11
  // ixor r0, r0, r10
  // ixor r1, r1, r11
  // udiv r0, r0, r1
  // ixor r10, r10, r11
  // iadd r0, r0, r10
  // ixor DST, r0, r10

  // mov r0, LHS
  SDValue r0 = LHS;

  // mov r1, RHS
  SDValue r1 = RHS;

  // ilt r10, r0, 0
  SDValue r10 = DAG.getSelectCC(DL,
      r0, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // ilt r11, r1, 0
  SDValue r11 = DAG.getSelectCC(DL,
      r1, DAG.getConstant(0, OVT),
      DAG.getConstant(-1, MVT::i32),
      DAG.getConstant(0, MVT::i32),
      ISD::SETLT);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // iadd r1, r1, r11
  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);

  // ixor r0, r0, r10
  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);

  // ixor r1, r1, r11
  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);

  // udiv r0, r0, r1
  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);

  // ixor r10, r10, r11
  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);

  // iadd r0, r0, r10
  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);

  // ixor DST, r0, r10
  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
  return DST;
}
1731
SDValue
AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed division is not custom-expanded yet; hand the node back
  // unchanged.
  return SDValue(Op.getNode(), 0);
}
1737
1738SDValue
1739AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
1740{
1741  DebugLoc DL = Op.getDebugLoc();
1742  EVT OVT = Op.getValueType();
1743  MVT INTTY = MVT::i32;
1744  if (OVT == MVT::v2i8) {
1745    INTTY = MVT::v2i32;
1746  } else if (OVT == MVT::v4i8) {
1747    INTTY = MVT::v4i32;
1748  }
1749  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1750  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1751  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1752  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1753  return LHS;
1754}
1755
1756SDValue
1757AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
1758{
1759  DebugLoc DL = Op.getDebugLoc();
1760  EVT OVT = Op.getValueType();
1761  MVT INTTY = MVT::i32;
1762  if (OVT == MVT::v2i16) {
1763    INTTY = MVT::v2i32;
1764  } else if (OVT == MVT::v4i16) {
1765    INTTY = MVT::v4i32;
1766  }
1767  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1768  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1769  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1770  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1771  return LHS;
1772}
1773
1774SDValue
1775AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
1776{
1777  DebugLoc DL = Op.getDebugLoc();
1778  EVT OVT = Op.getValueType();
1779  SDValue LHS = Op.getOperand(0);
1780  SDValue RHS = Op.getOperand(1);
1781  // The LowerSREM32 function generates equivalent to the following IL.
1782  // mov r0, LHS
1783  // mov r1, RHS
1784  // ilt r10, r0, 0
1785  // ilt r11, r1, 0
1786  // iadd r0, r0, r10
1787  // iadd r1, r1, r11
1788  // ixor r0, r0, r10
1789  // ixor r1, r1, r11
1790  // udiv r20, r0, r1
1791  // umul r20, r20, r1
1792  // sub r0, r0, r20
1793  // iadd r0, r0, r10
1794  // ixor DST, r0, r10
1795
1796  // mov r0, LHS
1797  SDValue r0 = LHS;
1798
1799  // mov r1, RHS
1800  SDValue r1 = RHS;
1801
1802  // ilt r10, r0, 0
1803  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1804      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1805      r0, DAG.getConstant(0, OVT));
1806
1807  // ilt r11, r1, 0
1808  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1809      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1810      r1, DAG.getConstant(0, OVT));
1811
1812  // iadd r0, r0, r10
1813  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1814
1815  // iadd r1, r1, r11
1816  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
1817
1818  // ixor r0, r0, r10
1819  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1820
1821  // ixor r1, r1, r11
1822  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
1823
1824  // udiv r20, r0, r1
1825  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
1826
1827  // umul r20, r20, r1
1828  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
1829
1830  // sub r0, r0, r20
1831  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
1832
1833  // iadd r0, r0, r10
1834  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1835
1836  // ixor DST, r0, r10
1837  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1838  return DST;
1839}
1840
SDValue
AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
{
  // 64-bit signed remainder is not custom-expanded yet; hand the node back
  // unchanged.
  return SDValue(Op.getNode(), 0);
}
1846