AMDILISelLowering.cpp revision 984ad0788c54386801b185740b973c446e55d3b9
1//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//==-----------------------------------------------------------------------===//
9//
10// This file implements the interfaces that AMDIL uses to lower LLVM code into a
11// selection DAG.
12//
13//===----------------------------------------------------------------------===//
14
15#include "AMDILISelLowering.h"
16#include "AMDILDevices.h"
17#include "AMDILIntrinsicInfo.h"
18#include "AMDILRegisterInfo.h"
19#include "AMDILSubtarget.h"
20#include "AMDILUtilityFunctions.h"
21#include "llvm/CallingConv.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineRegisterInfo.h"
24#include "llvm/CodeGen/PseudoSourceValue.h"
25#include "llvm/CodeGen/SelectionDAG.h"
26#include "llvm/CodeGen/SelectionDAGNodes.h"
27#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
28#include "llvm/DerivedTypes.h"
29#include "llvm/Instructions.h"
30#include "llvm/Intrinsics.h"
31#include "llvm/Support/raw_ostream.h"
32#include "llvm/Target/TargetInstrInfo.h"
33#include "llvm/Target/TargetOptions.h"
34
35using namespace llvm;
36#define ISDBITCAST  ISD::BITCAST
37#define MVTGLUE     MVT::Glue
38//===----------------------------------------------------------------------===//
39// Calling Convention Implementation
40//===----------------------------------------------------------------------===//
41#include "AMDGPUGenCallingConv.inc"
42
43//===----------------------------------------------------------------------===//
44// TargetLowering Implementation Help Functions Begin
45//===----------------------------------------------------------------------===//
46  static SDValue
47getConversionNode(SelectionDAG &DAG, SDValue& Src, SDValue& Dst, bool asType)
48{
49  DebugLoc DL = Src.getDebugLoc();
50  EVT svt = Src.getValueType().getScalarType();
51  EVT dvt = Dst.getValueType().getScalarType();
52  if (svt.isFloatingPoint() && dvt.isFloatingPoint()) {
53    if (dvt.bitsGT(svt)) {
54      Src = DAG.getNode(ISD::FP_EXTEND, DL, dvt, Src);
55    } else if (svt.bitsLT(svt)) {
56      Src = DAG.getNode(ISD::FP_ROUND, DL, dvt, Src,
57          DAG.getConstant(1, MVT::i32));
58    }
59  } else if (svt.isInteger() && dvt.isInteger()) {
60    if (!svt.bitsEq(dvt)) {
61      Src = DAG.getSExtOrTrunc(Src, DL, dvt);
62    }
63  } else if (svt.isInteger()) {
64    unsigned opcode = (asType) ? ISDBITCAST : ISD::SINT_TO_FP;
65    if (!svt.bitsEq(dvt)) {
66      if (dvt.getSimpleVT().SimpleTy == MVT::f32) {
67        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i32);
68      } else if (dvt.getSimpleVT().SimpleTy == MVT::f64) {
69        Src = DAG.getSExtOrTrunc(Src, DL, MVT::i64);
70      } else {
71        assert(0 && "We only support 32 and 64bit fp types");
72      }
73    }
74    Src = DAG.getNode(opcode, DL, dvt, Src);
75  } else if (dvt.isInteger()) {
76    unsigned opcode = (asType) ? ISDBITCAST : ISD::FP_TO_SINT;
77    if (svt.getSimpleVT().SimpleTy == MVT::f32) {
78      Src = DAG.getNode(opcode, DL, MVT::i32, Src);
79    } else if (svt.getSimpleVT().SimpleTy == MVT::f64) {
80      Src = DAG.getNode(opcode, DL, MVT::i64, Src);
81    } else {
82      assert(0 && "We only support 32 and 64bit fp types");
83    }
84    Src = DAG.getSExtOrTrunc(Src, DL, dvt);
85  }
86  return Src;
87}
88// CondCCodeToCC - Convert a DAG condition code to a AMDIL CC
89// condition.
90  static AMDILCC::CondCodes
91CondCCodeToCC(ISD::CondCode CC, const MVT::SimpleValueType& type)
92{
93  switch (CC) {
94    default:
95      {
96        errs()<<"Condition Code: "<< (unsigned int)CC<<"\n";
97        assert(0 && "Unknown condition code!");
98      }
99    case ISD::SETO:
100      switch(type) {
101        case MVT::f32:
102          return AMDILCC::IL_CC_F_O;
103        case MVT::f64:
104          return AMDILCC::IL_CC_D_O;
105        default:
106          assert(0 && "Opcode combination not generated correctly!");
107          return AMDILCC::COND_ERROR;
108      };
109    case ISD::SETUO:
110      switch(type) {
111        case MVT::f32:
112          return AMDILCC::IL_CC_F_UO;
113        case MVT::f64:
114          return AMDILCC::IL_CC_D_UO;
115        default:
116          assert(0 && "Opcode combination not generated correctly!");
117          return AMDILCC::COND_ERROR;
118      };
119    case ISD::SETGT:
120      switch (type) {
121        case MVT::i1:
122        case MVT::i8:
123        case MVT::i16:
124        case MVT::i32:
125          return AMDILCC::IL_CC_I_GT;
126        case MVT::f32:
127          return AMDILCC::IL_CC_F_GT;
128        case MVT::f64:
129          return AMDILCC::IL_CC_D_GT;
130        case MVT::i64:
131          return AMDILCC::IL_CC_L_GT;
132        default:
133          assert(0 && "Opcode combination not generated correctly!");
134          return AMDILCC::COND_ERROR;
135      };
136    case ISD::SETGE:
137      switch (type) {
138        case MVT::i1:
139        case MVT::i8:
140        case MVT::i16:
141        case MVT::i32:
142          return AMDILCC::IL_CC_I_GE;
143        case MVT::f32:
144          return AMDILCC::IL_CC_F_GE;
145        case MVT::f64:
146          return AMDILCC::IL_CC_D_GE;
147        case MVT::i64:
148          return AMDILCC::IL_CC_L_GE;
149        default:
150          assert(0 && "Opcode combination not generated correctly!");
151          return AMDILCC::COND_ERROR;
152      };
153    case ISD::SETLT:
154      switch (type) {
155        case MVT::i1:
156        case MVT::i8:
157        case MVT::i16:
158        case MVT::i32:
159          return AMDILCC::IL_CC_I_LT;
160        case MVT::f32:
161          return AMDILCC::IL_CC_F_LT;
162        case MVT::f64:
163          return AMDILCC::IL_CC_D_LT;
164        case MVT::i64:
165          return AMDILCC::IL_CC_L_LT;
166        default:
167          assert(0 && "Opcode combination not generated correctly!");
168          return AMDILCC::COND_ERROR;
169      };
170    case ISD::SETLE:
171      switch (type) {
172        case MVT::i1:
173        case MVT::i8:
174        case MVT::i16:
175        case MVT::i32:
176          return AMDILCC::IL_CC_I_LE;
177        case MVT::f32:
178          return AMDILCC::IL_CC_F_LE;
179        case MVT::f64:
180          return AMDILCC::IL_CC_D_LE;
181        case MVT::i64:
182          return AMDILCC::IL_CC_L_LE;
183        default:
184          assert(0 && "Opcode combination not generated correctly!");
185          return AMDILCC::COND_ERROR;
186      };
187    case ISD::SETNE:
188      switch (type) {
189        case MVT::i1:
190        case MVT::i8:
191        case MVT::i16:
192        case MVT::i32:
193          return AMDILCC::IL_CC_I_NE;
194        case MVT::f32:
195          return AMDILCC::IL_CC_F_NE;
196        case MVT::f64:
197          return AMDILCC::IL_CC_D_NE;
198        case MVT::i64:
199          return AMDILCC::IL_CC_L_NE;
200        default:
201          assert(0 && "Opcode combination not generated correctly!");
202          return AMDILCC::COND_ERROR;
203      };
204    case ISD::SETEQ:
205      switch (type) {
206        case MVT::i1:
207        case MVT::i8:
208        case MVT::i16:
209        case MVT::i32:
210          return AMDILCC::IL_CC_I_EQ;
211        case MVT::f32:
212          return AMDILCC::IL_CC_F_EQ;
213        case MVT::f64:
214          return AMDILCC::IL_CC_D_EQ;
215        case MVT::i64:
216          return AMDILCC::IL_CC_L_EQ;
217        default:
218          assert(0 && "Opcode combination not generated correctly!");
219          return AMDILCC::COND_ERROR;
220      };
221    case ISD::SETUGT:
222      switch (type) {
223        case MVT::i1:
224        case MVT::i8:
225        case MVT::i16:
226        case MVT::i32:
227          return AMDILCC::IL_CC_U_GT;
228        case MVT::f32:
229          return AMDILCC::IL_CC_F_UGT;
230        case MVT::f64:
231          return AMDILCC::IL_CC_D_UGT;
232        case MVT::i64:
233          return AMDILCC::IL_CC_UL_GT;
234        default:
235          assert(0 && "Opcode combination not generated correctly!");
236          return AMDILCC::COND_ERROR;
237      };
238    case ISD::SETUGE:
239      switch (type) {
240        case MVT::i1:
241        case MVT::i8:
242        case MVT::i16:
243        case MVT::i32:
244          return AMDILCC::IL_CC_U_GE;
245        case MVT::f32:
246          return AMDILCC::IL_CC_F_UGE;
247        case MVT::f64:
248          return AMDILCC::IL_CC_D_UGE;
249        case MVT::i64:
250          return AMDILCC::IL_CC_UL_GE;
251        default:
252          assert(0 && "Opcode combination not generated correctly!");
253          return AMDILCC::COND_ERROR;
254      };
255    case ISD::SETULT:
256      switch (type) {
257        case MVT::i1:
258        case MVT::i8:
259        case MVT::i16:
260        case MVT::i32:
261          return AMDILCC::IL_CC_U_LT;
262        case MVT::f32:
263          return AMDILCC::IL_CC_F_ULT;
264        case MVT::f64:
265          return AMDILCC::IL_CC_D_ULT;
266        case MVT::i64:
267          return AMDILCC::IL_CC_UL_LT;
268        default:
269          assert(0 && "Opcode combination not generated correctly!");
270          return AMDILCC::COND_ERROR;
271      };
272    case ISD::SETULE:
273      switch (type) {
274        case MVT::i1:
275        case MVT::i8:
276        case MVT::i16:
277        case MVT::i32:
278          return AMDILCC::IL_CC_U_LE;
279        case MVT::f32:
280          return AMDILCC::IL_CC_F_ULE;
281        case MVT::f64:
282          return AMDILCC::IL_CC_D_ULE;
283        case MVT::i64:
284          return AMDILCC::IL_CC_UL_LE;
285        default:
286          assert(0 && "Opcode combination not generated correctly!");
287          return AMDILCC::COND_ERROR;
288      };
289    case ISD::SETUNE:
290      switch (type) {
291        case MVT::i1:
292        case MVT::i8:
293        case MVT::i16:
294        case MVT::i32:
295          return AMDILCC::IL_CC_U_NE;
296        case MVT::f32:
297          return AMDILCC::IL_CC_F_UNE;
298        case MVT::f64:
299          return AMDILCC::IL_CC_D_UNE;
300        case MVT::i64:
301          return AMDILCC::IL_CC_UL_NE;
302        default:
303          assert(0 && "Opcode combination not generated correctly!");
304          return AMDILCC::COND_ERROR;
305      };
306    case ISD::SETUEQ:
307      switch (type) {
308        case MVT::i1:
309        case MVT::i8:
310        case MVT::i16:
311        case MVT::i32:
312          return AMDILCC::IL_CC_U_EQ;
313        case MVT::f32:
314          return AMDILCC::IL_CC_F_UEQ;
315        case MVT::f64:
316          return AMDILCC::IL_CC_D_UEQ;
317        case MVT::i64:
318          return AMDILCC::IL_CC_UL_EQ;
319        default:
320          assert(0 && "Opcode combination not generated correctly!");
321          return AMDILCC::COND_ERROR;
322      };
323    case ISD::SETOGT:
324      switch (type) {
325        case MVT::f32:
326          return AMDILCC::IL_CC_F_OGT;
327        case MVT::f64:
328          return AMDILCC::IL_CC_D_OGT;
329        case MVT::i1:
330        case MVT::i8:
331        case MVT::i16:
332        case MVT::i32:
333        case MVT::i64:
334        default:
335          assert(0 && "Opcode combination not generated correctly!");
336          return AMDILCC::COND_ERROR;
337      };
338    case ISD::SETOGE:
339      switch (type) {
340        case MVT::f32:
341          return AMDILCC::IL_CC_F_OGE;
342        case MVT::f64:
343          return AMDILCC::IL_CC_D_OGE;
344        case MVT::i1:
345        case MVT::i8:
346        case MVT::i16:
347        case MVT::i32:
348        case MVT::i64:
349        default:
350          assert(0 && "Opcode combination not generated correctly!");
351          return AMDILCC::COND_ERROR;
352      };
353    case ISD::SETOLT:
354      switch (type) {
355        case MVT::f32:
356          return AMDILCC::IL_CC_F_OLT;
357        case MVT::f64:
358          return AMDILCC::IL_CC_D_OLT;
359        case MVT::i1:
360        case MVT::i8:
361        case MVT::i16:
362        case MVT::i32:
363        case MVT::i64:
364        default:
365          assert(0 && "Opcode combination not generated correctly!");
366          return AMDILCC::COND_ERROR;
367      };
368    case ISD::SETOLE:
369      switch (type) {
370        case MVT::f32:
371          return AMDILCC::IL_CC_F_OLE;
372        case MVT::f64:
373          return AMDILCC::IL_CC_D_OLE;
374        case MVT::i1:
375        case MVT::i8:
376        case MVT::i16:
377        case MVT::i32:
378        case MVT::i64:
379        default:
380          assert(0 && "Opcode combination not generated correctly!");
381          return AMDILCC::COND_ERROR;
382      };
383    case ISD::SETONE:
384      switch (type) {
385        case MVT::f32:
386          return AMDILCC::IL_CC_F_ONE;
387        case MVT::f64:
388          return AMDILCC::IL_CC_D_ONE;
389        case MVT::i1:
390        case MVT::i8:
391        case MVT::i16:
392        case MVT::i32:
393        case MVT::i64:
394        default:
395          assert(0 && "Opcode combination not generated correctly!");
396          return AMDILCC::COND_ERROR;
397      };
398    case ISD::SETOEQ:
399      switch (type) {
400        case MVT::f32:
401          return AMDILCC::IL_CC_F_OEQ;
402        case MVT::f64:
403          return AMDILCC::IL_CC_D_OEQ;
404        case MVT::i1:
405        case MVT::i8:
406        case MVT::i16:
407        case MVT::i32:
408        case MVT::i64:
409        default:
410          assert(0 && "Opcode combination not generated correctly!");
411          return AMDILCC::COND_ERROR;
412      };
413  };
414}
415
416SDValue
417AMDILTargetLowering::LowerMemArgument(
418    SDValue Chain,
419    CallingConv::ID CallConv,
420    const SmallVectorImpl<ISD::InputArg> &Ins,
421    DebugLoc dl, SelectionDAG &DAG,
422    const CCValAssign &VA,
423    MachineFrameInfo *MFI,
424    unsigned i) const
425{
426  // Create the nodes corresponding to a load from this parameter slot.
427  ISD::ArgFlagsTy Flags = Ins[i].Flags;
428
429  bool AlwaysUseMutable = (CallConv==CallingConv::Fast) &&
430    getTargetMachine().Options.GuaranteedTailCallOpt;
431  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
432
433  // FIXME: For now, all byval parameter objects are marked mutable. This can
434  // be changed with more analysis.
435  // In case of tail call optimization mark all arguments mutable. Since they
436  // could be overwritten by lowering of arguments in case of a tail call.
437  int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
438      VA.getLocMemOffset(), isImmutable);
439  SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
440
441  if (Flags.isByVal())
442    return FIN;
443  return DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
444      MachinePointerInfo::getFixedStack(FI),
445      false, false, false, 0);
446}
447//===----------------------------------------------------------------------===//
448// TargetLowering Implementation Help Functions End
449//===----------------------------------------------------------------------===//
450
451//===----------------------------------------------------------------------===//
452// TargetLowering Class Implementation Begins
453//===----------------------------------------------------------------------===//
  // Constructor: register every operation-legality decision for the AMDIL
  // target. The tables below enumerate the MVTs the backend handles; the
  // loops then mark operations Custom/Expand/Legal per type class, and the
  // trailing blocks refine that based on subtarget capabilities.
  AMDILTargetLowering::AMDILTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF())
{
  // All value types the backend registers actions for (scalar + vector).
  int types[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::f32,
    (int)MVT::f64,
    (int)MVT::i64,
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };

  // Scalar integer types only.
  int IntTypes[] =
  {
    (int)MVT::i8,
    (int)MVT::i16,
    (int)MVT::i32,
    (int)MVT::i64
  };

  // Scalar floating-point types only.
  int FloatTypes[] =
  {
    (int)MVT::f32,
    (int)MVT::f64
  };

  // All vector types (integer and floating point).
  int VectorTypes[] =
  {
    (int)MVT::v2i8,
    (int)MVT::v4i8,
    (int)MVT::v2i16,
    (int)MVT::v4i16,
    (int)MVT::v4f32,
    (int)MVT::v4i32,
    (int)MVT::v2f32,
    (int)MVT::v2i32,
    (int)MVT::v2f64,
    (int)MVT::v2i64
  };
  size_t numTypes = sizeof(types) / sizeof(*types);
  size_t numFloatTypes = sizeof(FloatTypes) / sizeof(*FloatTypes);
  size_t numIntTypes = sizeof(IntTypes) / sizeof(*IntTypes);
  size_t numVectorTypes = sizeof(VectorTypes) / sizeof(*VectorTypes);

  const AMDILSubtarget &STM = getTargetMachine().getSubtarget<AMDILSubtarget>();
  // These are the current register classes that are
  // supported

  // Actions common to every supported type.
  for (unsigned int x  = 0; x < numTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)types[x];

    //FIXME: SIGN_EXTEND_INREG is not meaningful for floating point types
    // We cannot sextinreg, expand to shifts
    setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
    setOperationAction(ISD::SUBE, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Custom);
    setOperationAction(ISD::BRCOND, VT, Custom);
    setOperationAction(ISD::BR_CC, VT, Custom);
    setOperationAction(ISD::BR_JT, VT, Expand);
    setOperationAction(ISD::BRIND, VT, Expand);
    // TODO: Implement custom UREM/SREM routines
    setOperationAction(ISD::SREM, VT, Expand);
    setOperationAction(ISD::GlobalAddress, VT, Custom);
    setOperationAction(ISD::JumpTable, VT, Custom);
    setOperationAction(ISD::ConstantPool, VT, Custom);
    setOperationAction(ISD::SELECT, VT, Custom);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    // 64-bit division has no custom lowering yet.
    if (VT != MVT::i64 && VT != MVT::v2i64) {
      setOperationAction(ISD::SDIV, VT, Custom);
    }
  }
  for (unsigned int x = 0; x < numFloatTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)FloatTypes[x];

    // IL does not have these operations for floating point types
    // NOTE(review): the ISD::SET* values below are condition codes, not
    // opcodes; this mirrors the original AMDIL code — confirm intent.
    setOperationAction(ISD::FP_ROUND_INREG, VT, Expand);
    setOperationAction(ISD::SETOLT, VT, Expand);
    setOperationAction(ISD::SETOGE, VT, Expand);
    setOperationAction(ISD::SETOGT, VT, Expand);
    setOperationAction(ISD::SETOLE, VT, Expand);
    setOperationAction(ISD::SETULT, VT, Expand);
    setOperationAction(ISD::SETUGE, VT, Expand);
    setOperationAction(ISD::SETUGT, VT, Expand);
    setOperationAction(ISD::SETULE, VT, Expand);
  }

  for (unsigned int x = 0; x < numIntTypes; ++x) {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)IntTypes[x];

    // GPU also does not have divrem function for signed or unsigned
    setOperationAction(ISD::SDIVREM, VT, Expand);

    // GPU does not have [S|U]MUL_LOHI functions as a single instruction
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);

    // GPU doesn't have a rotl, rotr, or byteswap instruction
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::BSWAP, VT, Expand);

    // GPU doesn't have any counting operators
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
  }

  // Vector types: scalarize/expand everything except BUILD_VECTOR,
  // which gets custom lowering (see LowerBUILD_VECTOR).
  for ( unsigned int ii = 0; ii < numVectorTypes; ++ii )
  {
    MVT::SimpleValueType VT = (MVT::SimpleValueType)VectorTypes[ii];

    setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
    setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand);
    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    // setOperationAction(ISD::VSETCC, VT, Expand);
    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);

  }
  // 64-bit integer support, only when the device provides it.
  if (STM.device()->isSupported(AMDILDeviceInfo::LongOps)) {
    setOperationAction(ISD::MULHU, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::v2i64, Expand);
    setOperationAction(ISD::MULHS, MVT::i64, Expand);
    setOperationAction(ISD::MULHS, MVT::v2i64, Expand);
    setOperationAction(ISD::ADD, MVT::v2i64, Expand);
    setOperationAction(ISD::SREM, MVT::v2i64, Expand);
    setOperationAction(ISD::Constant          , MVT::i64  , Legal);
    setOperationAction(ISD::SDIV, MVT::v2i64, Expand);
    setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand);
  }
  if (STM.device()->isSupported(AMDILDeviceInfo::DoubleOps)) {
    // we support loading/storing v2f64 but not operations on the type
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ConstantFP        , MVT::f64  , Legal);
    // We want to expand vector conversions into their scalar
    // counterparts.
    setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
  }
  // TODO: Fix the UDIV24 algorithm so it works for these
  // types correctly. This needs vector comparisons
  // for this to work correctly.
  setOperationAction(ISD::UDIV, MVT::v2i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i8, Expand);
  setOperationAction(ISD::UDIV, MVT::v2i16, Expand);
  setOperationAction(ISD::UDIV, MVT::v4i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Custom);
  setOperationAction(ISD::SUBC, MVT::Other, Expand);
  setOperationAction(ISD::ADDE, MVT::Other, Expand);
  setOperationAction(ISD::ADDC, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::BR_CC, MVT::Other, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRIND, MVT::Other, Expand);
  setOperationAction(ISD::SETCC, MVT::Other, Custom);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Expand);

  setOperationAction(ISD::BUILD_VECTOR, MVT::Other, Custom);
  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::ConstantFP        , MVT::f32    , Legal);
  setOperationAction(ISD::Constant          , MVT::i32    , Legal);
  setOperationAction(ISD::TRAP              , MVT::Other  , Legal);

  // Global tuning knobs for this target.
  setStackPointerRegisterToSaveRestore(AMDIL::SP);
  setSchedulingPreference(Sched::RegPressure);
  setPow2DivIsCheap(false);
  setPrefLoopAlignment(16);
  setSelectIsExpensive(true);
  setJumpIsExpensive(true);

  // Effectively unlimited inline memcpy/memmove/memset expansion.
  maxStoresPerMemcpy  = 4096;
  maxStoresPerMemmove = 4096;
  maxStoresPerMemset  = 4096;

  // NOTE(review): these #undefs are no-ops — numTypes etc. are local
  // variables above, not preprocessor macros.
#undef numTypes
#undef numIntTypes
#undef numVectorTypes
#undef numFloatTypes
}
667
668const char *
669AMDILTargetLowering::getTargetNodeName(unsigned Opcode) const
670{
671  switch (Opcode) {
672    default: return 0;
673    case AMDILISD::CMOVLOG:  return "AMDILISD::CMOVLOG";
674    case AMDILISD::MAD:  return "AMDILISD::MAD";
675    case AMDILISD::CALL:  return "AMDILISD::CALL";
676    case AMDILISD::SELECT_CC: return "AMDILISD::SELECT_CC";
677    case AMDILISD::UMUL: return "AMDILISD::UMUL";
678    case AMDILISD::DIV_INF: return "AMDILISD::DIV_INF";
679    case AMDILISD::VBUILD: return "AMDILISD::VBUILD";
680    case AMDILISD::CMP: return "AMDILISD::CMP";
681    case AMDILISD::IL_CC_I_LT: return "AMDILISD::IL_CC_I_LT";
682    case AMDILISD::IL_CC_I_LE: return "AMDILISD::IL_CC_I_LE";
683    case AMDILISD::IL_CC_I_GT: return "AMDILISD::IL_CC_I_GT";
684    case AMDILISD::IL_CC_I_GE: return "AMDILISD::IL_CC_I_GE";
685    case AMDILISD::IL_CC_I_EQ: return "AMDILISD::IL_CC_I_EQ";
686    case AMDILISD::IL_CC_I_NE: return "AMDILISD::IL_CC_I_NE";
687    case AMDILISD::RET_FLAG: return "AMDILISD::RET_FLAG";
688    case AMDILISD::BRANCH_COND: return "AMDILISD::BRANCH_COND";
689
690  };
691}
692bool
693AMDILTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
694    const CallInst &I, unsigned Intrinsic) const
695{
696  return false;
697}
698// The backend supports 32 and 64 bit floating point immediates
699bool
700AMDILTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const
701{
702  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
703      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
704    return true;
705  } else {
706    return false;
707  }
708}
709
710bool
711AMDILTargetLowering::ShouldShrinkFPConstant(EVT VT) const
712{
713  if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32
714      || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) {
715    return false;
716  } else {
717    return true;
718  }
719}
720
721
722// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to
723// be zero. Op is expected to be a target specific node. Used by DAG
724// combiner.
725
726void
727AMDILTargetLowering::computeMaskedBitsForTargetNode(
728    const SDValue Op,
729    APInt &KnownZero,
730    APInt &KnownOne,
731    const SelectionDAG &DAG,
732    unsigned Depth) const
733{
734  APInt KnownZero2;
735  APInt KnownOne2;
736  KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); // Don't know anything
737  switch (Op.getOpcode()) {
738    default: break;
739    case AMDILISD::SELECT_CC:
740             DAG.ComputeMaskedBits(
741                 Op.getOperand(1),
742                 KnownZero,
743                 KnownOne,
744                 Depth + 1
745                 );
746             DAG.ComputeMaskedBits(
747                 Op.getOperand(0),
748                 KnownZero2,
749                 KnownOne2
750                 );
751             assert((KnownZero & KnownOne) == 0
752                 && "Bits known to be one AND zero?");
753             assert((KnownZero2 & KnownOne2) == 0
754                 && "Bits known to be one AND zero?");
755             // Only known if known in both the LHS and RHS
756             KnownOne &= KnownOne2;
757             KnownZero &= KnownZero2;
758             break;
759  };
760}
761
// This is the function that determines which calling convention should
// be used. Currently there is only one calling convention
// (CC_AMDIL32, generated by tablegen), so Op is ignored.
CCAssignFn*
AMDILTargetLowering::CCAssignFnForNode(unsigned int Op) const
{
  //uint64_t CC = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
  return CC_AMDIL32;
}
770
771// LowerCallResult - Lower the result values of an ISD::CALL into the
772// appropriate copies out of appropriate physical registers.  This assumes that
773// Chain/InFlag are the input chain/flag to use, and that TheCall is the call
774// being lowered.  The returns a SDNode with the same number of values as the
775// ISD::CALL.
776SDValue
777AMDILTargetLowering::LowerCallResult(
778    SDValue Chain,
779    SDValue InFlag,
780    CallingConv::ID CallConv,
781    bool isVarArg,
782    const SmallVectorImpl<ISD::InputArg> &Ins,
783    DebugLoc dl,
784    SelectionDAG &DAG,
785    SmallVectorImpl<SDValue> &InVals) const
786{
787  // Assign locations to each value returned by this call
788  SmallVector<CCValAssign, 16> RVLocs;
789  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
790                 getTargetMachine(), RVLocs, *DAG.getContext());
791  CCInfo.AnalyzeCallResult(Ins, RetCC_AMDIL32);
792
793  // Copy all of the result registers out of their specified physreg.
794  for (unsigned i = 0; i != RVLocs.size(); ++i) {
795    EVT CopyVT = RVLocs[i].getValVT();
796    if (RVLocs[i].isRegLoc()) {
797      Chain = DAG.getCopyFromReg(
798          Chain,
799          dl,
800          RVLocs[i].getLocReg(),
801          CopyVT,
802          InFlag
803          ).getValue(1);
804      SDValue Val = Chain.getValue(0);
805      InFlag = Chain.getValue(2);
806      InVals.push_back(Val);
807    }
808  }
809
810  return Chain;
811
812}
813
814//===----------------------------------------------------------------------===//
815//                           Other Lowering Hooks
816//===----------------------------------------------------------------------===//
817
818// Recursively assign SDNodeOrdering to any unordered nodes
819// This is necessary to maintain source ordering of instructions
820// under -O0 to avoid odd-looking "skipping around" issues.
821  static const SDValue
822Ordered( SelectionDAG &DAG, unsigned order, const SDValue New )
823{
824  if (order != 0 && DAG.GetOrdering( New.getNode() ) == 0) {
825    DAG.AssignOrdering( New.getNode(), order );
826    for (unsigned i = 0, e = New.getNumOperands(); i < e; ++i)
827      Ordered( DAG, order, New.getOperand(i) );
828  }
829  return New;
830}
831
832#define LOWER(A) \
833  case ISD:: A: \
834return Ordered( DAG, DAG.GetOrdering( Op.getNode() ), Lower##A(Op, DAG) )
835
836SDValue
837AMDILTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
838{
839  switch (Op.getOpcode()) {
840    default:
841      Op.getNode()->dump();
842      assert(0 && "Custom lowering code for this"
843          "instruction is not implemented yet!");
844      break;
845      LOWER(GlobalAddress);
846      LOWER(JumpTable);
847      LOWER(ConstantPool);
848      LOWER(ExternalSymbol);
849      LOWER(SDIV);
850      LOWER(SREM);
851      LOWER(BUILD_VECTOR);
852      LOWER(SELECT);
853      LOWER(SETCC);
854      LOWER(SIGN_EXTEND_INREG);
855      LOWER(DYNAMIC_STACKALLOC);
856      LOWER(BRCOND);
857      LOWER(BR_CC);
858  }
859  return Op;
860}
861
862#undef LOWER
863
864SDValue
865AMDILTargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
866{
867  SDValue DST = Op;
868  const GlobalAddressSDNode *GADN = cast<GlobalAddressSDNode>(Op);
869  const GlobalValue *G = GADN->getGlobal();
870  DebugLoc DL = Op.getDebugLoc();
871  const GlobalVariable *GV = dyn_cast<GlobalVariable>(G);
872  if (!GV) {
873    DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
874  } else {
875    if (GV->hasInitializer()) {
876      const Constant *C = dyn_cast<Constant>(GV->getInitializer());
877      if (const ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
878        DST = DAG.getConstant(CI->getValue(), Op.getValueType());
879      } else if (const ConstantFP *CF = dyn_cast<ConstantFP>(C)) {
880        DST = DAG.getConstantFP(CF->getValueAPF(),
881            Op.getValueType());
882      } else if (dyn_cast<ConstantAggregateZero>(C)) {
883        EVT VT = Op.getValueType();
884        if (VT.isInteger()) {
885          DST = DAG.getConstant(0, VT);
886        } else {
887          DST = DAG.getConstantFP(0, VT);
888        }
889      } else {
890        assert(!"lowering this type of Global Address "
891            "not implemented yet!");
892        C->dump();
893        DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
894      }
895    } else {
896      DST = DAG.getTargetGlobalAddress(GV, DL, MVT::i32);
897    }
898  }
899  return DST;
900}
901
902SDValue
903AMDILTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const
904{
905  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
906  SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
907  return Result;
908}
909SDValue
910AMDILTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const
911{
912  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
913  EVT PtrVT = Op.getValueType();
914  SDValue Result;
915  if (CP->isMachineConstantPoolEntry()) {
916    Result = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT,
917        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
918  } else {
919    Result = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT,
920        CP->getAlignment(), CP->getOffset(), CP->getTargetFlags());
921  }
922  return Result;
923}
924
925SDValue
926AMDILTargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const
927{
928  const char *Sym = cast<ExternalSymbolSDNode>(Op)->getSymbol();
929  SDValue Result = DAG.getTargetExternalSymbol(Sym, MVT::i32);
930  return Result;
931}
932
/// LowerFormalArguments - Transform the calling convention's physical
/// register / stack-slot assignments into virtual registers (CopyFromReg)
/// and stack loads (LowerMemArgument), pushing one SDValue per incoming
/// argument onto InVals.
/// TODO: isVarArg, hasStructRet, isMemReg
  SDValue
AMDILTargetLowering::LowerFormalArguments(SDValue Chain,
    CallingConv::ID CallConv,
    bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl,
    SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  //const Function *Fn = MF.getFunction();
  //MachineRegisterInfo &RegInfo = MF.getRegInfo();

  SmallVector<CCValAssign, 16> ArgLocs;
  // NOTE(review): the convention is re-read from the function instead of
  // using the CallConv parameter -- presumably equivalent today; confirm
  // before adding new calling conventions.
  CallingConv::ID CC = MF.getFunction()->getCallingConv();
  //bool hasStructRet = MF.getFunction()->hasStructRetAttr();

  CCState CCInfo(CC, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // When more calling conventions are added, they need to be chosen here
  CCInfo.AnalyzeFormalArguments(Ins, CC_AMDIL32);
  SDValue StackPtr;

  //unsigned int FirstStackArgLoc = 0;

  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    if (VA.isRegLoc()) {
      EVT RegVT = VA.getLocVT();
      const TargetRegisterClass *RC = getRegClassFor(
          RegVT.getSimpleVT().SimpleTy);

      // Mark the physical argument register live-in and read it through a
      // fresh virtual register.
      unsigned int Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(
          Chain,
          dl,
          Reg,
          RegVT);
      // If this is an 8 or 16-bit value, it is really passed
      // promoted to 32 bits.  Insert an assert[sz]ext to capture
      // this, then truncate to the right size.

      if (VA.getLocInfo() == CCValAssign::SExt) {
        ArgValue = DAG.getNode(
            ISD::AssertSext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      } else if (VA.getLocInfo() == CCValAssign::ZExt) {
        ArgValue = DAG.getNode(
            ISD::AssertZext,
            dl,
            RegVT,
            ArgValue,
            DAG.getValueType(VA.getValVT()));
      }
      // Any promoted value (SExt/ZExt/AExt) is truncated back to the
      // value type the IR expects.
      if (VA.getLocInfo() != CCValAssign::Full) {
        ArgValue = DAG.getNode(
            ISD::TRUNCATE,
            dl,
            VA.getValVT(),
            ArgValue);
      }
      // Add the value to the list of arguments
      // to be passed in registers
      InVals.push_back(ArgValue);
      if (isVarArg) {
        assert(0 && "Variable arguments are not yet supported");
        // See MipsISelLowering.cpp for ideas on how to implement
      }
    } else if(VA.isMemLoc()) {
      // Stack-assigned argument: load it from its fixed stack slot.
      InVals.push_back(LowerMemArgument(Chain, CallConv, Ins,
            dl, DAG, VA, MFI, i));
    } else {
      assert(0 && "found a Value Assign that is "
          "neither a register or a memory location");
    }
  }
  /*if (hasStructRet) {
    assert(0 && "Has struct return is not yet implemented");
  // See MipsISelLowering.cpp for ideas on how to implement
  }*/

  if (isVarArg) {
    assert(0 && "Variable arguments are not yet supported");
    // See X86/PPC/CellSPU ISelLowering.cpp for ideas on how to implement
  }
  // This needs to be changed to non-zero if the return function needs
  // to pop bytes
  return Chain;
}
1033/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
1034/// by "Src" to address "Dst" with size and alignment information specified by
1035/// the specific parameter attribute. The copy will be passed as a byval
1036/// function parameter.
1037static SDValue
1038CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
1039    ISD::ArgFlagsTy Flags, SelectionDAG &DAG) {
1040  assert(0 && "MemCopy does not exist yet");
1041  SDValue SizeNode     = DAG.getConstant(Flags.getByValSize(), MVT::i32);
1042
1043  return DAG.getMemcpy(Chain,
1044      Src.getDebugLoc(),
1045      Dst, Src, SizeNode, Flags.getByValAlign(),
1046      /*IsVol=*/false, /*AlwaysInline=*/true,
1047      MachinePointerInfo(), MachinePointerInfo());
1048}
1049
1050SDValue
1051AMDILTargetLowering::LowerMemOpCallTo(SDValue Chain,
1052    SDValue StackPtr, SDValue Arg,
1053    DebugLoc dl, SelectionDAG &DAG,
1054    const CCValAssign &VA,
1055    ISD::ArgFlagsTy Flags) const
1056{
1057  unsigned int LocMemOffset = VA.getLocMemOffset();
1058  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
1059  PtrOff = DAG.getNode(ISD::ADD,
1060      dl,
1061      getPointerTy(), StackPtr, PtrOff);
1062  if (Flags.isByVal()) {
1063    PtrOff = CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG);
1064  } else {
1065    PtrOff = DAG.getStore(Chain, dl, Arg, PtrOff,
1066        MachinePointerInfo::getStack(LocMemOffset),
1067        false, false, 0);
1068  }
1069  return PtrOff;
1070}
/// LowerCall - Lower an outgoing call: argument values are copied from
/// virtual regs to (physical regs)/(stack frame), CALLSEQ_START and
/// CALLSEQ_END are emitted, and the results are read back through
/// LowerCallResult.
/// TODO: isVarArg, isTailCall, hasStructRet
SDValue
AMDILTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
    CallingConv::ID CallConv, bool isVarArg, bool doesNotRet,
    bool& isTailCall,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins,
    DebugLoc dl, SelectionDAG &DAG,
    SmallVectorImpl<SDValue> &InVals)
const
{
  // Tail calls are unconditionally disabled; every `if (isTailCall)` branch
  // below is currently dead and only marks future work.
  isTailCall = false;
  MachineFunction& MF = DAG.getMachineFunction();
  // FIXME: DO we need to handle fast calling conventions and tail call
  // optimizations?? X86/PPC ISelLowering
  /*bool hasStructRet = (TheCall->getNumArgs())
    ? TheCall->getArgFlags(0).device()->isSRet()
    : false;*/

  MachineFrameInfo *MFI = MF.getFrameInfo();

  // Analyze operands of the call, assigning locations to each operand
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());
  // Analyize the calling operands, but need to change
  // if we have more than one calling convetion
  CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForNode(CallConv));

  // Stack space the outgoing arguments need, reserved by CALLSEQ_START.
  unsigned int NumBytes = CCInfo.getNextStackOffset();
  if (isTailCall) {
    assert(isTailCall && "Tail Call not handled yet!");
    // See X86/PPC ISelLowering
  }

  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SmallVector<std::pair<unsigned int, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;
  //unsigned int FirstStacArgLoc = 0;
  //int LastArgStackLoc = 0;

  // Walk the register/memloc assignments, insert copies/loads
  for (unsigned int i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    //bool isByVal = Flags.isByVal(); // handle byval/bypointer registers
    // Arguments start after the 5 first operands of ISD::CALL
    SDValue Arg = OutVals[i];
    //Promote the value if needed
    switch(VA.getLocInfo()) {
      default: assert(0 && "Unknown loc info!");
      case CCValAssign::Full:
               break;
      case CCValAssign::SExt:
               Arg = DAG.getNode(ISD::SIGN_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::ZExt:
               Arg = DAG.getNode(ISD::ZERO_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
      case CCValAssign::AExt:
               Arg = DAG.getNode(ISD::ANY_EXTEND,
                   dl,
                   VA.getLocVT(), Arg);
               break;
    }

    if (VA.isRegLoc()) {
      // Remember register arguments; the CopyToReg nodes are emitted after
      // the stores below so they stay adjacent to the call.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (VA.isMemLoc()) {
      // Create the frame index object for this incoming parameter
      int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
          VA.getLocMemOffset(), true);
      SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());

      // emit ISD::STORE whichs stores the
      // parameter value to a stack Location
      MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
            MachinePointerInfo::getFixedStack(FI),
            false, false, 0));
    } else {
      assert(0 && "Not a Reg/Mem Loc, major error!");
    }
  }
  // Merge all the argument stores into one chain node.
  if (!MemOpChains.empty()) {
    Chain = DAG.getNode(ISD::TokenFactor,
        dl,
        MVT::Other,
        &MemOpChains[0],
        MemOpChains.size());
  }
  // Glue (InFlag) chains the register copies to each other and to the call
  // so the scheduler cannot separate them.
  SDValue InFlag;
  if (!isTailCall) {
    for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain,
          dl,
          RegsToPass[i].first,
          RegsToPass[i].second,
          InFlag);
      InFlag = Chain.getValue(1);
    }
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
  // every direct call is) turn it into a TargetGlobalAddress/
  // TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))  {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy());
  }
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy());
  }
  else if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1708
  }

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVTGLUE);
  SmallVector<SDValue, 8> Ops;

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1721
  }
  // If this is a direct call, pass the chain and the callee
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1739
  }

  // Add argument registers to the end of the list so that they are known
  // live into the call
  for (unsigned int i = 0, e = RegsToPass.size(); i != e; ++i) {
    Ops.push_back(DAG.getRegister(
          RegsToPass[i].first,
          RegsToPass[i].second.getValueType()));
  }
  if (InFlag.getNode()) {
    Ops.push_back(InFlag);
  }

  // Emit Tail Call
  if (isTailCall) {
    assert(0 && "Tail calls are not handled yet");
    // see X86 ISelLowering for ideas on implementation: 1762
  }

  Chain = DAG.getNode(AMDILISD::CALL,
      dl,
      NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  // Create the CALLSEQ_END node
  Chain = DAG.getCALLSEQ_END(
      Chain,
      DAG.getIntPtrConstant(NumBytes, true),
      DAG.getIntPtrConstant(0, true),
      InFlag);
  InFlag = Chain.getValue(1);
  // Handle result values, copying them out of physregs into vregs that
  // we return
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
      InVals);
}
1249
1250SDValue
1251AMDILTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const
1252{
1253  EVT OVT = Op.getValueType();
1254  SDValue DST;
1255  if (OVT.getScalarType() == MVT::i64) {
1256    DST = LowerSDIV64(Op, DAG);
1257  } else if (OVT.getScalarType() == MVT::i32) {
1258    DST = LowerSDIV32(Op, DAG);
1259  } else if (OVT.getScalarType() == MVT::i16
1260      || OVT.getScalarType() == MVT::i8) {
1261    DST = LowerSDIV24(Op, DAG);
1262  } else {
1263    DST = SDValue(Op.getNode(), 0);
1264  }
1265  return DST;
1266}
1267
1268SDValue
1269AMDILTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const
1270{
1271  EVT OVT = Op.getValueType();
1272  SDValue DST;
1273  if (OVT.getScalarType() == MVT::i64) {
1274    DST = LowerSREM64(Op, DAG);
1275  } else if (OVT.getScalarType() == MVT::i32) {
1276    DST = LowerSREM32(Op, DAG);
1277  } else if (OVT.getScalarType() == MVT::i16) {
1278    DST = LowerSREM16(Op, DAG);
1279  } else if (OVT.getScalarType() == MVT::i8) {
1280    DST = LowerSREM8(Op, DAG);
1281  } else {
1282    DST = SDValue(Op.getNode(), 0);
1283  }
1284  return DST;
1285}
1286
// LowerBUILD_VECTOR - Lower a BUILD_VECTOR by splatting operand 0 across
// the whole vector (AMDILISD::VBUILD) and then overwriting lanes 1..3 with
// the remaining non-undef operands via INSERT_VECTOR_ELT.
SDValue
AMDILTargetLowering::LowerBUILD_VECTOR( SDValue Op, SelectionDAG &DAG ) const
{
  EVT VT = Op.getValueType();
  SDValue Nodes1;
  SDValue second;
  SDValue third;
  SDValue fourth;
  DebugLoc DL = Op.getDebugLoc();
  // Splat operand 0; lanes beyond it are patched below.
  Nodes1 = DAG.getNode(AMDILISD::VBUILD,
      DL,
      VT, Op.getOperand(0));
#if 0
  bool allEqual = true;
  for (unsigned x = 1, y = Op.getNumOperands(); x < y; ++x) {
    if (Op.getOperand(0) != Op.getOperand(x)) {
      allEqual = false;
      break;
    }
  }
  if (allEqual) {
    return Nodes1;
  }
#endif
  // The switch intentionally falls through: 4 operands insert lane 3, then
  // lane 2, then lane 1.  NOTE(review): the insert indices 7/6/5 look like
  // target-specific position encodings rather than plain lane numbers --
  // confirm against the AMDIL INSERT_VECTOR_ELT selection patterns.
  switch(Op.getNumOperands()) {
    default:
    case 1:
      break;
    case 4:
      fourth = Op.getOperand(3);
      if (fourth.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            fourth,
            DAG.getConstant(7, MVT::i32));
      }
      // FALL-THROUGH
    case 3:
      third = Op.getOperand(2);
      if (third.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            third,
            DAG.getConstant(6, MVT::i32));
      }
      // FALL-THROUGH
    case 2:
      second = Op.getOperand(1);
      if (second.getOpcode() != ISD::UNDEF) {
        Nodes1 = DAG.getNode(
            ISD::INSERT_VECTOR_ELT,
            DL,
            Op.getValueType(),
            Nodes1,
            second,
            DAG.getConstant(5, MVT::i32));
      }
      break;
  };
  return Nodes1;
}
1352
1353SDValue
1354AMDILTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const
1355{
1356  SDValue Cond = Op.getOperand(0);
1357  SDValue LHS = Op.getOperand(1);
1358  SDValue RHS = Op.getOperand(2);
1359  DebugLoc DL = Op.getDebugLoc();
1360  Cond = getConversionNode(DAG, Cond, Op, true);
1361  Cond = DAG.getNode(AMDILISD::CMOVLOG,
1362      DL,
1363      Op.getValueType(), Cond, LHS, RHS);
1364  return Cond;
1365}
1366SDValue
1367AMDILTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const
1368{
1369  SDValue Cond;
1370  SDValue LHS = Op.getOperand(0);
1371  SDValue RHS = Op.getOperand(1);
1372  SDValue CC  = Op.getOperand(2);
1373  DebugLoc DL = Op.getDebugLoc();
1374  ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
1375  unsigned int AMDILCC = CondCCodeToCC(
1376      SetCCOpcode,
1377      LHS.getValueType().getSimpleVT().SimpleTy);
1378  assert((AMDILCC != AMDILCC::COND_ERROR) && "Invalid SetCC!");
1379  Cond = DAG.getNode(
1380      ISD::SELECT_CC,
1381      Op.getDebugLoc(),
1382      LHS.getValueType(),
1383      LHS, RHS,
1384      DAG.getConstant(-1, MVT::i32),
1385      DAG.getConstant(0, MVT::i32),
1386      CC);
1387  Cond = getConversionNode(DAG, Cond, Op, true);
1388  Cond = DAG.getNode(
1389      ISD::AND,
1390      DL,
1391      Cond.getValueType(),
1392      DAG.getConstant(1, Cond.getValueType()),
1393      Cond);
1394  return Cond;
1395}
1396
1397SDValue
1398AMDILTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const
1399{
1400  SDValue Data = Op.getOperand(0);
1401  VTSDNode *BaseType = cast<VTSDNode>(Op.getOperand(1));
1402  DebugLoc DL = Op.getDebugLoc();
1403  EVT DVT = Data.getValueType();
1404  EVT BVT = BaseType->getVT();
1405  unsigned baseBits = BVT.getScalarType().getSizeInBits();
1406  unsigned srcBits = DVT.isSimple() ? DVT.getScalarType().getSizeInBits() : 1;
1407  unsigned shiftBits = srcBits - baseBits;
1408  if (srcBits < 32) {
1409    // If the op is less than 32 bits, then it needs to extend to 32bits
1410    // so it can properly keep the upper bits valid.
1411    EVT IVT = genIntType(32, DVT.isVector() ? DVT.getVectorNumElements() : 1);
1412    Data = DAG.getNode(ISD::ZERO_EXTEND, DL, IVT, Data);
1413    shiftBits = 32 - baseBits;
1414    DVT = IVT;
1415  }
1416  SDValue Shift = DAG.getConstant(shiftBits, DVT);
1417  // Shift left by 'Shift' bits.
1418  Data = DAG.getNode(ISD::SHL, DL, DVT, Data, Shift);
1419  // Signed shift Right by 'Shift' bits.
1420  Data = DAG.getNode(ISD::SRA, DL, DVT, Data, Shift);
1421  if (srcBits < 32) {
1422    // Once the sign extension is done, the op needs to be converted to
1423    // its original type.
1424    Data = DAG.getSExtOrTrunc(Data, DL, Op.getOperand(0).getValueType());
1425  }
1426  return Data;
1427}
1428EVT
1429AMDILTargetLowering::genIntType(uint32_t size, uint32_t numEle) const
1430{
1431  int iSize = (size * numEle);
1432  int vEle = (iSize >> ((size == 64) ? 6 : 5));
1433  if (!vEle) {
1434    vEle = 1;
1435  }
1436  if (size == 64) {
1437    if (vEle == 1) {
1438      return EVT(MVT::i64);
1439    } else {
1440      return EVT(MVT::getVectorVT(MVT::i64, vEle));
1441    }
1442  } else {
1443    if (vEle == 1) {
1444      return EVT(MVT::i32);
1445    } else {
1446      return EVT(MVT::getVectorVT(MVT::i32, vEle));
1447    }
1448  }
1449}
1450
1451SDValue
1452AMDILTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1453    SelectionDAG &DAG) const
1454{
1455  SDValue Chain = Op.getOperand(0);
1456  SDValue Size = Op.getOperand(1);
1457  unsigned int SPReg = AMDIL::SP;
1458  DebugLoc DL = Op.getDebugLoc();
1459  SDValue SP = DAG.getCopyFromReg(Chain,
1460      DL,
1461      SPReg, MVT::i32);
1462  SDValue NewSP = DAG.getNode(ISD::ADD,
1463      DL,
1464      MVT::i32, SP, Size);
1465  Chain = DAG.getCopyToReg(SP.getValue(1),
1466      DL,
1467      SPReg, NewSP);
1468  SDValue Ops[2] = {NewSP, Chain};
1469  Chain = DAG.getMergeValues(Ops, 2 ,DL);
1470  return Chain;
1471}
1472SDValue
1473AMDILTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const
1474{
1475  SDValue Chain = Op.getOperand(0);
1476  SDValue Cond  = Op.getOperand(1);
1477  SDValue Jump  = Op.getOperand(2);
1478  SDValue Result;
1479  Result = DAG.getNode(
1480      AMDILISD::BRANCH_COND,
1481      Op.getDebugLoc(),
1482      Op.getValueType(),
1483      Chain, Jump, Cond);
1484  return Result;
1485}
1486
1487SDValue
1488AMDILTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const
1489{
1490  SDValue Chain = Op.getOperand(0);
1491  SDValue CC = Op.getOperand(1);
1492  SDValue LHS   = Op.getOperand(2);
1493  SDValue RHS   = Op.getOperand(3);
1494  SDValue JumpT  = Op.getOperand(4);
1495  SDValue CmpValue;
1496  SDValue Result;
1497  CmpValue = DAG.getNode(
1498      ISD::SELECT_CC,
1499      Op.getDebugLoc(),
1500      LHS.getValueType(),
1501      LHS, RHS,
1502      DAG.getConstant(-1, MVT::i32),
1503      DAG.getConstant(0, MVT::i32),
1504      CC);
1505  Result = DAG.getNode(
1506      AMDILISD::BRANCH_COND,
1507      CmpValue.getDebugLoc(),
1508      MVT::Other, Chain,
1509      JumpT, CmpValue);
1510  return Result;
1511}
1512
// LowerReturn - Lower an ISD::RET node: assign each return value to its
// convention register, mark those registers live-out, copy the values in
// (glued together so the copies stay adjacent), and emit RET_FLAG.
SDValue
AMDILTargetLowering::LowerReturn(SDValue Chain,
    CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    DebugLoc dl, SelectionDAG &DAG)
const
{
  //MachineFunction& MF = DAG.getMachineFunction();
  // CCValAssign - represent the assignment of the return value
  // to a location
  SmallVector<CCValAssign, 16> RVLocs;

  // CCState - Info about the registers and stack slot
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), RVLocs, *DAG.getContext());

  // Analyze return values of ISD::RET
  CCInfo.AnalyzeReturn(Outs, RetCC_AMDIL32);
  // If this is the first return lowered for this function, add
  // the regs to the liveout set for the function
  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    if (RVLocs[i].isRegLoc() && !MRI.isLiveOut(RVLocs[i].getLocReg())) {
      MRI.addLiveOut(RVLocs[i].getLocReg());
    }
  }
  // FIXME: implement this when tail call is implemented
  // Chain = GetPossiblePreceedingTailCall(Chain, AMDILISD::TAILCALL);
  // both x86 and ppc implement this in ISelLowering

  // Regular return here
  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);
  RetOps.push_back(DAG.getConstant(0/*getBytesToPopOnReturn()*/, MVT::i32));
  for (unsigned int i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    SDValue ValToCopy = OutVals[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    // ISD::Ret => ret chain, (regnum1, val1), ...
    // So i * 2 + 1 index only the regnums
    Chain = DAG.getCopyToReg(Chain,
        dl,
        VA.getLocReg(),
        ValToCopy,
        Flag);
    // guarantee that all emitted copies are stuck together
    // avoiding something bad: threading the glue value through each
    // CopyToReg keeps the scheduler from splitting them.
    Flag = Chain.getValue(1);
  }
  /*if (MF.getFunction()->hasStructRetAttr()) {
    assert(0 && "Struct returns are not yet implemented!");
  // Both MIPS and X86 have this
  }*/
  RetOps[0] = Chain;
  // Glue the last register copy (if any) to the return node.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  Flag = DAG.getNode(AMDILISD::RET_FLAG,
      dl,
      MVT::Other, &RetOps[0], RetOps.size());
  return Flag;
}
1578
1579unsigned int
1580AMDILTargetLowering::getFunctionAlignment(const Function *) const
1581{
1582  return 0;
1583}
1584
1585SDValue
1586AMDILTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const
1587{
1588  DebugLoc DL = Op.getDebugLoc();
1589  EVT OVT = Op.getValueType();
1590  SDValue LHS = Op.getOperand(0);
1591  SDValue RHS = Op.getOperand(1);
1592  MVT INTTY;
1593  MVT FLTTY;
1594  if (!OVT.isVector()) {
1595    INTTY = MVT::i32;
1596    FLTTY = MVT::f32;
1597  } else if (OVT.getVectorNumElements() == 2) {
1598    INTTY = MVT::v2i32;
1599    FLTTY = MVT::v2f32;
1600  } else if (OVT.getVectorNumElements() == 4) {
1601    INTTY = MVT::v4i32;
1602    FLTTY = MVT::v4f32;
1603  }
1604  unsigned bitsize = OVT.getScalarType().getSizeInBits();
1605  // char|short jq = ia ^ ib;
1606  SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
1607
1608  // jq = jq >> (bitsize - 2)
1609  jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
1610
1611  // jq = jq | 0x1
1612  jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
1613
1614  // jq = (int)jq
1615  jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
1616
1617  // int ia = (int)LHS;
1618  SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
1619
1620  // int ib, (int)RHS;
1621  SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
1622
1623  // float fa = (float)ia;
1624  SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
1625
1626  // float fb = (float)ib;
1627  SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
1628
1629  // float fq = native_divide(fa, fb);
1630  SDValue fq = DAG.getNode(AMDILISD::DIV_INF, DL, FLTTY, fa, fb);
1631
1632  // fq = trunc(fq);
1633  fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
1634
1635  // float fqneg = -fq;
1636  SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
1637
1638  // float fr = mad(fqneg, fb, fa);
1639  SDValue fr = DAG.getNode(AMDILISD::MAD, DL, FLTTY, fqneg, fb, fa);
1640
1641  // int iq = (int)fq;
1642  SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
1643
1644  // fr = fabs(fr);
1645  fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
1646
1647  // fb = fabs(fb);
1648  fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
1649
1650  // int cv = fr >= fb;
1651  SDValue cv;
1652  if (INTTY == MVT::i32) {
1653    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1654  } else {
1655    cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
1656  }
1657  // jq = (cv ? jq : 0);
1658  jq = DAG.getNode(AMDILISD::CMOVLOG, DL, OVT, cv, jq,
1659      DAG.getConstant(0, OVT));
1660  // dst = iq + jq;
1661  iq = DAG.getSExtOrTrunc(iq, DL, OVT);
1662  iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
1663  return iq;
1664}
1665
1666SDValue
1667AMDILTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const
1668{
1669  DebugLoc DL = Op.getDebugLoc();
1670  EVT OVT = Op.getValueType();
1671  SDValue LHS = Op.getOperand(0);
1672  SDValue RHS = Op.getOperand(1);
1673  // The LowerSDIV32 function generates equivalent to the following IL.
1674  // mov r0, LHS
1675  // mov r1, RHS
1676  // ilt r10, r0, 0
1677  // ilt r11, r1, 0
1678  // iadd r0, r0, r10
1679  // iadd r1, r1, r11
1680  // ixor r0, r0, r10
1681  // ixor r1, r1, r11
1682  // udiv r0, r0, r1
1683  // ixor r10, r10, r11
1684  // iadd r0, r0, r10
1685  // ixor DST, r0, r10
1686
1687  // mov r0, LHS
1688  SDValue r0 = LHS;
1689
1690  // mov r1, RHS
1691  SDValue r1 = RHS;
1692
1693  // ilt r10, r0, 0
1694  SDValue r10 = DAG.getSelectCC(DL,
1695      r0, DAG.getConstant(0, OVT),
1696      DAG.getConstant(-1, MVT::i32),
1697      DAG.getConstant(0, MVT::i32),
1698      ISD::SETLT);
1699
1700  // ilt r11, r1, 0
1701  SDValue r11 = DAG.getSelectCC(DL,
1702      r1, DAG.getConstant(0, OVT),
1703      DAG.getConstant(-1, MVT::i32),
1704      DAG.getConstant(0, MVT::i32),
1705      ISD::SETLT);
1706
1707  // iadd r0, r0, r10
1708  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1709
1710  // iadd r1, r1, r11
1711  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
1712
1713  // ixor r0, r0, r10
1714  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1715
1716  // ixor r1, r1, r11
1717  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
1718
1719  // udiv r0, r0, r1
1720  r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
1721
1722  // ixor r10, r10, r11
1723  r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
1724
1725  // iadd r0, r0, r10
1726  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1727
1728  // ixor DST, r0, r10
1729  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1730  return DST;
1731}
1732
1733SDValue
1734AMDILTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const
1735{
1736  return SDValue(Op.getNode(), 0);
1737}
1738
1739SDValue
1740AMDILTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const
1741{
1742  DebugLoc DL = Op.getDebugLoc();
1743  EVT OVT = Op.getValueType();
1744  MVT INTTY = MVT::i32;
1745  if (OVT == MVT::v2i8) {
1746    INTTY = MVT::v2i32;
1747  } else if (OVT == MVT::v4i8) {
1748    INTTY = MVT::v4i32;
1749  }
1750  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1751  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1752  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1753  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1754  return LHS;
1755}
1756
1757SDValue
1758AMDILTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const
1759{
1760  DebugLoc DL = Op.getDebugLoc();
1761  EVT OVT = Op.getValueType();
1762  MVT INTTY = MVT::i32;
1763  if (OVT == MVT::v2i16) {
1764    INTTY = MVT::v2i32;
1765  } else if (OVT == MVT::v4i16) {
1766    INTTY = MVT::v4i32;
1767  }
1768  SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY);
1769  SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY);
1770  LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS);
1771  LHS = DAG.getSExtOrTrunc(LHS, DL, OVT);
1772  return LHS;
1773}
1774
1775SDValue
1776AMDILTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const
1777{
1778  DebugLoc DL = Op.getDebugLoc();
1779  EVT OVT = Op.getValueType();
1780  SDValue LHS = Op.getOperand(0);
1781  SDValue RHS = Op.getOperand(1);
1782  // The LowerSREM32 function generates equivalent to the following IL.
1783  // mov r0, LHS
1784  // mov r1, RHS
1785  // ilt r10, r0, 0
1786  // ilt r11, r1, 0
1787  // iadd r0, r0, r10
1788  // iadd r1, r1, r11
1789  // ixor r0, r0, r10
1790  // ixor r1, r1, r11
1791  // udiv r20, r0, r1
1792  // umul r20, r20, r1
1793  // sub r0, r0, r20
1794  // iadd r0, r0, r10
1795  // ixor DST, r0, r10
1796
1797  // mov r0, LHS
1798  SDValue r0 = LHS;
1799
1800  // mov r1, RHS
1801  SDValue r1 = RHS;
1802
1803  // ilt r10, r0, 0
1804  SDValue r10 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1805      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1806      r0, DAG.getConstant(0, OVT));
1807
1808  // ilt r11, r1, 0
1809  SDValue r11 = DAG.getNode(AMDILISD::CMP, DL, OVT,
1810      DAG.getConstant(CondCCodeToCC(ISD::SETLT, MVT::i32), MVT::i32),
1811      r1, DAG.getConstant(0, OVT));
1812
1813  // iadd r0, r0, r10
1814  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1815
1816  // iadd r1, r1, r11
1817  r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
1818
1819  // ixor r0, r0, r10
1820  r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1821
1822  // ixor r1, r1, r11
1823  r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
1824
1825  // udiv r20, r0, r1
1826  SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1);
1827
1828  // umul r20, r20, r1
1829  r20 = DAG.getNode(AMDILISD::UMUL, DL, OVT, r20, r1);
1830
1831  // sub r0, r0, r20
1832  r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
1833
1834  // iadd r0, r0, r10
1835  r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
1836
1837  // ixor DST, r0, r10
1838  SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
1839  return DST;
1840}
1841
1842SDValue
1843AMDILTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const
1844{
1845  return SDValue(Op.getNode(), 0);
1846}
1847