AMDGPUISelLowering.cpp revision a2b4eb6d15a13de257319ac6231b5ab622cd02b1
12a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
22a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
32a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//                     The LLVM Compiler Infrastructure
490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)//
52a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// This file is distributed under the University of Illinois Open Source
62a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// License. See LICENSE.TXT for details.
72a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
82a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===//
92a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
10a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)/// \file
112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)/// \brief This is the parent TargetLowering class for hardware code gen
12ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch/// targets.
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//
142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===//
152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
16868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "AMDGPUISelLowering.h"
17ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "AMDGPU.h"
182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUFrameLowering.h"
192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPURegisterInfo.h"
202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUSubtarget.h"
212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDILIntrinsicInfo.h"
222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "R600MachineFunctionInfo.h"
232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "SIMachineFunctionInfo.h"
242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/CallingConvLower.h"
252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/MachineFunction.h"
262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h"
272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/SelectionDAG.h"
282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/IR/DataLayout.h"
302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)using namespace llvm;
322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                      CCValAssign::LocInfo LocInfo,
342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                      ISD::ArgFlagsTy ArgFlags, CCState &State) {
352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign());
362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return true;
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUGenCallingConv.inc"
422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  TargetLowering(TM, new TargetLoweringObjectFileELF()) {
452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // Initialize target lowering borrowed from AMDIL
472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  InitAMDILLowering();
482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // We need to custom lower some of the intrinsics
50ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // Library functions.  These default to Expand, but we have instructions
532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  // for them.
542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::FEXP2,  MVT::f32, Legal);
562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::FPOW,   MVT::f32, Legal);
572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::FLOG2,  MVT::f32, Legal);
587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setOperationAction(ISD::FABS,   MVT::f32, Legal);
597d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
60ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
6190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
623551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // The hardware supports ROTR, but not ROTL
6390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  setOperationAction(ISD::ROTL, MVT::i32, Expand);
642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
653551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // Lower floating point store/load to integer store/load to reduce the number
663551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // of patterns in tablegen.
673551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::f32, Promote);
683551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32);
693551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
703551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v2f32, Promote);
713551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
723551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
733551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v4f32, Promote);
743551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
753551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v8f32, Promote);
773551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32);
783551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
793551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v16f32, Promote);
803551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32);
813551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
823551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  setOperationAction(ISD::STORE, MVT::f64, Promote);
83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
85ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  // Custom lowering of vector stores is required for local address space
86a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  // stores.
874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
88a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  // XXX: Native v2i32 local address space stores are possible, but not
894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  // currently implemented.
90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
91ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
95ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  // XXX: This can be change to Custom, once ExpandVectorStores can
96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  // handle 64-bit stores.
97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
997d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setOperationAction(ISD::LOAD, MVT::f32, Promote);
100ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32);
101a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
102a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
103a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
104ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
10590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
10690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles)
108a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::LOAD, MVT::v8f32, Promote);
109a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32);
110ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
111a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::LOAD, MVT::v16f32, Promote);
112a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32);
113a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch
114a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::LOAD, MVT::f64, Promote);
115a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64);
116ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
117a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom);
118a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom);
119a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom);
120ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom);
121ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch
122ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand);
123ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand);
124ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand);
125ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand);
126ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand);
12790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand);
1282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand);
1297d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand);
1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand);
1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand);
1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand);
1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand);
13490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
1367d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::MUL, MVT::i64, Expand);
1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)  setOperationAction(ISD::UDIV, MVT::i32, Expand);
1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  setOperationAction(ISD::UDIVREM, MVT::i32, Custom);
144ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  setOperationAction(ISD::UREM, MVT::i32, Expand);
14590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  setOperationAction(ISD::VSELECT, MVT::v2f32, Expand);
1464e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)  setOperationAction(ISD::VSELECT, MVT::v4f32, Expand);
1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  static const MVT::SimpleValueType IntTypes[] = {
1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    MVT::v2i32, MVT::v4i32
1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  };
1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t NumIntTypes = array_lengthof(IntTypes);
15290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
15390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  for (unsigned int x  = 0; x < NumIntTypes; ++x) {
15490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    MVT::SimpleValueType VT = IntTypes[x];
15590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)    //Expand the following operations for the current type by default
1562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::ADD,  VT, Expand);
1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::AND,  VT, Expand);
1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
1592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
1602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::MUL,  VT, Expand);
1612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::OR,   VT, Expand);
162a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    setOperationAction(ISD::SHL,  VT, Expand);
1632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
1642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::SRL,  VT, Expand);
1652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::SRA,  VT, Expand);
1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::SUB,  VT, Expand);
1672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::UDIV, VT, Expand);
1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
1697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    setOperationAction(ISD::UREM, VT, Expand);
1707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)    setOperationAction(ISD::VSELECT, VT, Expand);
171ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch    setOperationAction(ISD::XOR,  VT, Expand);
17290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)  }
17390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  static const MVT::SimpleValueType FloatTypes[] = {
1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    MVT::v2f32, MVT::v4f32
1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  };
1772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  const size_t NumFloatTypes = array_lengthof(FloatTypes);
1782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
179ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  for (unsigned int x = 0; x < NumFloatTypes; ++x) {
1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    MVT::SimpleValueType VT = FloatTypes[x];
1814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    setOperationAction(ISD::FADD, VT, Expand);
1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FDIV, VT, Expand);
1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FFLOOR, VT, Expand);
1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FMUL, VT, Expand);
1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FRINT, VT, Expand);
1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FSQRT, VT, Expand);
1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    setOperationAction(ISD::FSUB, VT, Expand);
1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  }
1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===//
1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Target Information
1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===//
1942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
1952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)MVT AMDGPUTargetLowering::getVectorIdxTy() const {
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return MVT::i32;
1977d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
1987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
199ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch
2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===//
2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Target Properties
2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===//
2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  assert(VT.isFloatingPoint());
206ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch  return VT == MVT::f32;
2072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)
2092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  assert(VT.isFloatingPoint());
2112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return VT == MVT::f32;
2122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)}
2132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===//
2152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// TargetLowering Callbacks
2162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===//
2172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)
2182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
2192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                             const SmallVectorImpl<ISD::InputArg> &Ins) const {
220a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
22290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)}
22390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)
2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)SDValue AMDGPUTargetLowering::LowerReturn(
2252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     SDValue Chain,
22690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     CallingConv::ID CallConv,
22790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     bool isVarArg,
22890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     const SmallVectorImpl<SDValue> &OutVals,
2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)                                     SDLoc DL, SelectionDAG &DAG) const {
2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)  return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain);
2327d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)}
2337d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)
234ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch//===---------------------------------------------------------------------===//
235a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)// Target specific lowering
236a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)//===---------------------------------------------------------------------===//
237a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
238a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
239a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    const {
240a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  switch (Op.getOpcode()) {
241a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  default:
2424e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles)    Op.getNode()->dump();
243a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    assert(0 && "Custom lowering code for this"
244a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)        "instruction is not implemented yet!");
245a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)    break;
246a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  // AMDIL DAG lowering
247a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::SDIV: return LowerSDIV(Op, DAG);
248a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::SREM: return LowerSREM(Op, DAG);
249a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
250a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
251a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  // AMDGPU DAG lowering
252a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
253a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
254a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
255a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
256a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
257a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
258a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  }
259a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  return Op;
260a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)}
261a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
262a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
263a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)                                                 SDValue Op,
264a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)                                                 SelectionDAG &DAG) const {
265a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
266a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  const DataLayout *TD = getTargetMachine().getDataLayout();
267a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)  GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
268a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)
269  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
270  // XXX: What does the value of G->getOffset() mean?
271  assert(G->getOffset() == 0 &&
272         "Do not know what to do with an non-zero offset");
273
274  const GlobalValue *GV = G->getGlobal();
275
276  unsigned Offset;
277  if (MFI->LocalMemoryObjects.count(GV) == 0) {
278    uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType());
279    Offset = MFI->LDSSize;
280    MFI->LocalMemoryObjects[GV] = Offset;
281    // XXX: Account for alignment?
282    MFI->LDSSize += Size;
283  } else {
284    Offset = MFI->LocalMemoryObjects[GV];
285  }
286
287  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
288}
289
290void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
291                                         SmallVectorImpl<SDValue> &Args,
292                                         unsigned Start,
293                                         unsigned Count) const {
294  EVT VT = Op.getValueType();
295  for (unsigned i = Start, e = Start + Count; i != e; ++i) {
296    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op),
297                               VT.getVectorElementType(),
298                               Op, DAG.getConstant(i, MVT::i32)));
299  }
300}
301
302SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
303                                                  SelectionDAG &DAG) const {
304  SmallVector<SDValue, 8> Args;
305  SDValue A = Op.getOperand(0);
306  SDValue B = Op.getOperand(1);
307
308  ExtractVectorElements(A, DAG, Args, 0,
309                        A.getValueType().getVectorNumElements());
310  ExtractVectorElements(B, DAG, Args, 0,
311                        B.getValueType().getVectorNumElements());
312
313  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
314                     &Args[0], Args.size());
315}
316
317SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
318                                                     SelectionDAG &DAG) const {
319
320  SmallVector<SDValue, 8> Args;
321  EVT VT = Op.getValueType();
322  unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
323  ExtractVectorElements(Op.getOperand(0), DAG, Args, Start,
324                        VT.getVectorNumElements());
325
326  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(),
327                     &Args[0], Args.size());
328}
329
330SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
331                                              SelectionDAG &DAG) const {
332
333  MachineFunction &MF = DAG.getMachineFunction();
334  const AMDGPUFrameLowering *TFL =
335   static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
336
337  FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
338  assert(FIN);
339
340  unsigned FrameIndex = FIN->getIndex();
341  unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
342  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF),
343                         Op.getValueType());
344}
345
346SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
347    SelectionDAG &DAG) const {
348  unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
349  SDLoc DL(Op);
350  EVT VT = Op.getValueType();
351
352  switch (IntrinsicID) {
353    default: return Op;
354    case AMDGPUIntrinsic::AMDIL_abs:
355      return LowerIntrinsicIABS(Op, DAG);
356    case AMDGPUIntrinsic::AMDIL_exp:
357      return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
358    case AMDGPUIntrinsic::AMDGPU_lrp:
359      return LowerIntrinsicLRP(Op, DAG);
360    case AMDGPUIntrinsic::AMDIL_fraction:
361      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
362    case AMDGPUIntrinsic::AMDIL_max:
363      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
364                                                  Op.getOperand(2));
365    case AMDGPUIntrinsic::AMDGPU_imax:
366      return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1),
367                                                  Op.getOperand(2));
368    case AMDGPUIntrinsic::AMDGPU_umax:
369      return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1),
370                                                  Op.getOperand(2));
371    case AMDGPUIntrinsic::AMDIL_min:
372      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1),
373                                                  Op.getOperand(2));
374    case AMDGPUIntrinsic::AMDGPU_imin:
375      return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1),
376                                                  Op.getOperand(2));
377    case AMDGPUIntrinsic::AMDGPU_umin:
378      return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1),
379                                                  Op.getOperand(2));
380    case AMDGPUIntrinsic::AMDIL_round_nearest:
381      return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1));
382  }
383}
384
385///IABS(a) = SMAX(sub(0, a), a)
386SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op,
387    SelectionDAG &DAG) const {
388
389  SDLoc DL(Op);
390  EVT VT = Op.getValueType();
391  SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
392                                              Op.getOperand(1));
393
394  return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1));
395}
396
397/// Linear Interpolation
398/// LRP(a, b, c) = muladd(a,  b, (1 - a) * c)
399SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op,
400    SelectionDAG &DAG) const {
401  SDLoc DL(Op);
402  EVT VT = Op.getValueType();
403  SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT,
404                                DAG.getConstantFP(1.0f, MVT::f32),
405                                Op.getOperand(1));
406  SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA,
407                                                    Op.getOperand(3));
408  return DAG.getNode(ISD::FADD, DL, VT,
409      DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)),
410      OneSubAC);
411}
412
413/// \brief Generate Min/Max node
414SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op,
415    SelectionDAG &DAG) const {
416  SDLoc DL(Op);
417  EVT VT = Op.getValueType();
418
419  SDValue LHS = Op.getOperand(0);
420  SDValue RHS = Op.getOperand(1);
421  SDValue True = Op.getOperand(2);
422  SDValue False = Op.getOperand(3);
423  SDValue CC = Op.getOperand(4);
424
425  if (VT != MVT::f32 ||
426      !((LHS == True && RHS == False) || (LHS == False && RHS == True))) {
427    return SDValue();
428  }
429
430  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
431  switch (CCOpcode) {
432  case ISD::SETOEQ:
433  case ISD::SETONE:
434  case ISD::SETUNE:
435  case ISD::SETNE:
436  case ISD::SETUEQ:
437  case ISD::SETEQ:
438  case ISD::SETFALSE:
439  case ISD::SETFALSE2:
440  case ISD::SETTRUE:
441  case ISD::SETTRUE2:
442  case ISD::SETUO:
443  case ISD::SETO:
444    assert(0 && "Operation should already be optimised !");
445  case ISD::SETULE:
446  case ISD::SETULT:
447  case ISD::SETOLE:
448  case ISD::SETOLT:
449  case ISD::SETLE:
450  case ISD::SETLT: {
451    if (LHS == True)
452      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
453    else
454      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
455  }
456  case ISD::SETGT:
457  case ISD::SETGE:
458  case ISD::SETUGE:
459  case ISD::SETOGE:
460  case ISD::SETUGT:
461  case ISD::SETOGT: {
462    if (LHS == True)
463      return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS);
464    else
465      return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS);
466  }
467  case ISD::SETCC_INVALID:
468    assert(0 && "Invalid setcc condcode !");
469  }
470  return Op;
471}
472
473SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
474                                              SelectionDAG &DAG) const {
475  LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
476  EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
477  EVT EltVT = Op.getValueType().getVectorElementType();
478  EVT PtrVT = Load->getBasePtr().getValueType();
479  unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
480  SmallVector<SDValue, 8> Loads;
481  SDLoc SL(Op);
482
483  for (unsigned i = 0, e = NumElts; i != e; ++i) {
484    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
485                    DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
486    Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
487                        Load->getChain(), Ptr,
488                        MachinePointerInfo(Load->getMemOperand()->getValue()),
489                        MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
490                        Load->getAlignment()));
491  }
492  return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0],
493                     Loads.size());
494}
495
496SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
497                                               SelectionDAG &DAG) const {
498  StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
499  EVT MemVT = Store->getMemoryVT();
500  unsigned MemBits = MemVT.getSizeInBits();
501
502  // Byte stores are really expensive, so if possible, try to pack
503  // 32-bit vector truncatating store into an i32 store.
504  // XXX: We could also handle optimize other vector bitwidths
505  if (!MemVT.isVector() || MemBits > 32) {
506    return SDValue();
507  }
508
509  SDLoc DL(Op);
510  const SDValue &Value = Store->getValue();
511  EVT VT = Value.getValueType();
512  const SDValue &Ptr = Store->getBasePtr();
513  EVT MemEltVT = MemVT.getVectorElementType();
514  unsigned MemEltBits = MemEltVT.getSizeInBits();
515  unsigned MemNumElements = MemVT.getVectorNumElements();
516  EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
517  SDValue Mask;
518  switch(MemEltBits) {
519  case 8:
520    Mask = DAG.getConstant(0xFF, PackedVT);
521    break;
522  case 16:
523    Mask = DAG.getConstant(0xFFFF, PackedVT);
524    break;
525  default:
526    llvm_unreachable("Cannot lower this vector store");
527  }
528  SDValue PackedValue;
529  for (unsigned i = 0; i < MemNumElements; ++i) {
530    EVT ElemVT = VT.getVectorElementType();
531    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
532                              DAG.getConstant(i, MVT::i32));
533    Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
534    Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
535    SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
536    Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
537    if (i == 0) {
538      PackedValue = Elt;
539    } else {
540      PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
541    }
542  }
543  return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
544                      MachinePointerInfo(Store->getMemOperand()->getValue()),
545                      Store->isVolatile(),  Store->isNonTemporal(),
546                      Store->getAlignment());
547}
548
549SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
550                                            SelectionDAG &DAG) const {
551  StoreSDNode *Store = cast<StoreSDNode>(Op);
552  EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
553  EVT EltVT = Store->getValue().getValueType().getVectorElementType();
554  EVT PtrVT = Store->getBasePtr().getValueType();
555  unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
556  SDLoc SL(Op);
557
558  SmallVector<SDValue, 8> Chains;
559
560  for (unsigned i = 0, e = NumElts; i != e; ++i) {
561    SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
562                              Store->getValue(), DAG.getConstant(i, MVT::i32));
563    SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
564                              Store->getBasePtr(),
565                            DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
566                                            PtrVT));
567    Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr,
568                         MachinePointerInfo(Store->getMemOperand()->getValue()),
569                         MemEltVT, Store->isVolatile(), Store->isNonTemporal(),
570                         Store->getAlignment()));
571  }
572  return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
573}
574
575SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
576  SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
577  if (Result.getNode()) {
578    return Result;
579  }
580
581  StoreSDNode *Store = cast<StoreSDNode>(Op);
582  if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
583       Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
584      Store->getValue().getValueType().isVector()) {
585    return SplitVectorStore(Op, DAG);
586  }
587  return SDValue();
588}
589
590SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
591    SelectionDAG &DAG) const {
592  SDLoc DL(Op);
593  EVT VT = Op.getValueType();
594
595  SDValue Num = Op.getOperand(0);
596  SDValue Den = Op.getOperand(1);
597
598  SmallVector<SDValue, 8> Results;
599
600  // RCP =  URECIP(Den) = 2^32 / Den + e
601  // e is rounding error.
602  SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den);
603
604  // RCP_LO = umulo(RCP, Den) */
605  SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den);
606
607  // RCP_HI = mulhu (RCP, Den) */
608  SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den);
609
610  // NEG_RCP_LO = -RCP_LO
611  SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT),
612                                                     RCP_LO);
613
614  // ABS_RCP_LO = (RCP_HI == 0 ? NEG_RCP_LO : RCP_LO)
615  SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
616                                           NEG_RCP_LO, RCP_LO,
617                                           ISD::SETEQ);
618  // Calculate the rounding error from the URECIP instruction
619  // E = mulhu(ABS_RCP_LO, RCP)
620  SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP);
621
622  // RCP_A_E = RCP + E
623  SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E);
624
625  // RCP_S_E = RCP - E
626  SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E);
627
628  // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E)
629  SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT),
630                                     RCP_A_E, RCP_S_E,
631                                     ISD::SETEQ);
632  // Quotient = mulhu(Tmp0, Num)
633  SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num);
634
635  // Num_S_Remainder = Quotient * Den
636  SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den);
637
638  // Remainder = Num - Num_S_Remainder
639  SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder);
640
641  // Remainder_GE_Den = (Remainder >= Den ? -1 : 0)
642  SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den,
643                                                 DAG.getConstant(-1, VT),
644                                                 DAG.getConstant(0, VT),
645                                                 ISD::SETUGE);
646  // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0)
647  SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num,
648                                                  Num_S_Remainder,
649                                                  DAG.getConstant(-1, VT),
650                                                  DAG.getConstant(0, VT),
651                                                  ISD::SETUGE);
652  // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero
653  SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den,
654                                               Remainder_GE_Zero);
655
656  // Calculate Division result:
657
658  // Quotient_A_One = Quotient + 1
659  SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient,
660                                                         DAG.getConstant(1, VT));
661
662  // Quotient_S_One = Quotient - 1
663  SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient,
664                                                         DAG.getConstant(1, VT));
665
666  // Div = (Tmp1 == 0 ? Quotient : Quotient_A_One)
667  SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
668                                     Quotient, Quotient_A_One, ISD::SETEQ);
669
670  // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div)
671  Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
672                            Quotient_S_One, Div, ISD::SETEQ);
673
674  // Calculate Rem result:
675
676  // Remainder_S_Den = Remainder - Den
677  SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den);
678
679  // Remainder_A_Den = Remainder + Den
680  SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den);
681
682  // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den)
683  SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT),
684                                    Remainder, Remainder_S_Den, ISD::SETEQ);
685
686  // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem)
687  Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT),
688                            Remainder_A_Den, Rem, ISD::SETEQ);
689  SDValue Ops[2];
690  Ops[0] = Div;
691  Ops[1] = Rem;
692  return DAG.getMergeValues(Ops, 2, DL);
693}
694
695SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
696                                               SelectionDAG &DAG) const {
697  SDValue S0 = Op.getOperand(0);
698  SDLoc DL(Op);
699  if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64)
700    return SDValue();
701
702  // f32 uint_to_fp i64
703  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
704                           DAG.getConstant(0, MVT::i32));
705  SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
706  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
707                           DAG.getConstant(1, MVT::i32));
708  SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
709  FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
710                        DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32
711  return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
712
713}
714
715//===----------------------------------------------------------------------===//
716// Helper functions
717//===----------------------------------------------------------------------===//
718
719void AMDGPUTargetLowering::getOriginalFunctionArgs(
720                               SelectionDAG &DAG,
721                               const Function *F,
722                               const SmallVectorImpl<ISD::InputArg> &Ins,
723                               SmallVectorImpl<ISD::InputArg> &OrigIns) const {
724
725  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
726    if (Ins[i].ArgVT == Ins[i].VT) {
727      OrigIns.push_back(Ins[i]);
728      continue;
729    }
730
731    EVT VT;
732    if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
733      // Vector has been split into scalars.
734      VT = Ins[i].ArgVT.getVectorElementType();
735    } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
736               Ins[i].ArgVT.getVectorElementType() !=
737               Ins[i].VT.getVectorElementType()) {
738      // Vector elements have been promoted
739      VT = Ins[i].ArgVT;
740    } else {
741      // Vector has been spilt into smaller vectors.
742      VT = Ins[i].VT;
743    }
744
745    ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
746                      Ins[i].OrigArgIndex, Ins[i].PartOffset);
747    OrigIns.push_back(Arg);
748  }
749}
750
751bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const {
752  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
753    return CFP->isExactlyValue(1.0);
754  }
755  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
756    return C->isAllOnesValue();
757  }
758  return false;
759}
760
761bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const {
762  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
763    return CFP->getValueAPF().isZero();
764  }
765  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
766    return C->isNullValue();
767  }
768  return false;
769}
770
771SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
772                                                  const TargetRegisterClass *RC,
773                                                   unsigned Reg, EVT VT) const {
774  MachineFunction &MF = DAG.getMachineFunction();
775  MachineRegisterInfo &MRI = MF.getRegInfo();
776  unsigned VirtualRegister;
777  if (!MRI.isLiveIn(Reg)) {
778    VirtualRegister = MRI.createVirtualRegister(RC);
779    MRI.addLiveIn(Reg, VirtualRegister);
780  } else {
781    VirtualRegister = MRI.getLiveInVirtReg(Reg);
782  }
783  return DAG.getRegister(VirtualRegister, VT);
784}
785
786#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node;
787
788const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
789  switch (Opcode) {
790  default: return 0;
791  // AMDIL DAG nodes
792  NODE_NAME_CASE(CALL);
793  NODE_NAME_CASE(UMUL);
794  NODE_NAME_CASE(DIV_INF);
795  NODE_NAME_CASE(RET_FLAG);
796  NODE_NAME_CASE(BRANCH_COND);
797
798  // AMDGPU DAG nodes
799  NODE_NAME_CASE(DWORDADDR)
800  NODE_NAME_CASE(FRACT)
801  NODE_NAME_CASE(FMAX)
802  NODE_NAME_CASE(SMAX)
803  NODE_NAME_CASE(UMAX)
804  NODE_NAME_CASE(FMIN)
805  NODE_NAME_CASE(SMIN)
806  NODE_NAME_CASE(UMIN)
807  NODE_NAME_CASE(URECIP)
808  NODE_NAME_CASE(EXPORT)
809  NODE_NAME_CASE(CONST_ADDRESS)
810  NODE_NAME_CASE(REGISTER_LOAD)
811  NODE_NAME_CASE(REGISTER_STORE)
812  NODE_NAME_CASE(LOAD_CONSTANT)
813  NODE_NAME_CASE(LOAD_INPUT)
814  NODE_NAME_CASE(SAMPLE)
815  NODE_NAME_CASE(SAMPLEB)
816  NODE_NAME_CASE(SAMPLED)
817  NODE_NAME_CASE(SAMPLEL)
818  NODE_NAME_CASE(STORE_MSKOR)
819  NODE_NAME_CASE(TBUFFER_STORE_FORMAT)
820  }
821}
822