AMDGPUISelLowering.cpp revision a2b4eb6d15a13de257319ac6231b5ab622cd02b1
//===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief This is the parent TargetLowering class for hardware code gen
/// targets.
132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// 142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 16868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)#include "AMDGPUISelLowering.h" 17ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch#include "AMDGPU.h" 182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUFrameLowering.h" 192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPURegisterInfo.h" 202a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUSubtarget.h" 212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDILIntrinsicInfo.h" 222a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "R600MachineFunctionInfo.h" 232a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "SIMachineFunctionInfo.h" 242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/CallingConvLower.h" 252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/MachineFunction.h" 262a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/MachineRegisterInfo.h" 272a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/SelectionDAG.h" 282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "llvm/IR/DataLayout.h" 302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)using namespace llvm; 322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT, 332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 
CCValAssign::LocInfo LocInfo, 342a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) ISD::ArgFlagsTy ArgFlags, CCState &State) { 352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) unsigned Offset = State.AllocateStack(ValVT.getSizeInBits() / 8, ArgFlags.getOrigAlign()); 362a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); 372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return true; 392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "AMDGPUGenCallingConv.inc" 422a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : 442a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) TargetLowering(TM, new TargetLoweringObjectFileELF()) { 452a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 46c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // Initialize target lowering borrowed from AMDIL 472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) InitAMDILLowering(); 482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // We need to custom lower some of the intrinsics 50ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 522a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Library functions. These default to Expand, but we have instructions 532a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // for them. 
542a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FCEIL, MVT::f32, Legal); 552a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FEXP2, MVT::f32, Legal); 562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FPOW, MVT::f32, Legal); 572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FLOG2, MVT::f32, Legal); 587d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::FABS, MVT::f32, Legal); 597d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::FFLOOR, MVT::f32, Legal); 60ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch setOperationAction(ISD::FRINT, MVT::f32, Legal); 6190dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 623551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) // The hardware supports ROTR, but not ROTL 6390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) setOperationAction(ISD::ROTL, MVT::i32, Expand); 642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 653551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) // Lower floating point store/load to integer store/load to reduce the number 663551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) // of patterns in tablegen. 
673551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::f32, Promote); 683551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 693551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 703551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::v2f32, Promote); 713551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 723551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 733551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::v4f32, Promote); 743551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 753551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::v8f32, Promote); 773551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); 783551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 793551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::v16f32, Promote); 803551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); 813551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 823551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) setOperationAction(ISD::STORE, MVT::f64, Promote); 83c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64); 84c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 85ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch // Custom lowering of vector stores is required for local address space 86a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch // stores. 
874e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) setOperationAction(ISD::STORE, MVT::v4i32, Custom); 88a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch // XXX: Native v2i32 local address space stores are possible, but not 894e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // currently implemented. 90c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) setOperationAction(ISD::STORE, MVT::v2i32, Custom); 91ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch 92c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom); 93c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom); 94c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom); 95ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch // XXX: This can be change to Custom, once ExpandVectorStores can 96c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) // handle 64-bit stores. 
97c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand); 987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 997d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::LOAD, MVT::f32, Promote); 100ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 101a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch 102a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 103a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 104ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch 10590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 10690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 107c2e0dbddbe15c98d52c4786dac06cb8952a8ae6dTorne (Richard Coles) 108a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::LOAD, MVT::v8f32, Promote); 109a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); 110ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch 111a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::LOAD, MVT::v16f32, Promote); 112a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); 113a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch 114a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::LOAD, MVT::f64, Promote); 115a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch AddPromotedToType(ISD::LOAD, MVT::f64, MVT::i64); 116ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch 117a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Custom); 118a3f7b4e666c476898878fa745f637129375cd889Ben 
Murdoch setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Custom); 119a3f7b4e666c476898878fa745f637129375cd889Ben Murdoch setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2i32, Custom); 120ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v2f32, Custom); 121ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch 122ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setLoadExtAction(ISD::EXTLOAD, MVT::v2i8, Expand); 123ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setLoadExtAction(ISD::SEXTLOAD, MVT::v2i8, Expand); 124ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i8, Expand); 125ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setLoadExtAction(ISD::EXTLOAD, MVT::v4i8, Expand); 126ba5b9a6411cb1792fd21f0a078d7a25cd1ceec16Ben Murdoch setLoadExtAction(ISD::SEXTLOAD, MVT::v4i8, Expand); 12790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i8, Expand); 1282a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setLoadExtAction(ISD::EXTLOAD, MVT::v2i16, Expand); 1297d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, Expand); 1302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, Expand); 1312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, Expand); 1322a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, Expand); 1332a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, Expand); 13490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 1352a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FNEG, MVT::v2f32, Expand); 1367d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::FNEG, 
MVT::v4f32, Expand); 1372a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); 1392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1402a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::MUL, MVT::i64, Expand); 1412a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1427d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::UDIV, MVT::i32, Expand); 1432a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::UDIVREM, MVT::i32, Custom); 144ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch setOperationAction(ISD::UREM, MVT::i32, Expand); 14590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); 1464e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); 1472a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1482a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) static const MVT::SimpleValueType IntTypes[] = { 1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MVT::v2i32, MVT::v4i32 1502a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) }; 1512a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t NumIntTypes = array_lengthof(IntTypes); 15290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 15390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) for (unsigned int x = 0; x < NumIntTypes; ++x) { 15490dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) MVT::SimpleValueType VT = IntTypes[x]; 15590dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) //Expand the following operations for the current type by default 1562a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::ADD, VT, Expand); 
1572a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::AND, VT, Expand); 1582a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FP_TO_SINT, VT, Expand); 1592a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FP_TO_UINT, VT, Expand); 1602a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::MUL, VT, Expand); 1612a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::OR, VT, Expand); 162a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) setOperationAction(ISD::SHL, VT, Expand); 1632a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::SINT_TO_FP, VT, Expand); 1642a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::SRL, VT, Expand); 1652a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::SRA, VT, Expand); 1662a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::SUB, VT, Expand); 1672a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::UDIV, VT, Expand); 1682a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::UINT_TO_FP, VT, Expand); 1697d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::UREM, VT, Expand); 1707d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) setOperationAction(ISD::VSELECT, VT, Expand); 171ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch setOperationAction(ISD::XOR, VT, Expand); 17290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) } 17390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 1742a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) static const MVT::SimpleValueType FloatTypes[] = { 1752a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MVT::v2f32, MVT::v4f32 
1762a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) }; 1772a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const size_t NumFloatTypes = array_lengthof(FloatTypes); 1782a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 179ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch for (unsigned int x = 0; x < NumFloatTypes; ++x) { 1802a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) MVT::SimpleValueType VT = FloatTypes[x]; 1814e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) setOperationAction(ISD::FADD, VT, Expand); 1822a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FDIV, VT, Expand); 1832a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FFLOOR, VT, Expand); 1842a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FMUL, VT, Expand); 1852a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FRINT, VT, Expand); 1862a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FSQRT, VT, Expand); 1872a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) setOperationAction(ISD::FSUB, VT, Expand); 1882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) } 1892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 1902a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1912a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 1922a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Target Information 1932a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===----------------------------------------------------------------------===// 1942a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1952a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)MVT AMDGPUTargetLowering::getVectorIdxTy() const { 
1962a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return MVT::i32; 1977d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)} 1987d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 199ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch 2002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===// 2012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// Target Properties 2022a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===// 2032a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2042a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 2052a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(VT.isFloatingPoint()); 206ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch return VT == MVT::f32; 2072a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2084e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 2092a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 2102a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) assert(VT.isFloatingPoint()); 2112a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return VT == MVT::f32; 2122a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)} 2132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 2142a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===// 2152a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)// TargetLowering Callbacks 2162a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)//===---------------------------------------------------------------------===// 2172a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard 
Coles) 2182a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State, 2192a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const SmallVectorImpl<ISD::InputArg> &Ins) const { 220a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 2212a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) State.AnalyzeFormalArguments(Ins, CC_AMDGPU); 22290dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles)} 22390dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) 2242a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)SDValue AMDGPUTargetLowering::LowerReturn( 2252a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SDValue Chain, 22690dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) CallingConv::ID CallConv, 22790dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) bool isVarArg, 22890dce4d38c5ff5333bea97d859d4e484e27edf0cTorne (Richard Coles) const SmallVectorImpl<ISD::OutputArg> &Outs, 2292a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const SmallVectorImpl<SDValue> &OutVals, 2302a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) SDLoc DL, SelectionDAG &DAG) const { 2312a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, Chain); 2327d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles)} 2337d4cd473f85ac64c3747c96c277f9e506a0d2246Torne (Richard Coles) 234ca12bfac764ba476d6cd062bf1dde12cc64c3f40Ben Murdoch//===---------------------------------------------------------------------===// 235a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)// Target specific lowering 236a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)//===---------------------------------------------------------------------===// 237a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 238a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard 
Coles)SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) 239a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) const { 240a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) switch (Op.getOpcode()) { 241a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) default: 2424e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) Op.getNode()->dump(); 243a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) assert(0 && "Custom lowering code for this" 244a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) "instruction is not implemented yet!"); 245a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) break; 246a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) // AMDIL DAG lowering 247a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::SDIV: return LowerSDIV(Op, DAG); 248a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::SREM: return LowerSREM(Op, DAG); 249a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 250a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::BRCOND: return LowerBRCOND(Op, DAG); 251a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) // AMDGPU DAG lowering 252a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 253a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 254a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::FrameIndex: return LowerFrameIndex(Op, DAG); 255a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 256a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 
257a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 258a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) } 259a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) return Op; 260a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)} 261a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 262a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles)SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 263a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) SDValue Op, 264a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) SelectionDAG &DAG) const { 265a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 266a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) const DataLayout *TD = getTargetMachine().getDataLayout(); 267a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 268a36e5920737c6adbddd3e43b760e5de8431db6e0Torne (Richard Coles) 269 assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS); 270 // XXX: What does the value of G->getOffset() mean? 271 assert(G->getOffset() == 0 && 272 "Do not know what to do with an non-zero offset"); 273 274 const GlobalValue *GV = G->getGlobal(); 275 276 unsigned Offset; 277 if (MFI->LocalMemoryObjects.count(GV) == 0) { 278 uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); 279 Offset = MFI->LDSSize; 280 MFI->LocalMemoryObjects[GV] = Offset; 281 // XXX: Account for alignment? 
282 MFI->LDSSize += Size; 283 } else { 284 Offset = MFI->LocalMemoryObjects[GV]; 285 } 286 287 return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace())); 288} 289 290void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG, 291 SmallVectorImpl<SDValue> &Args, 292 unsigned Start, 293 unsigned Count) const { 294 EVT VT = Op.getValueType(); 295 for (unsigned i = Start, e = Start + Count; i != e; ++i) { 296 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op), 297 VT.getVectorElementType(), 298 Op, DAG.getConstant(i, MVT::i32))); 299 } 300} 301 302SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 303 SelectionDAG &DAG) const { 304 SmallVector<SDValue, 8> Args; 305 SDValue A = Op.getOperand(0); 306 SDValue B = Op.getOperand(1); 307 308 ExtractVectorElements(A, DAG, Args, 0, 309 A.getValueType().getVectorNumElements()); 310 ExtractVectorElements(B, DAG, Args, 0, 311 B.getValueType().getVectorNumElements()); 312 313 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 314 &Args[0], Args.size()); 315} 316 317SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 318 SelectionDAG &DAG) const { 319 320 SmallVector<SDValue, 8> Args; 321 EVT VT = Op.getValueType(); 322 unsigned Start = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 323 ExtractVectorElements(Op.getOperand(0), DAG, Args, Start, 324 VT.getVectorNumElements()); 325 326 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Op), Op.getValueType(), 327 &Args[0], Args.size()); 328} 329 330SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op, 331 SelectionDAG &DAG) const { 332 333 MachineFunction &MF = DAG.getMachineFunction(); 334 const AMDGPUFrameLowering *TFL = 335 static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering()); 336 337 FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op); 338 assert(FIN); 339 340 unsigned FrameIndex = FIN->getIndex(); 341 unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex); 342 
return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), 343 Op.getValueType()); 344} 345 346SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 347 SelectionDAG &DAG) const { 348 unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 349 SDLoc DL(Op); 350 EVT VT = Op.getValueType(); 351 352 switch (IntrinsicID) { 353 default: return Op; 354 case AMDGPUIntrinsic::AMDIL_abs: 355 return LowerIntrinsicIABS(Op, DAG); 356 case AMDGPUIntrinsic::AMDIL_exp: 357 return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); 358 case AMDGPUIntrinsic::AMDGPU_lrp: 359 return LowerIntrinsicLRP(Op, DAG); 360 case AMDGPUIntrinsic::AMDIL_fraction: 361 return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1)); 362 case AMDGPUIntrinsic::AMDIL_max: 363 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1), 364 Op.getOperand(2)); 365 case AMDGPUIntrinsic::AMDGPU_imax: 366 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), 367 Op.getOperand(2)); 368 case AMDGPUIntrinsic::AMDGPU_umax: 369 return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), 370 Op.getOperand(2)); 371 case AMDGPUIntrinsic::AMDIL_min: 372 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), 373 Op.getOperand(2)); 374 case AMDGPUIntrinsic::AMDGPU_imin: 375 return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), 376 Op.getOperand(2)); 377 case AMDGPUIntrinsic::AMDGPU_umin: 378 return DAG.getNode(AMDGPUISD::UMIN, DL, VT, Op.getOperand(1), 379 Op.getOperand(2)); 380 case AMDGPUIntrinsic::AMDIL_round_nearest: 381 return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); 382 } 383} 384 385///IABS(a) = SMAX(sub(0, a), a) 386SDValue AMDGPUTargetLowering::LowerIntrinsicIABS(SDValue Op, 387 SelectionDAG &DAG) const { 388 389 SDLoc DL(Op); 390 EVT VT = Op.getValueType(); 391 SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 392 Op.getOperand(1)); 393 394 return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Neg, Op.getOperand(1)); 395} 396 
397/// Linear Interpolation 398/// LRP(a, b, c) = muladd(a, b, (1 - a) * c) 399SDValue AMDGPUTargetLowering::LowerIntrinsicLRP(SDValue Op, 400 SelectionDAG &DAG) const { 401 SDLoc DL(Op); 402 EVT VT = Op.getValueType(); 403 SDValue OneSubA = DAG.getNode(ISD::FSUB, DL, VT, 404 DAG.getConstantFP(1.0f, MVT::f32), 405 Op.getOperand(1)); 406 SDValue OneSubAC = DAG.getNode(ISD::FMUL, DL, VT, OneSubA, 407 Op.getOperand(3)); 408 return DAG.getNode(ISD::FADD, DL, VT, 409 DAG.getNode(ISD::FMUL, DL, VT, Op.getOperand(1), Op.getOperand(2)), 410 OneSubAC); 411} 412 413/// \brief Generate Min/Max node 414SDValue AMDGPUTargetLowering::LowerMinMax(SDValue Op, 415 SelectionDAG &DAG) const { 416 SDLoc DL(Op); 417 EVT VT = Op.getValueType(); 418 419 SDValue LHS = Op.getOperand(0); 420 SDValue RHS = Op.getOperand(1); 421 SDValue True = Op.getOperand(2); 422 SDValue False = Op.getOperand(3); 423 SDValue CC = Op.getOperand(4); 424 425 if (VT != MVT::f32 || 426 !((LHS == True && RHS == False) || (LHS == False && RHS == True))) { 427 return SDValue(); 428 } 429 430 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 431 switch (CCOpcode) { 432 case ISD::SETOEQ: 433 case ISD::SETONE: 434 case ISD::SETUNE: 435 case ISD::SETNE: 436 case ISD::SETUEQ: 437 case ISD::SETEQ: 438 case ISD::SETFALSE: 439 case ISD::SETFALSE2: 440 case ISD::SETTRUE: 441 case ISD::SETTRUE2: 442 case ISD::SETUO: 443 case ISD::SETO: 444 assert(0 && "Operation should already be optimised !"); 445 case ISD::SETULE: 446 case ISD::SETULT: 447 case ISD::SETOLE: 448 case ISD::SETOLT: 449 case ISD::SETLE: 450 case ISD::SETLT: { 451 if (LHS == True) 452 return DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 453 else 454 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 455 } 456 case ISD::SETGT: 457 case ISD::SETGE: 458 case ISD::SETUGE: 459 case ISD::SETOGE: 460 case ISD::SETUGT: 461 case ISD::SETOGT: { 462 if (LHS == True) 463 return DAG.getNode(AMDGPUISD::FMAX, DL, VT, LHS, RHS); 464 else 465 return 
DAG.getNode(AMDGPUISD::FMIN, DL, VT, LHS, RHS); 466 } 467 case ISD::SETCC_INVALID: 468 assert(0 && "Invalid setcc condcode !"); 469 } 470 return Op; 471} 472 473SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op, 474 SelectionDAG &DAG) const { 475 LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); 476 EVT MemEltVT = Load->getMemoryVT().getVectorElementType(); 477 EVT EltVT = Op.getValueType().getVectorElementType(); 478 EVT PtrVT = Load->getBasePtr().getValueType(); 479 unsigned NumElts = Load->getMemoryVT().getVectorNumElements(); 480 SmallVector<SDValue, 8> Loads; 481 SDLoc SL(Op); 482 483 for (unsigned i = 0, e = NumElts; i != e; ++i) { 484 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(), 485 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT)); 486 Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT, 487 Load->getChain(), Ptr, 488 MachinePointerInfo(Load->getMemOperand()->getValue()), 489 MemEltVT, Load->isVolatile(), Load->isNonTemporal(), 490 Load->getAlignment())); 491 } 492 return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), &Loads[0], 493 Loads.size()); 494} 495 496SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op, 497 SelectionDAG &DAG) const { 498 StoreSDNode *Store = dyn_cast<StoreSDNode>(Op); 499 EVT MemVT = Store->getMemoryVT(); 500 unsigned MemBits = MemVT.getSizeInBits(); 501 502 // Byte stores are really expensive, so if possible, try to pack 503 // 32-bit vector truncatating store into an i32 store. 
504 // XXX: We could also handle optimize other vector bitwidths 505 if (!MemVT.isVector() || MemBits > 32) { 506 return SDValue(); 507 } 508 509 SDLoc DL(Op); 510 const SDValue &Value = Store->getValue(); 511 EVT VT = Value.getValueType(); 512 const SDValue &Ptr = Store->getBasePtr(); 513 EVT MemEltVT = MemVT.getVectorElementType(); 514 unsigned MemEltBits = MemEltVT.getSizeInBits(); 515 unsigned MemNumElements = MemVT.getVectorNumElements(); 516 EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits()); 517 SDValue Mask; 518 switch(MemEltBits) { 519 case 8: 520 Mask = DAG.getConstant(0xFF, PackedVT); 521 break; 522 case 16: 523 Mask = DAG.getConstant(0xFFFF, PackedVT); 524 break; 525 default: 526 llvm_unreachable("Cannot lower this vector store"); 527 } 528 SDValue PackedValue; 529 for (unsigned i = 0; i < MemNumElements; ++i) { 530 EVT ElemVT = VT.getVectorElementType(); 531 SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value, 532 DAG.getConstant(i, MVT::i32)); 533 Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT); 534 Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask); 535 SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT); 536 Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift); 537 if (i == 0) { 538 PackedValue = Elt; 539 } else { 540 PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt); 541 } 542 } 543 return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr, 544 MachinePointerInfo(Store->getMemOperand()->getValue()), 545 Store->isVolatile(), Store->isNonTemporal(), 546 Store->getAlignment()); 547} 548 549SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 550 SelectionDAG &DAG) const { 551 StoreSDNode *Store = cast<StoreSDNode>(Op); 552 EVT MemEltVT = Store->getMemoryVT().getVectorElementType(); 553 EVT EltVT = Store->getValue().getValueType().getVectorElementType(); 554 EVT PtrVT = Store->getBasePtr().getValueType(); 555 unsigned NumElts = Store->getMemoryVT().getVectorNumElements(); 556 SDLoc 
SL(Op); 557 558 SmallVector<SDValue, 8> Chains; 559 560 for (unsigned i = 0, e = NumElts; i != e; ++i) { 561 SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT, 562 Store->getValue(), DAG.getConstant(i, MVT::i32)); 563 SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, 564 Store->getBasePtr(), 565 DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), 566 PtrVT)); 567 Chains.push_back(DAG.getTruncStore(Store->getChain(), SL, Val, Ptr, 568 MachinePointerInfo(Store->getMemOperand()->getValue()), 569 MemEltVT, Store->isVolatile(), Store->isNonTemporal(), 570 Store->getAlignment())); 571 } 572 return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts); 573} 574 575SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 576 SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG); 577 if (Result.getNode()) { 578 return Result; 579 } 580 581 StoreSDNode *Store = cast<StoreSDNode>(Op); 582 if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 583 Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) && 584 Store->getValue().getValueType().isVector()) { 585 return SplitVectorStore(Op, DAG); 586 } 587 return SDValue(); 588} 589 590SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 591 SelectionDAG &DAG) const { 592 SDLoc DL(Op); 593 EVT VT = Op.getValueType(); 594 595 SDValue Num = Op.getOperand(0); 596 SDValue Den = Op.getOperand(1); 597 598 SmallVector<SDValue, 8> Results; 599 600 // RCP = URECIP(Den) = 2^32 / Den + e 601 // e is rounding error. 602 SDValue RCP = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Den); 603 604 // RCP_LO = umulo(RCP, Den) */ 605 SDValue RCP_LO = DAG.getNode(ISD::UMULO, DL, VT, RCP, Den); 606 607 // RCP_HI = mulhu (RCP, Den) */ 608 SDValue RCP_HI = DAG.getNode(ISD::MULHU, DL, VT, RCP, Den); 609 610 // NEG_RCP_LO = -RCP_LO 611 SDValue NEG_RCP_LO = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, VT), 612 RCP_LO); 613 614 // ABS_RCP_LO = (RCP_HI == 0 ? 
NEG_RCP_LO : RCP_LO) 615 SDValue ABS_RCP_LO = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 616 NEG_RCP_LO, RCP_LO, 617 ISD::SETEQ); 618 // Calculate the rounding error from the URECIP instruction 619 // E = mulhu(ABS_RCP_LO, RCP) 620 SDValue E = DAG.getNode(ISD::MULHU, DL, VT, ABS_RCP_LO, RCP); 621 622 // RCP_A_E = RCP + E 623 SDValue RCP_A_E = DAG.getNode(ISD::ADD, DL, VT, RCP, E); 624 625 // RCP_S_E = RCP - E 626 SDValue RCP_S_E = DAG.getNode(ISD::SUB, DL, VT, RCP, E); 627 628 // Tmp0 = (RCP_HI == 0 ? RCP_A_E : RCP_SUB_E) 629 SDValue Tmp0 = DAG.getSelectCC(DL, RCP_HI, DAG.getConstant(0, VT), 630 RCP_A_E, RCP_S_E, 631 ISD::SETEQ); 632 // Quotient = mulhu(Tmp0, Num) 633 SDValue Quotient = DAG.getNode(ISD::MULHU, DL, VT, Tmp0, Num); 634 635 // Num_S_Remainder = Quotient * Den 636 SDValue Num_S_Remainder = DAG.getNode(ISD::UMULO, DL, VT, Quotient, Den); 637 638 // Remainder = Num - Num_S_Remainder 639 SDValue Remainder = DAG.getNode(ISD::SUB, DL, VT, Num, Num_S_Remainder); 640 641 // Remainder_GE_Den = (Remainder >= Den ? -1 : 0) 642 SDValue Remainder_GE_Den = DAG.getSelectCC(DL, Remainder, Den, 643 DAG.getConstant(-1, VT), 644 DAG.getConstant(0, VT), 645 ISD::SETUGE); 646 // Remainder_GE_Zero = (Num >= Num_S_Remainder ? -1 : 0) 647 SDValue Remainder_GE_Zero = DAG.getSelectCC(DL, Num, 648 Num_S_Remainder, 649 DAG.getConstant(-1, VT), 650 DAG.getConstant(0, VT), 651 ISD::SETUGE); 652 // Tmp1 = Remainder_GE_Den & Remainder_GE_Zero 653 SDValue Tmp1 = DAG.getNode(ISD::AND, DL, VT, Remainder_GE_Den, 654 Remainder_GE_Zero); 655 656 // Calculate Division result: 657 658 // Quotient_A_One = Quotient + 1 659 SDValue Quotient_A_One = DAG.getNode(ISD::ADD, DL, VT, Quotient, 660 DAG.getConstant(1, VT)); 661 662 // Quotient_S_One = Quotient - 1 663 SDValue Quotient_S_One = DAG.getNode(ISD::SUB, DL, VT, Quotient, 664 DAG.getConstant(1, VT)); 665 666 // Div = (Tmp1 == 0 ? 
Quotient : Quotient_A_One) 667 SDValue Div = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 668 Quotient, Quotient_A_One, ISD::SETEQ); 669 670 // Div = (Remainder_GE_Zero == 0 ? Quotient_S_One : Div) 671 Div = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 672 Quotient_S_One, Div, ISD::SETEQ); 673 674 // Calculate Rem result: 675 676 // Remainder_S_Den = Remainder - Den 677 SDValue Remainder_S_Den = DAG.getNode(ISD::SUB, DL, VT, Remainder, Den); 678 679 // Remainder_A_Den = Remainder + Den 680 SDValue Remainder_A_Den = DAG.getNode(ISD::ADD, DL, VT, Remainder, Den); 681 682 // Rem = (Tmp1 == 0 ? Remainder : Remainder_S_Den) 683 SDValue Rem = DAG.getSelectCC(DL, Tmp1, DAG.getConstant(0, VT), 684 Remainder, Remainder_S_Den, ISD::SETEQ); 685 686 // Rem = (Remainder_GE_Zero == 0 ? Remainder_A_Den : Rem) 687 Rem = DAG.getSelectCC(DL, Remainder_GE_Zero, DAG.getConstant(0, VT), 688 Remainder_A_Den, Rem, ISD::SETEQ); 689 SDValue Ops[2]; 690 Ops[0] = Div; 691 Ops[1] = Rem; 692 return DAG.getMergeValues(Ops, 2, DL); 693} 694 695SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 696 SelectionDAG &DAG) const { 697 SDValue S0 = Op.getOperand(0); 698 SDLoc DL(Op); 699 if (Op.getValueType() != MVT::f32 || S0.getValueType() != MVT::i64) 700 return SDValue(); 701 702 // f32 uint_to_fp i64 703 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 704 DAG.getConstant(0, MVT::i32)); 705 SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo); 706 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0, 707 DAG.getConstant(1, MVT::i32)); 708 SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi); 709 FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, 710 DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32 711 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); 712 713} 714 715//===----------------------------------------------------------------------===// 716// Helper functions 
717//===----------------------------------------------------------------------===// 718 719void AMDGPUTargetLowering::getOriginalFunctionArgs( 720 SelectionDAG &DAG, 721 const Function *F, 722 const SmallVectorImpl<ISD::InputArg> &Ins, 723 SmallVectorImpl<ISD::InputArg> &OrigIns) const { 724 725 for (unsigned i = 0, e = Ins.size(); i < e; ++i) { 726 if (Ins[i].ArgVT == Ins[i].VT) { 727 OrigIns.push_back(Ins[i]); 728 continue; 729 } 730 731 EVT VT; 732 if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) { 733 // Vector has been split into scalars. 734 VT = Ins[i].ArgVT.getVectorElementType(); 735 } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() && 736 Ins[i].ArgVT.getVectorElementType() != 737 Ins[i].VT.getVectorElementType()) { 738 // Vector elements have been promoted 739 VT = Ins[i].ArgVT; 740 } else { 741 // Vector has been spilt into smaller vectors. 742 VT = Ins[i].VT; 743 } 744 745 ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used, 746 Ins[i].OrigArgIndex, Ins[i].PartOffset); 747 OrigIns.push_back(Arg); 748 } 749} 750 751bool AMDGPUTargetLowering::isHWTrueValue(SDValue Op) const { 752 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 753 return CFP->isExactlyValue(1.0); 754 } 755 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 756 return C->isAllOnesValue(); 757 } 758 return false; 759} 760 761bool AMDGPUTargetLowering::isHWFalseValue(SDValue Op) const { 762 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) { 763 return CFP->getValueAPF().isZero(); 764 } 765 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { 766 return C->isNullValue(); 767 } 768 return false; 769} 770 771SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 772 const TargetRegisterClass *RC, 773 unsigned Reg, EVT VT) const { 774 MachineFunction &MF = DAG.getMachineFunction(); 775 MachineRegisterInfo &MRI = MF.getRegInfo(); 776 unsigned VirtualRegister; 777 if (!MRI.isLiveIn(Reg)) { 778 VirtualRegister = 
MRI.createVirtualRegister(RC); 779 MRI.addLiveIn(Reg, VirtualRegister); 780 } else { 781 VirtualRegister = MRI.getLiveInVirtReg(Reg); 782 } 783 return DAG.getRegister(VirtualRegister, VT); 784} 785 786#define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 787 788const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 789 switch (Opcode) { 790 default: return 0; 791 // AMDIL DAG nodes 792 NODE_NAME_CASE(CALL); 793 NODE_NAME_CASE(UMUL); 794 NODE_NAME_CASE(DIV_INF); 795 NODE_NAME_CASE(RET_FLAG); 796 NODE_NAME_CASE(BRANCH_COND); 797 798 // AMDGPU DAG nodes 799 NODE_NAME_CASE(DWORDADDR) 800 NODE_NAME_CASE(FRACT) 801 NODE_NAME_CASE(FMAX) 802 NODE_NAME_CASE(SMAX) 803 NODE_NAME_CASE(UMAX) 804 NODE_NAME_CASE(FMIN) 805 NODE_NAME_CASE(SMIN) 806 NODE_NAME_CASE(UMIN) 807 NODE_NAME_CASE(URECIP) 808 NODE_NAME_CASE(EXPORT) 809 NODE_NAME_CASE(CONST_ADDRESS) 810 NODE_NAME_CASE(REGISTER_LOAD) 811 NODE_NAME_CASE(REGISTER_STORE) 812 NODE_NAME_CASE(LOAD_CONSTANT) 813 NODE_NAME_CASE(LOAD_INPUT) 814 NODE_NAME_CASE(SAMPLE) 815 NODE_NAME_CASE(SAMPLEB) 816 NODE_NAME_CASE(SAMPLED) 817 NODE_NAME_CASE(SAMPLEL) 818 NODE_NAME_CASE(STORE_MSKOR) 819 NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 820 } 821} 822