1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===// 2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// The LLVM Compiler Infrastructure 4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source 6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details. 7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file 11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \brief Custom DAG lowering for SI 12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// 13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIISelLowering.h" 16cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig#include "AMDGPU.h" 17cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "AMDGPUIntrinsicInfo.h" 1836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "AMDGPUSubtarget.h" 19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIInstrInfo.h" 20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIMachineFunctionInfo.h" 21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIRegisterInfo.h" 2290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig#include "llvm/CodeGen/CallingConvLower.h" 23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h" 24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h" 25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/SelectionDAG.h" 265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/IR/Function.h" 27cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "llvm/ADT/SmallString.h" 28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm; 30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 31f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSITargetLowering::SITargetLowering(TargetMachine &TM) : 32b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling AMDGPUTargetLowering(TM) { 33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass); 34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass); 35204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig 36204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass); 37204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass); 38204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig 39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass); 40dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass); 41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 42dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass); 43dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass); 44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass); 45204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig 46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass); 47204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass); 48204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig 4936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass); 50204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass); 51204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig 5236ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass); 53204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass); 54f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 55f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard computeRegisterProperties(); 56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 57fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling // Condition Codes 58fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETONE, MVT::f32, Expand); 59fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand); 60fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); 61fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); 62fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETULE, MVT::f32, Expand); 63fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETULT, MVT::f32, Expand); 64fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling 65fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETONE, MVT::f64, Expand); 66fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand); 67fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); 68fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); 69fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETULE, MVT::f64, Expand); 70fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling setCondCodeAction(ISD::SETULT, MVT::f64, Expand); 71fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling 72b9e8678025891a3eb5431c652df541b632902fc6Christian Konig setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand); 73b9e8678025891a3eb5431c652df541b632902fc6Christian Konig setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand); 74b9e8678025891a3eb5431c652df541b632902fc6Christian Konig setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand); 75b9e8678025891a3eb5431c652df541b632902fc6Christian Konig setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand); 76b9e8678025891a3eb5431c652df541b632902fc6Christian Konig 77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard setOperationAction(ISD::ADD, MVT::i32, Legal); 78f38be91a829ad5a0391b6f139f0bd8cef341e689Matt Arsenault setOperationAction(ISD::ADDC, MVT::i32, Legal); 79f38be91a829ad5a0391b6f139f0bd8cef341e689Matt Arsenault setOperationAction(ISD::ADDE, MVT::i32, Legal); 80cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SUBC, MVT::i32, Legal); 81cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SUBE, MVT::i32, Legal); 821842ec4d9fc9eeceb2a77527026dfd84ee24cff1Aaron Watry 83d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard // We need to custom lower vector stores from local memory 84d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard setOperationAction(ISD::LOAD, MVT::v2i32, Custom); 85d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard setOperationAction(ISD::LOAD, MVT::v4i32, Custom); 86f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setOperationAction(ISD::LOAD, MVT::v8i32, Custom); 87f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setOperationAction(ISD::LOAD, MVT::v16i32, Custom); 88f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard 89f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setOperationAction(ISD::STORE, MVT::v8i32, Custom); 90f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setOperationAction(ISD::STORE, MVT::v16i32, Custom); 91d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard 92a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard // We need to custom lower loads/stores from private memory 93a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::LOAD, MVT::i32, Custom); 94a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::LOAD, MVT::v2i32, Custom); 95a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::LOAD, MVT::v4i32, Custom); 9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::LOAD, MVT::v8i32, Custom); 97a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::STORE, MVT::i1, Custom); 99a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::STORE, MVT::i32, Custom); 100a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::STORE, MVT::v2i32, Custom); 101a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard setOperationAction(ISD::STORE, MVT::v4i32, Custom); 102a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 103dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SELECT, MVT::f32, Promote); 104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32); 10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::SELECT, MVT::i64, Custom); 10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::SELECT, MVT::f64, Promote); 10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); 108a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 109cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); 110cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); 111cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 112cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); 11317ea10cb792832c99677afa13b9b866098bc4679Tom Stellard 1144e518fd941b119834b5764708fbabf41adc45040Tom Stellard setOperationAction(ISD::SETCC, MVT::v2i1, Expand); 1154e518fd941b119834b5764708fbabf41adc45040Tom Stellard setOperationAction(ISD::SETCC, MVT::v4i1, Expand); 1164e518fd941b119834b5764708fbabf41adc45040Tom Stellard 117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); 118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); 119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); 120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal); 122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom); 123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom); 124dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 125dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal); 126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom); 127dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom); 128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 129dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom); 130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 131dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom); 132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 133e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 13468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom); 13568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom); 13668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); 137e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard 138a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); 139cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::BRCOND, MVT::Other, Custom); 140a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard 141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); 14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom); 144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand); 145f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand); 146f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand); 147cd0f2458641ebebdb887da0381bd3acea6a1902fTom Stellard 148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); 149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom); 150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom); 151dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); 152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 153dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); 15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom); 15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom); 15636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand); 157014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 158dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 15936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setTruncStoreAction(MVT::i32, MVT::i8, Custom); 16036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setTruncStoreAction(MVT::i32, MVT::i16, Custom); 161014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider setTruncStoreAction(MVT::f64, MVT::f32, Expand); 1623987e532f7e7b326083d3b5145bd29bb69e15410Matt Arsenault setTruncStoreAction(MVT::i64, MVT::i32, Expand); 163f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand); 164f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand); 165014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider 166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::LOAD, MVT::i1, Custom); 167dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 168da25cd3e6de8f21005590c2de49868f883cf2410Tom Stellard setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FrameIndex, MVT::i32, Custom); 171a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer 172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // These should use UDIVREM, so set them to expand 173dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::UDIV, MVT::i64, Expand); 174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::UREM, MVT::i64, Expand); 175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // We only support LOAD/STORE and vector manipulation ops for vectors 17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // with > 4 elements. 17836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MVT VecTypes[] = { 17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32 18036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines }; 18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 182dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (MVT VT : VecTypes) { 18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) { 18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines switch(Op) { 18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::LOAD: 18636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::STORE: 18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::BUILD_VECTOR: 18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::BITCAST: 18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::EXTRACT_VECTOR_ELT: 19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::INSERT_VECTOR_ELT: 19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::CONCAT_VECTORS: 19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::INSERT_SUBVECTOR: 19336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::EXTRACT_SUBVECTOR: 19436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 19536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines default: 196dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(Op, VT, Expand); 19736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines break; 19836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 19936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 20036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 20136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 20236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) { 20336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I); 20436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FTRUNC, VT, Expand); 20536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FCEIL, VT, Expand); 20636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FFLOOR, VT, Expand); 20736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 208f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 20936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) { 21036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FTRUNC, MVT::f64, Legal); 21136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FCEIL, MVT::f64, Legal); 21236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setOperationAction(ISD::FFLOOR, MVT::f64, Legal); 213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines setOperationAction(ISD::FRINT, MVT::f64, Legal); 21436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 21536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 216cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // FIXME: These should be removed and handled the same was as f32 fneg. Source 217cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // modifiers also work for the double instructions. 218cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::FNEG, MVT::f64, Expand); 219cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setOperationAction(ISD::FABS, MVT::f64, Expand); 220cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 22136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines setTargetDAGCombine(ISD::SELECT_CC); 222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard setTargetDAGCombine(ISD::SETCC); 223d787c047bcc5d0d81743f9d4403d5c54fe4757feMichel Danzer 224cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines setTargetDAGCombine(ISD::UINT_TO_FP); 225cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 226c53270f885e8d778cfe0e741e07d7def2b66884aChristian Konig setSchedulingPreference(Sched::RegPressure); 227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 22973e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard//===----------------------------------------------------------------------===// 23073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard// TargetLowering queries 23173e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard//===----------------------------------------------------------------------===// 23273e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard 23373e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellardbool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, 23436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned AddrSpace, 23573e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard bool *IsFast) const { 236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsFast) 237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines *IsFast = false; 238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 23973e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard // XXX: This depends on the address space and also we may want to revist 24073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard // the alignment values we specify in the DataLayout. 241dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 242dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96, 243dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // which isn't a simple VT. 244a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard if (!VT.isSimple() || VT == MVT::Other) 245a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return false; 246dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 247dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // XXX - CI changes say "Support for unaligned memory accesses" but I don't 248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // see what for specifically. The wording everywhere else seems to be the 249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // same. 250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have 252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // no alignment restrictions. 253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) { 254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // Using any pair of GPRs should be the same as any other pair. 255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsFast) 256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines *IsFast = true; 257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return VT.bitsGE(MVT::i64); 258dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 259dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 260dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // XXX - The only mention I see of this in the ISA manual is for LDS direct 261dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // reads the "byte address and must be dword aligned". Is it also true for the 262dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // normal loads and stores? 263dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS) 264dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return false; 265dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 266dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the 267dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // byte-address are ignored, thus forcing Dword alignment. 268dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (IsFast) 269dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines *IsFast = true; 27073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard return VT.bitsGT(MVT::i32); 27173e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard} 27273e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard 273cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesTargetLoweringBase::LegalizeTypeAction 274cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSITargetLowering::getPreferredVectorAction(EVT VT) const { 275cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) 276cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return TypeSplitVector; 277cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 278cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return TargetLoweringBase::getPreferredVectorAction(VT); 27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 28136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, 28236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Type *Ty) const { 28336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines const SIInstrInfo *TII = 28436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 28536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return TII->isInlineConstant(Imm); 286a41520cf9b9cefed2091a0624a34c5f7fdb42a68Tom Stellard} 28773e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard 288f95b1621887e3409ceec2db47e1b44271d934735Tom StellardSDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT, 289e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard SDLoc DL, SDValue Chain, 290dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Offset, bool Signed) const { 291e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 292e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()), 293e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPUAS::CONSTANT_ADDRESS); 294e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard SDValue BasePtr = DAG.getCopyFromReg(Chain, DL, 295e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64); 296e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr, 297e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard DAG.getConstant(Offset, MVT::i64)); 298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr, 299f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard MachinePointerInfo(UndefValue::get(PtrTy)), MemVT, 300f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard false, false, MemVT.getSizeInBits() >> 3); 301e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard 302e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard} 303e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard 30490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian KonigSDValue SITargetLowering::LowerFormalArguments( 30590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SDValue Chain, 30690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig CallingConv::ID CallConv, 30790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig bool isVarArg, 30890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig const SmallVectorImpl<ISD::InputArg> &Ins, 309ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick SDLoc DL, SelectionDAG &DAG, 31090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SmallVectorImpl<SDValue> &InVals) const { 31190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 31290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 31390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 31490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig MachineFunction &MF = DAG.getMachineFunction(); 31590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig FunctionType *FType = MF.getFunction()->getFunctionType(); 316cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 31790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 31890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig assert(CallConv == CallingConv::C); 31990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 32090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SmallVector<ISD::InputArg, 16> Splits; 321cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig uint32_t Skipped = 0; 322cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 323cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) { 32490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig const ISD::InputArg &Arg = Ins[i]; 325225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault 326225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault // First check if it's a PS input addr 3276a809a8d292ca4aa70cc07ce6b573239f397bb47Vincent Lejeune if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() && 3286a809a8d292ca4aa70cc07ce6b573239f397bb47Vincent Lejeune !Arg.Flags.isByVal()) { 329cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 330cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig assert((PSInputNum <= 15) && "Too many PS inputs!"); 331cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 332cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig if (!Arg.Used) { 333cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig // We can savely skip PS inputs 334cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig Skipped |= 1 << i; 335cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig ++PSInputNum; 336cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig continue; 337cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig } 338cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 339cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig Info->PSInputAddr |= 1 << PSInputNum++; 340cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig } 341cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 342cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig // Second split vertices into their elements 343e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) { 34490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig ISD::InputArg NewArg = Arg; 34590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig NewArg.Flags.setSplit(); 34690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig NewArg.VT = Arg.VT.getVectorElementType(); 34790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 34890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // We REALLY want the ORIGINAL number of vertex elements here, e.g. a 34990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // three or five element vertex only needs three or five registers, 35090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // NOT four or eigth. 35190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Type *ParamType = FType->getParamType(Arg.OrigArgIndex); 35290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig unsigned NumElements = ParamType->getVectorNumElements(); 35390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 35490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig for (unsigned j = 0; j != NumElements; ++j) { 35590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Splits.push_back(NewArg); 35690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig NewArg.PartOffset += NewArg.VT.getStoreSize(); 35790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 35890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 359f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard } else if (Info->ShaderType != ShaderType::COMPUTE) { 36090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Splits.push_back(Arg); 36190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 36290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 36390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 36490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SmallVector<CCValAssign, 16> ArgLocs; 36590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 36690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig getTargetMachine(), ArgLocs, *DAG.getContext()); 36790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 368cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig // At least one interpolation mode must be enabled or else the GPU will hang. 369cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) { 370cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig Info->PSInputAddr |= 1; 371cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig CCInfo.AllocateReg(AMDGPU::VGPR0); 372cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig CCInfo.AllocateReg(AMDGPU::VGPR1); 373cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig } 374cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 375e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard // The pointer to the list of arguments is stored in SGPR0, SGPR1 376e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard if (Info->ShaderType == ShaderType::COMPUTE) { 377e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard CCInfo.AllocateReg(AMDGPU::SGPR0); 378e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard CCInfo.AllocateReg(AMDGPU::SGPR1); 379e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass); 380e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard } 381e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard 382f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard if (Info->ShaderType == ShaderType::COMPUTE) { 383f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins, 384f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard Splits); 385f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard } 386f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard 38790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig AnalyzeFormalArguments(CCInfo, Splits); 38890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 38990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) { 39090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 391e91967820879b79f95b0378124c5f40d9e6d54eeChristian Konig const ISD::InputArg &Arg = Ins[i]; 392cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig if (Skipped & (1 << i)) { 393e91967820879b79f95b0378124c5f40d9e6d54eeChristian Konig InVals.push_back(DAG.getUNDEF(Arg.VT)); 394cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig continue; 395cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig } 396cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig 39790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig CCValAssign &VA = ArgLocs[ArgIdx++]; 398e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard EVT VT = VA.getLocVT(); 399e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard 400e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard if (VA.isMemLoc()) { 401f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard VT = Ins[i].VT; 402f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard EVT MemVT = Splits[i].VT; 403e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard // The first 36 bytes of the input buffer contains information about 404e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard // thread group and global sizes. 405f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard SDValue Arg = LowerParameter(DAG, VT, MemVT, DL, DAG.getRoot(), 406dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 36 + VA.getLocMemOffset(), 407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ins[i].Flags.isSExt()); 408e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard InVals.push_back(Arg); 409e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard continue; 410e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard } 41190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig assert(VA.isRegLoc() && "Parameter must be in a register!"); 41290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 41390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig unsigned Reg = VA.getLocReg(); 41490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 41590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig if (VT == MVT::i64) { 41690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // For now assume it is a pointer 41790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0, 41890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig &AMDGPU::SReg_64RegClass); 41990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass); 42090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); 42190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig continue; 42290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 42390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 42490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); 42590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 42690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Reg = MF.addLiveIn(Reg, RC); 42790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT); 42890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 42990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig if (Arg.VT.isVector()) { 43090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 43190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // Build a vector from the registers 43290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Type *ParamType = FType->getParamType(Arg.OrigArgIndex); 43390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig unsigned NumElements = ParamType->getVectorNumElements(); 43490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 43590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig SmallVector<SDValue, 4> Regs; 43690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Regs.push_back(Val); 43790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig for (unsigned j = 1; j != NumElements; ++j) { 43890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Reg = ArgLocs[ArgIdx++].getLocReg(); 43990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Reg = MF.addLiveIn(Reg, RC); 44090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT)); 44190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 44290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 44390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig // Fill up the missing vector elements 44490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig NumElements = Arg.VT.getVectorNumElements() - NumElements; 44590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig for (unsigned j = 0; j != NumElements; ++j) 44690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig Regs.push_back(DAG.getUNDEF(VT)); 447225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault 448dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs)); 44990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig continue; 45090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 45190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 45290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig InVals.push_back(Val); 45390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig } 45490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig return Chain; 45590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig} 45690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig 457f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardMachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( 458f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard MachineInstr * MI, MachineBasicBlock * BB) const { 459f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 4604956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard MachineBasicBlock::iterator I = *MI; 461dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const SIInstrInfo *TII = 462dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 463dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 4644956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard 465f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (MI->getOpcode()) { 466f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: 467f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB); 468f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case AMDGPU::BRANCH: return BB; 4694956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard case AMDGPU::SI_ADDR64_RSRC: { 4704956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard unsigned SuperReg = MI->getOperand(0).getReg(); 47136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); 47236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass); 47336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 47436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass); 4754956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo) 4764956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addOperand(MI->getOperand(1)); 4774956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo) 4784956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addImm(0); 4794956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi) 48036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32); 4814956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi) 4824956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addReg(SubRegHiLo) 4834956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addImm(AMDGPU::sub0) 4844956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addReg(SubRegHiHi) 4854956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addImm(AMDGPU::sub1); 4864956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg) 4874956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addReg(SubRegLo) 4884956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addImm(AMDGPU::sub0_sub1) 4894956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addReg(SubRegHi) 4904956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard .addImm(AMDGPU::sub2_sub3); 4914956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard MI->eraseFromParent(); 4924956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard break; 4934956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard } 494cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case AMDGPU::V_SUB_F64: { 495cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned DestReg = MI->getOperand(0).getReg(); 496cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) 497cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(0) // SRC0 modifiers 498cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addReg(MI->getOperand(1).getReg()) 499cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(1) // SRC1 modifiers 500cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addReg(MI->getOperand(2).getReg()) 501cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(0) // SRC2 modifiers 502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(0) // src2 503cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(0) // CLAMP 504cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines .addImm(0); // OMOD 505d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1Tom Stellard MI->eraseFromParent(); 506d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1Tom Stellard break; 507cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 508a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard case AMDGPU::SI_RegisterStorePseudo: { 509a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 510a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); 511a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard MachineInstrBuilder MIB = 512a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore), 513a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Reg); 514a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) 515a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard MIB.addOperand(MI->getOperand(i)); 516a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 517a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard MI->eraseFromParent(); 518dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 519dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 520dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::FABS_SI: { 521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const SIInstrInfo *TII = 523dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 524dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 525dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), 526dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Reg) 527dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0x7fffffff); 528dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32), 529dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->getOperand(0).getReg()) 530dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addReg(MI->getOperand(1).getReg()) 531dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addReg(Reg); 532dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->eraseFromParent(); 533dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 534dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 535dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::FNEG_SI: { 536dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 537dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const SIInstrInfo *TII = 538dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 539dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass); 540dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32), 541dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Reg) 542dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0x80000000); 543dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32), 544dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->getOperand(0).getReg()) 545dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addReg(MI->getOperand(1).getReg()) 546dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addReg(Reg); 547dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->eraseFromParent(); 548dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines break; 549dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 550dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines case AMDGPU::FCLAMP_SI: { 551dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const SIInstrInfo *TII = 552dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 553dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64), 554dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->getOperand(0).getReg()) 555dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0) // SRC0 modifiers 556dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addOperand(MI->getOperand(1)) 557dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0) // SRC1 modifiers 558dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0) // SRC1 559dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(1) // CLAMP 560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines .addImm(0); // OMOD 561dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines MI->eraseFromParent(); 562a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 563f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 564f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return BB; 565f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 566f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 567225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt ArsenaultEVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 5684e518fd941b119834b5764708fbabf41adc45040Tom Stellard if (!VT.isVector()) { 5694e518fd941b119834b5764708fbabf41adc45040Tom Stellard return MVT::i1; 5704e518fd941b119834b5764708fbabf41adc45040Tom Stellard } 5714e518fd941b119834b5764708fbabf41adc45040Tom Stellard return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); 572f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 573f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 574b87082228bb5151598addcf0eb3756cf0f906ab6Christian KonigMVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const { 575b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig return MVT::i32; 576b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig} 577b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig 5786e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheiderbool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 5796e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider VT = VT.getScalarType(); 5806e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider 5816e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider if (!VT.isSimple()) 5826e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider return false; 5836e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider 5846e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider switch (VT.getSimpleVT().SimpleTy) { 5856e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider case MVT::f32: 5866e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider return false; /* There is V_MAD_F32 for f32 */ 5876e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider case MVT::f64: 5886e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider return true; 5896e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider default: 5906e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider break; 5916e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider } 5926e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider 5936e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider return false; 5946e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider} 5956e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider 596f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 597f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// Custom DAG Lowering Operations 598f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===// 599f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 600f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 601a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer MachineFunction &MF = DAG.getMachineFunction(); 602a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 603f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (Op.getOpcode()) { 604f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 6056b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard case ISD::BRCOND: return LowerBRCOND(Op, DAG); 606d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard case ISD::LOAD: { 607d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); 608cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT VT = Op.getValueType(); 609cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 610cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // These loads are legal. 611cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && 612cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines VT.isVector() && VT.getVectorNumElements() == 2 && 613cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines VT.getVectorElementType() == MVT::i32) 614cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SDValue(); 615cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 61636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Op.getValueType().isVector() && 61736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 61836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || 61936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && 62036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Op.getValueType().getVectorNumElements() > 4))) { 621cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SplitVectorLoad(Op, DAG); 622d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard } else { 623cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Result = LowerLOAD(Op, DAG); 624cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines assert((!Result.getNode() || 625cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Result.getNode()->getNumValues() == 2) && 626cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines "Load should return a value and a chain"); 627cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return Result; 628d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard } 629d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard } 630f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard 63136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines case ISD::SELECT: return LowerSELECT(Op, DAG); 632a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard case ISD::STORE: return LowerSTORE(Op, DAG); 633a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); 634e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case ISD::INTRINSIC_WO_CHAIN: { 635e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard unsigned IntrinsicID = 636e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 637e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard EVT VT = Op.getValueType(); 638e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard SDLoc DL(Op); 639e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard //XXX: Hardcoded we only use two to store the pointer to the parameters. 640e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard unsigned NumUserSGPRs = 2; 641e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard switch (IntrinsicID) { 642e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); 643e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_ngroups_x: 644dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false); 645e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_ngroups_y: 646dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false); 647e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_ngroups_z: 648dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false); 649e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_global_size_x: 650dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false); 651e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_global_size_y: 652dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false); 653e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_global_size_z: 654dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false); 655e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_local_size_x: 656dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false); 657e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_local_size_y: 658dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false); 659e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_local_size_z: 660dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false); 661e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tgid_x: 662e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, 663e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT); 664e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tgid_y: 665e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, 666e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 1), VT); 667e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tgid_z: 668e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass, 669e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 2), VT); 670e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tidig_x: 671e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, 672e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::VGPR0, VT); 673e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tidig_y: 674e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, 675e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::VGPR1, VT); 676e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard case Intrinsic::r600_read_tidig_z: 677e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass, 678e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard AMDGPU::VGPR2, VT); 67968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_load_const: { 68068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard SDValue Ops [] = { 681dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op.getOperand(1), 68268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard Op.getOperand(2) 68368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard }; 68468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard 685d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer MachineMemOperand *MMO = MF.getMachineMemOperand( 686d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer MachinePointerInfo(), 687d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, 688d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer VT.getSizeInBits() / 8, 4); 68968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL, 690dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op->getVTList(), Ops, VT, MMO); 69168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard } 69268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_sample: 69368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG); 69468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_sampleb: 69568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG); 69668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_sampled: 69768db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG); 69868db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_samplel: 69968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG); 70068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard case AMDGPUIntrinsic::SI_vs_load_input: 70168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT, 702dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op.getOperand(1), 70368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard Op.getOperand(2), 70468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard Op.getOperand(3)); 705e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard } 706e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard } 707a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard 708a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard case ISD::INTRINSIC_VOID: 709a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard SDValue Chain = Op.getOperand(0); 710a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 711a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard 712a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard switch (IntrinsicID) { 713a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard case AMDGPUIntrinsic::SI_tbuffer_store: { 714a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard SDLoc DL(Op); 715a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard SDValue Ops [] = { 716a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Chain, 717dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op.getOperand(2), 718a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(3), 719a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(4), 720a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(5), 721a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(6), 722a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(7), 723a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(8), 724a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(9), 725a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(10), 726a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(11), 727a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(12), 728a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(13), 729a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard Op.getOperand(14) 730a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard }; 731a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard EVT VT = Op.getOperand(3).getValueType(); 732a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard 733a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard MachineMemOperand *MMO = MF.getMachineMemOperand( 734a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard MachinePointerInfo(), 735a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard MachineMemOperand::MOStore, 736a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard VT.getSizeInBits() / 8, 4); 737a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL, 738dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op->getVTList(), Ops, VT, MMO); 739a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard } 740a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard default: 741a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard break; 742a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard } 743f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 744f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return SDValue(); 745f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 746f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 7476b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// \brief Helper function for LowerBRCOND 7486b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellardstatic SDNode *findUser(SDValue Value, unsigned Opcode) { 7496b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7506b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDNode *Parent = Value.getNode(); 7516b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end(); 7526b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard I != E; ++I) { 7536b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7546b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard if (I.getUse().get() != Value) 7556b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard continue; 7566b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7576b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard if (I->getOpcode() == Opcode) 7586b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard return *I; 7596b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard } 760dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return nullptr; 7616b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard} 7626b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7636b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// This transforms the control flow intrinsics to get the branch destination as 7646b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// last parameter, also switches branch target with BR if the need arise 7656b7d99d47321ebb478b22afd2e317fe89d2149dbTom StellardSDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, 7666b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SelectionDAG &DAG) const { 7676b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 768ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick SDLoc DL(BRCOND); 7696b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7706b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDNode *Intr = BRCOND.getOperand(1).getNode(); 7716b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue Target = BRCOND.getOperand(2); 772dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines SDNode *BR = nullptr; 7736b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7746b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard if (Intr->getOpcode() == ISD::SETCC) { 7756b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // As long as we negate the condition everything is fine 7766b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDNode *SetCC = Intr; 7776b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard assert(SetCC->getConstantOperandVal(1) == 1); 778e13a2a3fdeca6153314bb08c1b7161205d39d168NAKAMURA Takumi assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() == 779e13a2a3fdeca6153314bb08c1b7161205d39d168NAKAMURA Takumi ISD::SETNE); 7806b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Intr = SetCC->getOperand(0).getNode(); 7816b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7826b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard } else { 7836b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // Get the target from BR if we don't negate the condition 7846b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard BR = findUser(BRCOND, ISD::BR); 7856b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Target = BR->getOperand(1); 7866b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard } 7876b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7886b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN); 7896b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7906b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // Build the result and 7916b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SmallVector<EVT, 4> Res; 7926b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i) 7936b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Res.push_back(Intr->getValueType(i)); 7946b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 7956b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // operands of the new intrinsic call 7966b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SmallVector<SDValue, 4> Ops; 7976b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Ops.push_back(BRCOND.getOperand(0)); 7986b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i) 7996b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Ops.push_back(Intr->getOperand(i)); 8006b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Ops.push_back(Target); 8016b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8026b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // build the new intrinsic call 8036b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDNode *Result = DAG.getNode( 8046b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL, 805dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DAG.getVTList(Res), Ops).getNode(); 8066b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8076b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard if (BR) { 8086b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // Give the branch instruction our target 8096b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue Ops[] = { 8106b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard BR->getOperand(0), 8116b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard BRCOND.getOperand(2) 8126b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard }; 813dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops); 8146b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard } 8156b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8166b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue Chain = SDValue(Result, Result->getNumValues() - 1); 8176b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8186b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // Copy the intrinsic results to registers 8196b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) { 8206b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg); 8216b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard if (!CopyToReg) 8226b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard continue; 8236b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8246b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Chain = DAG.getCopyToReg( 8256b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Chain, DL, 8266b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard CopyToReg->getOperand(1), 8276b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue(Result, i - 1), 8286b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue()); 8296b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8306b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0)); 8316b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard } 8326b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8336b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard // Remove the old intrinsic from the chain 8346b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard DAG.ReplaceAllUsesOfValueWith( 8356b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard SDValue(Intr, Intr->getNumValues() - 1), 8366b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard Intr->getOperand(0)); 8376b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard 8386b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard return Chain; 839f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 840f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 841a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom StellardSDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { 842a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SDLoc DL(Op); 843a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard LoadSDNode *Load = cast<LoadSDNode>(Op); 844cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG); 845cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (Lowered.getNode()) 846cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return Lowered; 847a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 84836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { 849a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return SDValue(); 85036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 85136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 85236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines EVT MemVT = Load->getMemoryVT(); 85336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 85436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert(!MemVT.isVector() && "Private loads should be scalarized"); 85536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int"); 856a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 85736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), 858a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard DAG.getConstant(2, MVT::i32)); 859cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 860cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // FIXME: REGISTER_LOAD should probably have a chain result. 861cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Chain = Load->getChain(); 862cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, 863cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Chain, Ptr, 864cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DAG.getTargetConstant(0, MVT::i32), 865cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Op.getOperand(2)); 866cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 867cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Ret = LoLoad.getValue(0); 86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (MemVT.getSizeInBits() == 64) { 869cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // TODO: This needs a test to make sure the right thing is happening with 870cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // the chain. That is hard without general function support. 871cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 87236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, 87336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(1, MVT::i32)); 87436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 875cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, 876cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Chain, IncPtr, 877cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DAG.getTargetConstant(0, MVT::i32), 878cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Op.getOperand(2)); 87936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 880cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad); 881cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 882cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // LoLoad.getValue(1), HiLoad.getValue(1)); 88336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 884a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 885cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Ops[] = { 886cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Ret, 887cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Chain 888cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines }; 88968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard 890cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return DAG.getMergeValues(Ops, DL); 89168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard} 89268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard 89368db37b952be497c94c7aa98cf26f3baadb5afd3Tom StellardSDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, 89468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard const SDValue &Op, 89568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard SelectionDAG &DAG) const { 89668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1), 89768db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard Op.getOperand(2), 898dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Op.getOperand(3), 89968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard Op.getOperand(4)); 90068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard} 90168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard 90236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 90336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Op.getValueType() != MVT::i64) 90436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return SDValue(); 90536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 90636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDLoc DL(Op); 90736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Cond = Op.getOperand(0); 90836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 90936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Zero = DAG.getConstant(0, MVT::i32); 91036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue One = DAG.getConstant(1, MVT::i32); 91136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 91236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1)); 91336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2)); 91436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero); 91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero); 91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 91836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1); 91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One); 92136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One); 92236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 92336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1); 92436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 92536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi); 92636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res); 92736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines} 92836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 929a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom StellardSDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { 930a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SDLoc DL(Op); 931a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard StoreSDNode *Store = cast<StoreSDNode>(Op); 932a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard EVT VT = Store->getMemoryVT(); 933a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 934cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // These stores are legal. 935cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && 936cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines VT.isVector() && VT.getVectorNumElements() == 2 && 937cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines VT.getVectorElementType() == MVT::i32) 938cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SDValue(); 939cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 940a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); 941a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard if (Ret.getNode()) 942a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return Ret; 943a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 944a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard if (VT.isVector() && VT.getVectorNumElements() >= 8) 945a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return SplitVectorStore(Op, DAG); 946a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (VT == MVT::i1) 94836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines return DAG.getTruncStore(Store->getChain(), DL, 94936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32), 95036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Store->getBasePtr(), MVT::i1, Store->getMemOperand()); 95136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 952a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) 953a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return SDValue(); 954a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 95536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(), 956a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard DAG.getConstant(2, MVT::i32)); 957a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SDValue Chain = Store->getChain(); 958a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SmallVector<SDValue, 8> Values; 959a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Store->isTruncatingStore()) { 96136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines unsigned Mask = 0; 96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines if (Store->getMemoryVT() == MVT::i8) { 96336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Mask = 0xff; 96436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (Store->getMemoryVT() == MVT::i16) { 96536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Mask = 0xffff; 96636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } 96736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, 96836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Chain, Store->getBasePtr(), 96936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(0, MVT::i32)); 97036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(), 97136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(0x3, MVT::i32)); 97236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx, 97336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(3, MVT::i32)); 97436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(), 97536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(Mask, MVT::i32)); 97636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32, 97736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines MaskedValue, ShiftAmt); 97836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32, 97936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(32, MVT::i32), ShiftAmt); 98036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32, 98136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines DAG.getConstant(Mask, MVT::i32), 98236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines RotrAmt); 98336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask); 98436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue); 98536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines 98636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines Values.push_back(Dst); 98736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines } else if (VT == MVT::i64) { 988a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard for (unsigned i = 0; i < 2; ++i) { 989a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, 990a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Store->getValue(), DAG.getConstant(i, MVT::i32))); 991a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 992a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } else if (VT == MVT::i128) { 993a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard for (unsigned i = 0; i < 2; ++i) { 994a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard for (unsigned j = 0; j < 2; ++j) { 995a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, 996a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, 997a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Store->getValue(), DAG.getConstant(i, MVT::i32)), 998a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard DAG.getConstant(j, MVT::i32))); 999a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 1000a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 1001a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } else { 1002a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Values.push_back(Store->getValue()); 1003a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 1004a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 1005a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard for (unsigned i = 0; i < Values.size(); ++i) { 1006a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, 1007a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Ptr, DAG.getConstant(i, MVT::i32)); 1008a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, 1009a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard Chain, Values[i], PartPtr, 1010a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard DAG.getTargetConstant(0, MVT::i32)); 1011a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard } 1012a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard return Chain; 1013a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard} 1014a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 1015cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//===----------------------------------------------------------------------===// 1016cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// Custom DAG optimizations 1017cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//===----------------------------------------------------------------------===// 1018cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1019cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, 1020cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DAGCombinerInfo &DCI) { 1021cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT VT = N->getValueType(0); 1022cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT ScalarVT = VT.getScalarType(); 1023cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (ScalarVT != MVT::f32) 1024cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SDValue(); 1025a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard 1026cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SelectionDAG &DAG = DCI.DAG; 1027cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDLoc DL(N); 10288cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard 1029cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Src = N->getOperand(0); 1030cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT SrcVT = Src.getValueType(); 1031cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1032cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // TODO: We could try to match extracting the higher bytes, which would be 1033cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // easier if i8 vectors weren't promoted to i32 vectors, particularly after 1034cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // types are legalized. v4i8 -> v4f32 is probably the only case to worry 1035cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // about in practice. 1036cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) { 1037cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) { 1038cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src); 1039cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DCI.AddToWorklist(Cvt.getNode()); 1040cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return Cvt; 1041cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1042cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1043cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1044cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // We are primarily trying to catch operations on illegal vector types 1045cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // before they are expanded. 1046cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // For scalars, we can use the more flexible method of checking masked bits 1047cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // after legalization. 1048cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (!DCI.isBeforeLegalize() || 1049cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines !SrcVT.isVector() || 1050cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SrcVT.getVectorElementType() != MVT::i8) { 10518cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard return SDValue(); 10528cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard } 10538cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard 1054cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines assert(DCI.isBeforeLegalize() && "Unexpected legal type"); 1055dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 1056cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Weird sized vectors are a pain to handle, but we know 3 is really the same 1057cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // size as 4. 1058cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned NElts = SrcVT.getVectorNumElements(); 1059cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (!SrcVT.isSimple() && NElts != 3) 1060cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SDValue(); 10618cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard 1062cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to 1063cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // prevent a mess from expanding to v4i32 and repacking. 1064cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { 1065cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT); 1066cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT); 1067cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts); 1068cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1069cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines LoadSDNode *Load = cast<LoadSDNode>(Src); 1070cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT, 1071cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Load->getChain(), 1072cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Load->getBasePtr(), 1073cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines LoadVT, 1074cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Load->getMemOperand()); 1075cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1076cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Make sure successors of the original load stay after it by updating 1077cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // them to use the new Chain. 1078cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1)); 1079cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1080cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SmallVector<SDValue, 4> Elts; 1081cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (RegVT.isVector()) 1082cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DAG.ExtractVectorElements(NewLoad, Elts); 1083cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines else 1084cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Elts.push_back(NewLoad); 1085cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1086cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SmallVector<SDValue, 4> Ops; 1087cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1088cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned EltIdx = 0; 1089cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines for (SDValue Elt : Elts) { 1090cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx); 1091cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines for (unsigned I = 0; I < ComponentsInElt; ++I) { 1092cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I; 1093cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt); 1094cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DCI.AddToWorklist(Cvt.getNode()); 1095cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines Ops.push_back(Cvt); 1096cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1097cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1098cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines ++EltIdx; 1099cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1100cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1101cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines assert(Ops.size() == NElts); 1102cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1103cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops); 1104cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1105cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1106cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return SDValue(); 1107cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines} 1108f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 1109f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSDValue SITargetLowering::PerformDAGCombine(SDNode *N, 1110f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DAGCombinerInfo &DCI) const { 1111f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard SelectionDAG &DAG = DCI.DAG; 1112ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick SDLoc DL(N); 1113f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard EVT VT = N->getValueType(0); 1114f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 1115f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard switch (N->getOpcode()) { 1116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); 1117f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case ISD::SELECT_CC: { 1118f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard ConstantSDNode *True, *False; 1119f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc) 1120f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2))) 1121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && (False = dyn_cast<ConstantSDNode>(N->getOperand(3))) 1122f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && True->isAllOnesValue() 1123f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && False->isNullValue() 1124f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && VT == MVT::i1) { 1125f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0), 1126f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard N->getOperand(1), N->getOperand(4)); 1127f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 1128f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1129f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 1130f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1131f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard case ISD::SETCC: { 1132f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard SDValue Arg0 = N->getOperand(0); 1133f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard SDValue Arg1 = N->getOperand(1); 1134f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard SDValue CC = N->getOperand(2); 1135dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines ConstantSDNode * C = nullptr; 1136f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get(); 1137f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard 1138f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne) 1139f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard if (VT == MVT::i1 1140f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && Arg0.getOpcode() == ISD::SIGN_EXTEND 1141f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && Arg0.getOperand(0).getValueType() == MVT::i1 1142f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && (C = dyn_cast<ConstantSDNode>(Arg1)) 1143f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && C->isNullValue() 1144f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard && CCOp == ISD::SETNE) { 1145f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard return SimplifySetCC(VT, Arg0.getOperand(0), 1146f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL); 1147f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1148f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard break; 1149f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1150cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1151cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case AMDGPUISD::CVT_F32_UBYTE0: 1152cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case AMDGPUISD::CVT_F32_UBYTE1: 1153cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case AMDGPUISD::CVT_F32_UBYTE2: 1154cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case AMDGPUISD::CVT_F32_UBYTE3: { 1155cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0; 1156cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1157cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines SDValue Src = N->getOperand(0); 1158cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8); 1159cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1160cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines APInt KnownZero, KnownOne; 1161cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), 1162cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines !DCI.isBeforeLegalizeOps()); 1163cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 1164cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines if (TLO.ShrinkDemandedConstant(Src, Demanded) || 1165cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { 1166cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DCI.CommitTargetLoweringOpt(TLO); 1167cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1168cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1169cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines break; 1170cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1171cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines 1172cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines case ISD::UINT_TO_FP: { 1173cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines return performUCharToFloatCombine(N, DCI); 1174cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines } 1175f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard } 1176dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 1177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); 1178f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard} 1179c018ecac2f2f475b6e1023e90d0e48fcf9bd6e1dChristian Konig 1180225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault/// \brief Test if RegClass is one of the VSrc classes 1181d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigstatic bool isVSrc(unsigned RegClass) { 1182d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return AMDGPU::VSrc_32RegClassID == RegClass || 1183d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig AMDGPU::VSrc_64RegClassID == RegClass; 1184d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1185d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1186225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault/// \brief Test if RegClass is one of the SSrc classes 1187d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigstatic bool isSSrc(unsigned RegClass) { 1188d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return AMDGPU::SSrc_32RegClassID == RegClass || 1189d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig AMDGPU::SSrc_64RegClassID == RegClass; 1190d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1191d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1192d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Analyze the possible immediate value Op 1193d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// 1194d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// Returns -1 if it isn't an immediate, 0 if it's and inline immediate 1195d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// and the immediate value if it's a literal immediate 1196d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigint32_t SITargetLowering::analyzeImmediate(const SDNode *N) const { 1197d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1198d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig union { 1199d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig int32_t I; 1200d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig float F; 1201d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } Imm; 1202d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 12032fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) { 12042fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard if (Node->getZExtValue() >> 32) { 12052fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard return -1; 12062fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard } 1207d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.I = Node->getSExtValue(); 1208dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) { 1209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (N->getValueType(0) != MVT::f32) 1210dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines return -1; 1211d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.F = Node->getValueAPF().convertToFloat(); 1212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } else 1213d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return -1; // It isn't an immediate 1214d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1215d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if ((Imm.I >= -16 && Imm.I <= 64) || 1216d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.F == 0.5f || Imm.F == -0.5f || 1217d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.F == 1.0f || Imm.F == -1.0f || 1218d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.F == 2.0f || Imm.F == -2.0f || 1219d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Imm.F == 4.0f || Imm.F == -4.0f) 1220d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return 0; // It's an inline immediate 1221d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1222d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return Imm.I; // It's a literal immediate 1223d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1224d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1225d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Try to fold an immediate directly into an instruction 1226d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigbool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate, 1227d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig bool &ScalarSlotUsed) const { 1228d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1229d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand); 1230b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling const SIInstrInfo *TII = 1231b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 1232dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Mov || !TII->isMov(Mov->getMachineOpcode())) 1233d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return false; 1234d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1235d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig const SDValue &Op = Mov->getOperand(0); 1236d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig int32_t Value = analyzeImmediate(Op.getNode()); 1237d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (Value == -1) { 1238d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Not an immediate at all 1239d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return false; 1240d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1241d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } else if (Value == 0) { 1242d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Inline immediates can always be fold 1243d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Operand = Op; 1244d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return true; 1245d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1246d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } else if (Value == Immediate) { 1247d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Already fold literal immediate 1248d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Operand = Op; 1249d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return true; 1250d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1251d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } else if (!ScalarSlotUsed && !Immediate) { 1252d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Fold this literal immediate 1253d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig ScalarSlotUsed = true; 1254d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Immediate = Value; 1255d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Operand = Op; 1256d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return true; 1257d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1258d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1259d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1260d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return false; 1261d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1262d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 12633406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellardconst TargetRegisterClass *SITargetLowering::getRegClassForNode( 12643406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard SelectionDAG &DAG, const SDValue &Op) const { 12653406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard const SIInstrInfo *TII = 12663406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 12673406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard const SIRegisterInfo &TRI = TII->getRegisterInfo(); 12683406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard 12693406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard if (!Op->isMachineOpcode()) { 12703406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard switch(Op->getOpcode()) { 12713406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard case ISD::CopyFromReg: { 12723406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo(); 12733406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg(); 12743406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard if (TargetRegisterInfo::isVirtualRegister(Reg)) { 12753406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return MRI.getRegClass(Reg); 12763406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 12773406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI.getPhysRegClass(Reg); 12783406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 1279dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines default: return nullptr; 12803406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 12813406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 12823406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode()); 12833406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass; 12843406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard if (OpClassID != -1) { 12853406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI.getRegClass(OpClassID); 12863406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 12873406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard switch(Op.getMachineOpcode()) { 12883406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard case AMDGPU::COPY_TO_REGCLASS: 12893406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard // Operand 1 is the register class id for COPY_TO_REGCLASS instructions. 12903406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue(); 12913406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard 12923406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard // If the COPY_TO_REGCLASS instruction is copying to a VSrc register 12933406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard // class, then the register class for the value could be either a 12943406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard // VReg or and SReg. In order to get a more accurate 12953406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard if (OpClassID == AMDGPU::VSrc_32RegClassID || 12963406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard OpClassID == AMDGPU::VSrc_64RegClassID) { 12973406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return getRegClassForNode(DAG, Op.getOperand(0)); 12983406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 12993406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI.getRegClass(OpClassID); 13003406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard case AMDGPU::EXTRACT_SUBREG: { 13013406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 13023406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard const TargetRegisterClass *SuperClass = 13033406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard getRegClassForNode(DAG, Op.getOperand(0)); 13043406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI.getSubClassWithSubReg(SuperClass, SubIdx); 13053406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 13063406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard case AMDGPU::REG_SEQUENCE: 13073406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard // Operand 0 is the register class id for REG_SEQUENCE instructions. 13083406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI.getRegClass( 13093406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()); 13103406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard default: 13113406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return getRegClassFor(Op.getSimpleValueType()); 13123406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 13133406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard} 13143406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard 1315d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Does "Op" fit into register class "RegClass" ? 13169bf4590aaa26ebb5afdbec079daeee8e0b268b47Tom Stellardbool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op, 1317d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned RegClass) const { 1318b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 13193406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard const TargetRegisterClass *RC = getRegClassForNode(DAG, Op); 13203406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard if (!RC) { 1321d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return false; 13223406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard } 13233406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard return TRI->getRegClass(RegClass)->hasSubClassEq(RC); 1324d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1325d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1326d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Make sure that we don't exeed the number of allowed scalars 1327d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigvoid SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand, 1328d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned RegClass, 1329d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig bool &ScalarSlotUsed) const { 1330d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1331d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // First map the operands register class to a destination class 1332d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (RegClass == AMDGPU::VSrc_32RegClassID) 1333d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig RegClass = AMDGPU::VReg_32RegClassID; 1334d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig else if (RegClass == AMDGPU::VSrc_64RegClassID) 1335d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig RegClass = AMDGPU::VReg_64RegClassID; 1336d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig else 1337d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return; 1338d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 133936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines // Nothing to do if they fit naturally 1340d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (fitsRegClass(DAG, Operand, RegClass)) 1341d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return; 1342d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1343d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // If the scalar slot isn't used yet use it now 1344d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (!ScalarSlotUsed) { 1345d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig ScalarSlotUsed = true; 1346d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return; 1347d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1348d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 13496c066c044ed5b477cdec3eb3e95267783e6ce757Matt Arsenault // This is a conservative aproach. It is possible that we can't determine the 13506c066c044ed5b477cdec3eb3e95267783e6ce757Matt Arsenault // correct register class and copy too often, but better safe than sorry. 1351d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32); 1352ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(), 1353d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Operand.getValueType(), Operand, RC); 1354d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Operand = SDValue(Node, 0); 1355d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig} 1356d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1357c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard/// \returns true if \p Node's operands are different from the SDValue list 1358c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard/// \p Ops 1359c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellardstatic bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) { 1360c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) { 1361c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard if (Ops[i].getNode() != Node->getOperand(i).getNode()) { 1362c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard return true; 1363c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard } 1364c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard } 1365c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard return false; 1366c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard} 1367c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard 136884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Try to fold the Nodes operands into the Node 136984a775d8e3d5a3765e01db4b454f849ed8be99beChristian KonigSDNode *SITargetLowering::foldOperands(MachineSDNode *Node, 137084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SelectionDAG &DAG) const { 1371d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1372d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Original encoding (either e32 or e64) 1373d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig int Opcode = Node->getMachineOpcode(); 1374b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling const SIInstrInfo *TII = 1375b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 1376d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig const MCInstrDesc *Desc = &TII->get(Opcode); 1377d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1378d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned NumDefs = Desc->getNumDefs(); 1379d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned NumOps = Desc->getNumOperands(); 1380d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1381e49230895d9c666b84beaa748259fbf1f6715122Christian Konig // Commuted opcode if available 1382e49230895d9c666b84beaa748259fbf1f6715122Christian Konig int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1; 1383dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev); 1384e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 1385e49230895d9c666b84beaa748259fbf1f6715122Christian Konig assert(!DescRev || DescRev->getNumDefs() == NumDefs); 1386e49230895d9c666b84beaa748259fbf1f6715122Christian Konig assert(!DescRev || DescRev->getNumOperands() == NumOps); 1387e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 13883c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig // e64 version if available, -1 otherwise 13893c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig int OpcodeE64 = AMDGPU::getVOPe64(Opcode); 1390dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64); 1391dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines int InputModifiers[3] = {0}; 13923c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig 13933c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig assert(!DescE64 || DescE64->getNumDefs() == NumDefs); 13943c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig 1395d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig int32_t Immediate = Desc->getSize() == 4 ? 0 : -1; 1396d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig bool HaveVSrc = false, HaveSSrc = false; 1397d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1398cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // First figure out what we already have in this instruction. 1399d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; 1400d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig i != e && Op < NumOps; ++i, ++Op) { 1401d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1402d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned RegClass = Desc->OpInfo[Op].RegClass; 1403d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (isVSrc(RegClass)) 1404d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig HaveVSrc = true; 1405d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig else if (isSSrc(RegClass)) 1406d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig HaveSSrc = true; 1407d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig else 1408d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig continue; 1409d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1410d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode()); 1411d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (Imm != -1 && Imm != 0) { 1412d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Literal immediate 1413d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Immediate = Imm; 1414d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1415d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1416d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1417cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // If we neither have VSrc nor SSrc, it makes no sense to continue. 1418d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (!HaveVSrc && !HaveSSrc) 1419d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig return Node; 1420d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1421d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // No scalar allowed when we have both VSrc and SSrc 1422d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig bool ScalarSlotUsed = HaveVSrc && HaveSSrc; 1423d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1424d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Second go over the operands and try to fold them 1425d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig std::vector<SDValue> Ops; 14263c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig bool Promote2e64 = false; 1427d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; 1428d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig i != e && Op < NumOps; ++i, ++Op) { 1429d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1430d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig const SDValue &Operand = Node->getOperand(i); 1431d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Ops.push_back(Operand); 1432d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1433cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Already folded immediate? 1434d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig if (isa<ConstantSDNode>(Operand.getNode()) || 1435d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig isa<ConstantFPSDNode>(Operand.getNode())) 1436d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig continue; 1437d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1438cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Is this a VSrc or SSrc operand? 1439d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig unsigned RegClass = Desc->OpInfo[Op].RegClass; 14400c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig if (isVSrc(RegClass) || isSSrc(RegClass)) { 14410c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig // Try to fold the immediates 14420c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { 1443cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines // Folding didn't work, make sure we don't hit the SReg limit. 14440c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); 14450c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig } 14460c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig continue; 14470c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig } 1448b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig 1449e49230895d9c666b84beaa748259fbf1f6715122Christian Konig if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) { 1450b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig 14510c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass; 14520c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass)); 14530c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig 14540c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig // Test if it makes sense to swap operands 14550c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig if (foldImm(Ops[1], Immediate, ScalarSlotUsed) || 14560c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig (!fitsRegClass(DAG, Ops[1], RegClass) && 14570c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig fitsRegClass(DAG, Ops[1], OtherRegClass))) { 1458b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig 1459b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig // Swap commutable operands 1460dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::swap(Ops[0], Ops[1]); 1461e49230895d9c666b84beaa748259fbf1f6715122Christian Konig 1462e49230895d9c666b84beaa748259fbf1f6715122Christian Konig Desc = DescRev; 1463dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DescRev = nullptr; 14640c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig continue; 14650c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig } 14660c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig } 14673c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig 1468dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Immediate) 1469dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 1470dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 1471dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (DescE64) { 14720c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig // Test if it makes sense to switch to e64 encoding 14730c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass; 14740c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass)) 14750c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig continue; 14763c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig 14770c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig int32_t TmpImm = -1; 14780c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) || 14790c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig (!fitsRegClass(DAG, Ops[i], RegClass) && 14800c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig fitsRegClass(DAG, Ops[1], OtherRegClass))) { 1481d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 14820c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig // Switch to e64 encoding 14830c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig Immediate = -1; 14840c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig Promote2e64 = true; 14850c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig Desc = DescE64; 1486dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines DescE64 = nullptr; 14870c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig } 1488d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines 1490dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DescE64 && !Promote2e64) 1491dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 1492dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!Operand.isMachineOpcode()) 1493dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 1494dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) { 1495dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.pop_back(); 1496dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.push_back(Operand.getOperand(0)); 1497dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InputModifiers[i] = 1; 1498dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Promote2e64 = true; 1499dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DescE64) 1500dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 1501dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Desc = DescE64; 1502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DescE64 = nullptr; 1503dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 1504dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) { 1505dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.pop_back(); 1506dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.push_back(Operand.getOperand(0)); 1507dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines InputModifiers[i] = 2; 1508dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Promote2e64 = true; 1509dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines if (!DescE64) 1510dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines continue; 1511dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Desc = DescE64; 1512cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines DescE64 = nullptr; 1513dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 1514d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig } 1515d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 15163c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig if (Promote2e64) { 1517dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines std::vector<SDValue> OldOps(Ops); 1518dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.clear(); 1519dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned i = 0; i < OldOps.size(); ++i) { 1520dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines // src_modifier 1521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32)); 1522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Ops.push_back(OldOps[i]); 1523dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines } 15243c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig // Add the modifier flags while promoting 1525dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines for (unsigned i = 0; i < 2; ++i) 15263c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig Ops.push_back(DAG.getTargetConstant(0, MVT::i32)); 15273c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig } 15283c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig 1529d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig // Add optional chain and glue 1530d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i) 1531d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig Ops.push_back(Node->getOperand(i)); 1532d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig 1533051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard // Nodes that have a glue result are not CSE'd by getMachineNode(), so in 1534051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard // this case a brand new node is always be created, even if the operands 1535051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard // are the same as before. So, manually check if anything has been changed. 1536c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) { 1537c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard return Node; 1538051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard } 1539051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard 1540e49230895d9c666b84beaa748259fbf1f6715122Christian Konig // Create a complete new instruction 1541ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops); 1542c018ecac2f2f475b6e1023e90d0e48fcf9bd6e1dChristian Konig} 154384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 154484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Helper function for adjustWritemask 1545879b071bf539163f90a5ef449d3e6a9ec73faa2fBenjamin Kramerstatic unsigned SubIdx2Lane(unsigned Idx) { 154684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig switch (Idx) { 154784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig default: return 0; 154884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub0: return 0; 154984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub1: return 1; 155084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub2: return 2; 155184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub3: return 3; 155284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig } 155384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig} 155484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 155584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Adjust the writemask of MIMG instructions 155684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konigvoid SITargetLowering::adjustWritemask(MachineSDNode *&Node, 155784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SelectionDAG &DAG) const { 155884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SDNode *Users[4] = { }; 155996b5670cf454a586bee6d364fa91398c2e085852Tom Stellard unsigned Lane = 0; 156096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard unsigned OldDmask = Node->getConstantOperandVal(0); 156196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard unsigned NewDmask = 0; 156284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 156384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig // Try to figure out the used register components 156484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end(); 156584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig I != E; ++I) { 156684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 156784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig // Abort if we can't understand the usage 156884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig if (!I->isMachineOpcode() || 156984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG) 157084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig return; 157184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 157296b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used. 157396b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // Note that subregs are packed, i.e. Lane==0 is the first bit set 157496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit 157596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // set, etc. 15764d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig Lane = SubIdx2Lane(I->getConstantOperandVal(1)); 157784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 157896b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // Set which texture component corresponds to the lane. 157996b5670cf454a586bee6d364fa91398c2e085852Tom Stellard unsigned Comp; 158096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) { 158196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard assert(Dmask); 15829242b73286f050c53a26225b2a9acd14aeaa91daTom Stellard Comp = countTrailingZeros(Dmask); 158396b5670cf454a586bee6d364fa91398c2e085852Tom Stellard Dmask &= ~(1 << Comp); 158496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard } 158596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard 158684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig // Abort if we have more than one user per component 158784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig if (Users[Lane]) 158884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig return; 158984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 159084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig Users[Lane] = *I; 159196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard NewDmask |= 1 << Comp; 159284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig } 159384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 159496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // Abort if there's no change 159596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard if (NewDmask == OldDmask) 159684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig return; 159784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 159884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig // Adjust the writemask in the node 159984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig std::vector<SDValue> Ops; 160096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32)); 160184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) 160284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig Ops.push_back(Node->getOperand(i)); 1603dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops); 160484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 16054d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig // If we only got one lane, replace it with a copy 160696b5670cf454a586bee6d364fa91398c2e085852Tom Stellard // (if NewDmask has only one bit set...) 160796b5670cf454a586bee6d364fa91398c2e085852Tom Stellard if (NewDmask && (NewDmask & (NewDmask-1)) == 0) { 16084d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32); 16094d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 1610ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick SDLoc(), Users[Lane]->getValueType(0), 16114d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig SDValue(Node, 0), RC); 16124d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig DAG.ReplaceAllUsesWith(Users[Lane], Copy); 16134d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig return; 16144d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig } 16154d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 161684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig // Update the users of the node with the new indices 161784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) { 161884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 161984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SDNode *User = Users[i]; 162084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig if (!User) 162184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig continue; 162284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 162384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SDValue Op = DAG.getTargetConstant(Idx, MVT::i32); 162484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig DAG.UpdateNodeOperands(User, User->getOperand(0), Op); 162584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 162684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig switch (Idx) { 162784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig default: break; 162884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub0: Idx = AMDGPU::sub1; break; 162984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub1: Idx = AMDGPU::sub2; break; 163084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig case AMDGPU::sub2: Idx = AMDGPU::sub3; break; 163184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig } 163284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig } 163384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig} 163484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 1635cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines/// \brief Fold the instructions after selecting them. 163684a775d8e3d5a3765e01db4b454f849ed8be99beChristian KonigSDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, 163784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig SelectionDAG &DAG) const { 1638df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard const SIInstrInfo *TII = 1639df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 164017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard Node = AdjustRegClass(Node, DAG); 164184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 1642df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard if (TII->isMIMG(Node->getMachineOpcode())) 164384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig adjustWritemask(Node, DAG); 164484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig 164584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig return foldOperands(Node, DAG); 164684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig} 16474d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 16484d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig/// \brief Assign the register class depending on the number of 16494d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig/// bits set in the writemask 16504d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konigvoid SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 16514d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig SDNode *Node) const { 1652df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard const SIInstrInfo *TII = 1653df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo()); 1654df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard if (!TII->isMIMG(MI->getOpcode())) 16554d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig return; 16564d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 16574d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig unsigned VReg = MI->getOperand(0).getReg(); 16584d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig unsigned Writemask = MI->getOperand(1).getImm(); 16594d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig unsigned BitsSet = 0; 16604d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig for (unsigned i = 0; i < 4; ++i) 16614d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig BitsSet += Writemask & (1 << i) ? 1 : 0; 16624d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 16634d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig const TargetRegisterClass *RC; 16644d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig switch (BitsSet) { 16654d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig default: return; 16664d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig case 1: RC = &AMDGPU::VReg_32RegClass; break; 16674d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig case 2: RC = &AMDGPU::VReg_64RegClass; break; 16684d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig case 3: RC = &AMDGPU::VReg_96RegClass; break; 16694d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig } 16704d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig 16710f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet); 16720f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard MI->setDesc(TII->get(NewOpcode)); 16734d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 16744d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig MRI.setRegClass(VReg, RC); 16754d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig} 167617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard 167717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom StellardMachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N, 167817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard SelectionDAG &DAG) const { 167917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard 168017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard SDLoc DL(N); 168117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard unsigned NewOpcode = N->getMachineOpcode(); 168217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard 168317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard switch (N->getMachineOpcode()) { 168417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard default: return N; 168517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard case AMDGPU::S_LOAD_DWORD_IMM: 168617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64; 168717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard // Fall-through 168817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard case AMDGPU::S_LOAD_DWORDX2_SGPR: 168917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard if (NewOpcode == N->getMachineOpcode()) { 169017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; 169117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard } 169217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard // Fall-through 169317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard case AMDGPU::S_LOAD_DWORDX4_IMM: 169417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard case AMDGPU::S_LOAD_DWORDX4_SGPR: { 169517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard if (NewOpcode == N->getMachineOpcode()) { 169617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; 169717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard } 169817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) { 169917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard return N; 170017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard } 170117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1)); 170217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard SDValue Ops[] = { 170317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128, 170417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard DAG.getConstant(0, MVT::i64)), 0), 170517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard N->getOperand(0), 170617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32) 170717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard }; 170817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops); 170917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard } 171017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard } 171117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard} 1712e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard 1713e5fcc0dee4b41658986047f346201ad98757e7d5Tom StellardSDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 1714e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard const TargetRegisterClass *RC, 1715e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard unsigned Reg, EVT VT) const { 1716e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT); 1717e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard 1718e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()), 1719e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard cast<RegisterSDNode>(VReg)->getReg(), VT); 1720e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard} 1721