1f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===-- SIISelLowering.cpp - SI DAG Lowering Implementation ---------------===//
2f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
3f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//                     The LLVM Compiler Infrastructure
4f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
5f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// This file is distributed under the University of Illinois Open Source
6f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// License. See LICENSE.TXT for details.
7f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
8f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
9f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
10f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \file
11f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard/// \brief Custom DAG lowering for SI
12f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//
13f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
14f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
15f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIISelLowering.h"
16cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig#include "AMDGPU.h"
17cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "AMDGPUIntrinsicInfo.h"
1836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines#include "AMDGPUSubtarget.h"
19f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIInstrInfo.h"
20f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIMachineFunctionInfo.h"
21f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "SIRegisterInfo.h"
2290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig#include "llvm/CodeGen/CallingConvLower.h"
23f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineInstrBuilder.h"
24f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/MachineRegisterInfo.h"
25f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard#include "llvm/CodeGen/SelectionDAG.h"
265c35290fa35ae234fed02496404cb0fc37e1c8a5Benjamin Kramer#include "llvm/IR/Function.h"
27cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines#include "llvm/ADT/SmallString.h"
28f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
29f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellardusing namespace llvm;
30f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
31f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSITargetLowering::SITargetLowering(TargetMachine &TM) :
32b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling    AMDGPUTargetLowering(TM) {
33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
35204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig
36204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig  addRegisterClass(MVT::v32i8, &AMDGPU::SReg_256RegClass);
37204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig  addRegisterClass(MVT::v64i8, &AMDGPU::SReg_512RegClass);
38204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig
39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
40dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::f32, &AMDGPU::VReg_32RegClass);
41f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
42dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
43dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::v2i32, &AMDGPU::SReg_64RegClass);
44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::v2f32, &AMDGPU::VReg_64RegClass);
45204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig
46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  addRegisterClass(MVT::v4i32, &AMDGPU::SReg_128RegClass);
47204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig  addRegisterClass(MVT::v4f32, &AMDGPU::VReg_128RegClass);
48204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig
4936ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard  addRegisterClass(MVT::v8i32, &AMDGPU::VReg_256RegClass);
50204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig  addRegisterClass(MVT::v8f32, &AMDGPU::VReg_256RegClass);
51204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig
5236ba9091843bd1205fe3499ba4b55bbedc6583c9Tom Stellard  addRegisterClass(MVT::v16i32, &AMDGPU::VReg_512RegClass);
53204a2d32ba44ff150e2201d7e0900815b4446badChristian Konig  addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
54f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
55f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  computeRegisterProperties();
56f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
57fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  // Condition Codes
58fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
59fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
60fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
61fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
62fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);
63fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
64fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling
65fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);
66fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
67fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUGE, MVT::f64, Expand);
68fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
69fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETULE, MVT::f64, Expand);
70fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
71fd76325f8afd780f3b5863a32d4a7f1bc88fec07Bill Wendling
72b9e8678025891a3eb5431c652df541b632902fc6Christian Konig  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i32, Expand);
73b9e8678025891a3eb5431c652df541b632902fc6Christian Konig  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8f32, Expand);
74b9e8678025891a3eb5431c652df541b632902fc6Christian Konig  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i32, Expand);
75b9e8678025891a3eb5431c652df541b632902fc6Christian Konig  setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16f32, Expand);
76b9e8678025891a3eb5431c652df541b632902fc6Christian Konig
77f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  setOperationAction(ISD::ADD, MVT::i32, Legal);
78f38be91a829ad5a0391b6f139f0bd8cef341e689Matt Arsenault  setOperationAction(ISD::ADDC, MVT::i32, Legal);
79f38be91a829ad5a0391b6f139f0bd8cef341e689Matt Arsenault  setOperationAction(ISD::ADDE, MVT::i32, Legal);
80cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SUBC, MVT::i32, Legal);
81cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SUBE, MVT::i32, Legal);
821842ec4d9fc9eeceb2a77527026dfd84ee24cff1Aaron Watry
83d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard  // We need to custom lower vector stores from local memory
84d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
85d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
86f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
87f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setOperationAction(ISD::LOAD, MVT::v16i32, Custom);
88f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard
89f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setOperationAction(ISD::STORE, MVT::v8i32, Custom);
90f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setOperationAction(ISD::STORE, MVT::v16i32, Custom);
91d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard
92a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  // We need to custom lower loads/stores from private memory
93a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::LOAD, MVT::i32, Custom);
94a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
95a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);
9636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::LOAD, MVT::v8i32, Custom);
97a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
9836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::STORE, MVT::i1, Custom);
99a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::STORE, MVT::i32, Custom);
100a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
101a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  setOperationAction(ISD::STORE, MVT::v4i32, Custom);
102a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
103dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SELECT, MVT::f32, Promote);
104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  AddPromotedToType(ISD::SELECT, MVT::f32, MVT::i32);
10536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::SELECT, MVT::i64, Custom);
10636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::SELECT, MVT::f64, Promote);
10736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64);
108a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
109cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
110cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
111cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
112cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
11317ea10cb792832c99677afa13b9b866098bc4679Tom Stellard
1144e518fd941b119834b5764708fbabf41adc45040Tom Stellard  setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
1154e518fd941b119834b5764708fbabf41adc45040Tom Stellard  setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
1164e518fd941b119834b5764708fbabf41adc45040Tom Stellard
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal);
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom);
119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom);
120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Custom);
123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Custom);
124dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
125dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
126dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Custom);
127dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Custom);
128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
129dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Custom);
130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
131dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::Other, Custom);
132dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
133e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
13468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f32, Custom);
13568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v16i8, Custom);
13668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom);
137e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard
138a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
139cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
140a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
14236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
14336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
144dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
145f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
146f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
147cd0f2458641ebebdb887da0381bd3acea6a1902fTom Stellard
148dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
149dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::ZEXTLOAD, MVT::i8, Custom);
150dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Custom);
151dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand);
152dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
153dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
15436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
15536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
15636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
157014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
158dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
15936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
16036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
161014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider  setTruncStoreAction(MVT::f64, MVT::f32, Expand);
1623987e532f7e7b326083d3b5145bd29bb69e15410Matt Arsenault  setTruncStoreAction(MVT::i64, MVT::i32, Expand);
163f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Expand);
164f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Expand);
165014773626d2678868adf696ac58c44d2b2980fa8Niels Ole Salscheider
166dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::LOAD, MVT::i1, Custom);
167dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
168da25cd3e6de8f21005590c2de49868f883cf2410Tom Stellard  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
16936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
17036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
171a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer
172dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // These should use UDIVREM, so set them to expand
173dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::UDIV, MVT::i64, Expand);
174dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  setOperationAction(ISD::UREM, MVT::i64, Expand);
175dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
17636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // We only support LOAD/STORE and vector manipulation ops for vectors
17736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // with > 4 elements.
17836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  MVT VecTypes[] = {
17936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MVT::v8i32, MVT::v8f32, MVT::v16i32, MVT::v16f32
18036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  };
18136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
182dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (MVT VT : VecTypes) {
18336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op) {
18436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      switch(Op) {
18536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::LOAD:
18636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::STORE:
18736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::BUILD_VECTOR:
18836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::BITCAST:
18936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::EXTRACT_VECTOR_ELT:
19036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::INSERT_VECTOR_ELT:
19136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::CONCAT_VECTORS:
19236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::INSERT_SUBVECTOR:
19336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      case ISD::EXTRACT_SUBVECTOR:
19436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
19536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      default:
196dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        setOperationAction(Op, VT, Expand);
19736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        break;
19836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      }
19936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
20036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
20136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
20236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  for (int I = MVT::v1f64; I <= MVT::v8f64; ++I) {
20336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    MVT::SimpleValueType VT = static_cast<MVT::SimpleValueType>(I);
20436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FTRUNC, VT, Expand);
20536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FCEIL, VT, Expand);
20636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FFLOOR, VT, Expand);
20736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
208f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
20936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS) {
21036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
21136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
21236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    setOperationAction(ISD::FRINT, MVT::f64, Legal);
21436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
21536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
216cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // FIXME: These should be removed and handled the same was as f32 fneg. Source
217cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // modifiers also work for the double instructions.
218cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::FNEG, MVT::f64, Expand);
219cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setOperationAction(ISD::FABS, MVT::f64, Expand);
220cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
22136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  setTargetDAGCombine(ISD::SELECT_CC);
222f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  setTargetDAGCombine(ISD::SETCC);
223d787c047bcc5d0d81743f9d4403d5c54fe4757feMichel Danzer
224cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  setTargetDAGCombine(ISD::UINT_TO_FP);
225cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
226c53270f885e8d778cfe0e741e07d7def2b66884aChristian Konig  setSchedulingPreference(Sched::RegPressure);
227f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
228f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
22973e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard//===----------------------------------------------------------------------===//
23073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard// TargetLowering queries
23173e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard//===----------------------------------------------------------------------===//
23273e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard
23373e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellardbool SITargetLowering::allowsUnalignedMemoryAccesses(EVT  VT,
23436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                                     unsigned AddrSpace,
23573e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard                                                     bool *IsFast) const {
236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (IsFast)
237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    *IsFast = false;
238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
23973e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard  // XXX: This depends on the address space and also we may want to revist
24073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard  // the alignment values we specify in the DataLayout.
241dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
242dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
243dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // which isn't a simple VT.
244a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  if (!VT.isSimple() || VT == MVT::Other)
245a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    return false;
246dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
247dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // XXX - CI changes say "Support for unaligned memory accesses" but I don't
248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // see what for specifically. The wording everywhere else seems to be the
249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // same.
250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // 3.6.4 - Operations using pairs of VGPRs (for example: double-floats) have
252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // no alignment restrictions.
253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS) {
254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Using any pair of GPRs should be the same as any other pair.
255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (IsFast)
256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      *IsFast = true;
257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return VT.bitsGE(MVT::i64);
258dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
259dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
260dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // XXX - The only mention I see of this in the ISA manual is for LDS direct
261dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // reads the "byte address and must be dword aligned". Is it also true for the
262dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // normal loads and stores?
263dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS)
264dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return false;
265dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
266dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // 8.1.6 - For Dword or larger reads or writes, the two LSBs of the
267dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // byte-address are ignored, thus forcing Dword alignment.
268dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (IsFast)
269dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    *IsFast = true;
27073e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard  return VT.bitsGT(MVT::i32);
27173e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard}
27273e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard
273cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesTargetLoweringBase::LegalizeTypeAction
274cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSITargetLowering::getPreferredVectorAction(EVT VT) const {
275cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16))
276cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return TypeSplitVector;
277cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
278cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return TargetLoweringBase::getPreferredVectorAction(VT);
27936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
28036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
28136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hinesbool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
28236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                                         Type *Ty) const {
28336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  const SIInstrInfo *TII =
28436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
28536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return TII->isInlineConstant(Imm);
286a41520cf9b9cefed2091a0624a34c5f7fdb42a68Tom Stellard}
28773e44d8ae4c227af92b8f96f447e4a7ed38f6de5Tom Stellard
288f95b1621887e3409ceec2db47e1b44271d934735Tom StellardSDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
289e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                         SDLoc DL, SDValue Chain,
290dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                         unsigned Offset, bool Signed) const {
291e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
292e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
293e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                            AMDGPUAS::CONSTANT_ADDRESS);
294e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  SDValue BasePtr =  DAG.getCopyFromReg(Chain, DL,
295e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                           MRI.getLiveInVirtReg(AMDGPU::SGPR0_SGPR1), MVT::i64);
296e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  SDValue Ptr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
297e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                             DAG.getConstant(Offset, MVT::i64));
298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return DAG.getExtLoad(Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD, DL, VT, Chain, Ptr,
299f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard                            MachinePointerInfo(UndefValue::get(PtrTy)), MemVT,
300f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard                            false, false, MemVT.getSizeInBits() >> 3);
301e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard
302e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard}
303e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard
30490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian KonigSDValue SITargetLowering::LowerFormalArguments(
30590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                      SDValue Chain,
30690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                      CallingConv::ID CallConv,
30790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                      bool isVarArg,
30890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                      const SmallVectorImpl<ISD::InputArg> &Ins,
309ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick                                      SDLoc DL, SelectionDAG &DAG,
31090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                      SmallVectorImpl<SDValue> &InVals) const {
31190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
31290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
31390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
31490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  MachineFunction &MF = DAG.getMachineFunction();
31590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  FunctionType *FType = MF.getFunction()->getFunctionType();
316cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
31790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
31890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  assert(CallConv == CallingConv::C);
31990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
32090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  SmallVector<ISD::InputArg, 16> Splits;
321cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  uint32_t Skipped = 0;
322cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
323cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  for (unsigned i = 0, e = Ins.size(), PSInputNum = 0; i != e; ++i) {
32490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    const ISD::InputArg &Arg = Ins[i];
325225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault
326225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault    // First check if it's a PS input addr
3276a809a8d292ca4aa70cc07ce6b573239f397bb47Vincent Lejeune    if (Info->ShaderType == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
3286a809a8d292ca4aa70cc07ce6b573239f397bb47Vincent Lejeune        !Arg.Flags.isByVal()) {
329cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
330cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig      assert((PSInputNum <= 15) && "Too many PS inputs!");
331cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
332cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig      if (!Arg.Used) {
333cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig        // We can safely skip PS inputs
334cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig        Skipped |= 1 << i;
335cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig        ++PSInputNum;
336cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig        continue;
337cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig      }
338cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
339cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig      Info->PSInputAddr |= 1 << PSInputNum++;
340cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    }
341cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
342cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    // Second split vertices into their elements
343e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    if (Info->ShaderType != ShaderType::COMPUTE && Arg.VT.isVector()) {
34490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      ISD::InputArg NewArg = Arg;
34590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      NewArg.Flags.setSplit();
34690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      NewArg.VT = Arg.VT.getVectorElementType();
34790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
34890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // We REALLY want the ORIGINAL number of vertex elements here, e.g. a
34990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // three or five element vertex only needs three or five registers,
35090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // NOT four or eight.
35190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
35290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      unsigned NumElements = ParamType->getVectorNumElements();
35390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
35490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      for (unsigned j = 0; j != NumElements; ++j) {
35590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        Splits.push_back(NewArg);
35690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        NewArg.PartOffset += NewArg.VT.getStoreSize();
35790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      }
35890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
359f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard    } else if (Info->ShaderType != ShaderType::COMPUTE) {
36090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Splits.push_back(Arg);
36190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    }
36290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  }
36390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
36490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  SmallVector<CCValAssign, 16> ArgLocs;
36590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
36690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                 getTargetMachine(), ArgLocs, *DAG.getContext());
36790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
368cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  // At least one interpolation mode must be enabled or else the GPU will hang.
369cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  if (Info->ShaderType == ShaderType::PIXEL && (Info->PSInputAddr & 0x7F) == 0) {
370cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    Info->PSInputAddr |= 1;
371cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    CCInfo.AllocateReg(AMDGPU::VGPR0);
372cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    CCInfo.AllocateReg(AMDGPU::VGPR1);
373cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig  }
374cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
375e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard  // The pointer to the list of arguments is stored in SGPR0, SGPR1
376e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard  if (Info->ShaderType == ShaderType::COMPUTE) {
377e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    CCInfo.AllocateReg(AMDGPU::SGPR0);
378e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    CCInfo.AllocateReg(AMDGPU::SGPR1);
379e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    MF.addLiveIn(AMDGPU::SGPR0_SGPR1, &AMDGPU::SReg_64RegClass);
380e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard  }
381e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard
382f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  if (Info->ShaderType == ShaderType::COMPUTE) {
383f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard    getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
384f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard                            Splits);
385f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard  }
386f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard
38790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  AnalyzeFormalArguments(CCInfo, Splits);
38890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
38990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  for (unsigned i = 0, e = Ins.size(), ArgIdx = 0; i != e; ++i) {
39090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
391e91967820879b79f95b0378124c5f40d9e6d54eeChristian Konig    const ISD::InputArg &Arg = Ins[i];
392cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    if (Skipped & (1 << i)) {
393e91967820879b79f95b0378124c5f40d9e6d54eeChristian Konig      InVals.push_back(DAG.getUNDEF(Arg.VT));
394cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig      continue;
395cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig    }
396cc22640c4c8f0bc5d1e37b4ddcdf9e7c873e4383Christian Konig
39790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    CCValAssign &VA = ArgLocs[ArgIdx++];
398e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    EVT VT = VA.getLocVT();
399e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard
400e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    if (VA.isMemLoc()) {
401f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard      VT = Ins[i].VT;
402f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard      EVT MemVT = Splits[i].VT;
403e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      // The first 36 bytes of the input buffer contains information about
404e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      // thread group and global sizes.
405f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard      SDValue Arg = LowerParameter(DAG, VT, MemVT,  DL, DAG.getRoot(),
406dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                   36 + VA.getLocMemOffset(),
407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                   Ins[i].Flags.isSExt());
408e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard      InVals.push_back(Arg);
409e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard      continue;
410e7397ee81ad07cab36362bab5a086f20acc60a80Tom Stellard    }
41190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    assert(VA.isRegLoc() && "Parameter must be in a register!");
41290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
41390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    unsigned Reg = VA.getLocReg();
41490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
41590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    if (VT == MVT::i64) {
41690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // For now assume it is a pointer
41790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
41890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig                                     &AMDGPU::SReg_64RegClass);
41990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
42090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      InVals.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
42190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      continue;
42290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    }
42390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
42490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
42590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
42690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    Reg = MF.addLiveIn(Reg, RC);
42790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, VT);
42890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
42990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    if (Arg.VT.isVector()) {
43090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
43190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // Build a vector from the registers
43290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Type *ParamType = FType->getParamType(Arg.OrigArgIndex);
43390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      unsigned NumElements = ParamType->getVectorNumElements();
43490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
43590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      SmallVector<SDValue, 4> Regs;
43690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      Regs.push_back(Val);
43790c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      for (unsigned j = 1; j != NumElements; ++j) {
43890c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        Reg = ArgLocs[ArgIdx++].getLocReg();
43990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        Reg = MF.addLiveIn(Reg, RC);
44090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        Regs.push_back(DAG.getCopyFromReg(Chain, DL, Reg, VT));
44190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      }
44290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
44390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      // Fill up the missing vector elements
44490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      NumElements = Arg.VT.getVectorNumElements() - NumElements;
44590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      for (unsigned j = 0; j != NumElements; ++j)
44690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig        Regs.push_back(DAG.getUNDEF(VT));
447225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault
448dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      InVals.push_back(DAG.getNode(ISD::BUILD_VECTOR, DL, Arg.VT, Regs));
44990c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig      continue;
45090c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    }
45190c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
45290c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig    InVals.push_back(Val);
45390c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  }
45490c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig  return Chain;
45590c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig}
45690c64cbaa124e0e8541680efeaa56f0e6eb78d9aChristian Konig
457f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardMachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
458f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    MachineInstr * MI, MachineBasicBlock * BB) const {
459f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
4604956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard  MachineBasicBlock::iterator I = *MI;
461dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const SIInstrInfo *TII =
462dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
463dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
4644956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard
465f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  switch (MI->getOpcode()) {
466f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  default:
467f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
468f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  case AMDGPU::BRANCH: return BB;
4694956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard  case AMDGPU::SI_ADDR64_RSRC: {
4704956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    unsigned SuperReg = MI->getOperand(0).getReg();
47136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned SubRegLo = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
47236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned SubRegHi = MRI.createVirtualRegister(&AMDGPU::SGPR_64RegClass);
47336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned SubRegHiHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
47436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned SubRegHiLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
4754956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64), SubRegLo)
4764956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addOperand(MI->getOperand(1));
4774956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiLo)
4784956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addImm(0);
4794956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), SubRegHiHi)
48036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
4814956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SubRegHi)
4824956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addReg(SubRegHiLo)
4834956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addImm(AMDGPU::sub0)
4844956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addReg(SubRegHiHi)
4854956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addImm(AMDGPU::sub1);
4864956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::REG_SEQUENCE), SuperReg)
4874956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addReg(SubRegLo)
4884956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addImm(AMDGPU::sub0_sub1)
4894956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addReg(SubRegHi)
4904956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard            .addImm(AMDGPU::sub2_sub3);
4914956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    MI->eraseFromParent();
4924956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard    break;
4934956bc61e1c86e781fd8abe14431c121d960d65bTom Stellard  }
494cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case AMDGPU::V_SUB_F64: {
495cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    unsigned DestReg = MI->getOperand(0).getReg();
496cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg)
497cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(0)  // SRC0 modifiers
498cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addReg(MI->getOperand(1).getReg())
499cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(1)  // SRC1 modifiers
500cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addReg(MI->getOperand(2).getReg())
501cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(0)  // SRC2 modifiers
502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(0)  // src2
503cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(0)  // CLAMP
504cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .addImm(0); // OMOD
505d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1Tom Stellard    MI->eraseFromParent();
506d2442c10f9bfe8a9d6cdcb28030d32deb5b192b1Tom Stellard    break;
507cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
508a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  case AMDGPU::SI_RegisterStorePseudo: {
509a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
510a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
511a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    MachineInstrBuilder MIB =
512a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard        BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::SI_RegisterStore),
513a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                Reg);
514a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i)
515a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard      MIB.addOperand(MI->getOperand(i));
516a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
517a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    MI->eraseFromParent();
518dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    break;
519dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
520dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AMDGPU::FABS_SI: {
521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    const SIInstrInfo *TII =
523dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
524dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
525dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
526dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            Reg)
527dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0x7fffffff);
528dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_AND_B32_e32),
529dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            MI->getOperand(0).getReg())
530dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addReg(MI->getOperand(1).getReg())
531dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addReg(Reg);
532dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MI->eraseFromParent();
533dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    break;
534dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
535dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AMDGPU::FNEG_SI: {
536dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
537dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    const SIInstrInfo *TII =
538dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
539dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    unsigned Reg = MRI.createVirtualRegister(&AMDGPU::VReg_32RegClass);
540dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_MOV_B32_e32),
541dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            Reg)
542dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0x80000000);
543dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_XOR_B32_e32),
544dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            MI->getOperand(0).getReg())
545dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addReg(MI->getOperand(1).getReg())
546dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addReg(Reg);
547dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MI->eraseFromParent();
548dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    break;
549dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
550dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  case AMDGPU::FCLAMP_SI: {
551dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    const SIInstrInfo *TII =
552dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
553dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F32_e64),
554dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            MI->getOperand(0).getReg())
555dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0) // SRC0 modifiers
556dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addOperand(MI->getOperand(1))
557dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0) // SRC1 modifiers
558dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0) // SRC1
559dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(1) // CLAMP
560dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            .addImm(0); // OMOD
561dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    MI->eraseFromParent();
562a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  }
563f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
564f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return BB;
565f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
566f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
567225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt ArsenaultEVT SITargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
5684e518fd941b119834b5764708fbabf41adc45040Tom Stellard  if (!VT.isVector()) {
5694e518fd941b119834b5764708fbabf41adc45040Tom Stellard    return MVT::i1;
5704e518fd941b119834b5764708fbabf41adc45040Tom Stellard  }
5714e518fd941b119834b5764708fbabf41adc45040Tom Stellard  return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
572f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
573f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
574b87082228bb5151598addcf0eb3756cf0f906ab6Christian KonigMVT SITargetLowering::getScalarShiftAmountTy(EVT VT) const {
575b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig  return MVT::i32;
576b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig}
577b87082228bb5151598addcf0eb3756cf0f906ab6Christian Konig
5786e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheiderbool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
5796e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  VT = VT.getScalarType();
5806e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider
5816e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  if (!VT.isSimple())
5826e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider    return false;
5836e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider
5846e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  switch (VT.getSimpleVT().SimpleTy) {
5856e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  case MVT::f32:
5866e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider    return false; /* There is V_MAD_F32 for f32 */
5876e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  case MVT::f64:
5886e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider    return true;
5896e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  default:
5906e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider    break;
5916e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  }
5926e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider
5936e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider  return false;
5946e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider}
5956e4dbcd1150ea6d4fbf87a7840b3d8481bfcc8c5Niels Ole Salscheider
596f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
597f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard// Custom DAG Lowering Operations
598f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard//===----------------------------------------------------------------------===//
599f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
600f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
601a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  MachineFunction &MF = DAG.getMachineFunction();
602a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
603f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  switch (Op.getOpcode()) {
604f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
6056b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
606d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard  case ISD::LOAD: {
607d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard    LoadSDNode *Load = dyn_cast<LoadSDNode>(Op);
608cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    EVT VT = Op.getValueType();
609cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
610cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // These loads are legal.
611cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
612cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        VT.isVector() && VT.getVectorNumElements() == 2 &&
613cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        VT.getVectorElementType() == MVT::i32)
614cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return SDValue();
615cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
61636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (Op.getValueType().isVector() &&
61736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines        (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
61836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS ||
61936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines         (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
62036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines          Op.getValueType().getVectorNumElements() > 4))) {
621cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return SplitVectorLoad(Op, DAG);
622d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard    } else {
623cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      SDValue Result = LowerLOAD(Op, DAG);
624cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      assert((!Result.getNode() ||
625cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines              Result.getNode()->getNumValues() == 2) &&
626cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines             "Load should return a value and a chain");
627cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return Result;
628d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard    }
629d08a9303614355cfdcac5f2c27c09ce809565423Tom Stellard  }
630f95b1621887e3409ceec2db47e1b44271d934735Tom Stellard
63136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  case ISD::SELECT: return LowerSELECT(Op, DAG);
632a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  case ISD::STORE: return LowerSTORE(Op, DAG);
633a3e39dc7055486cbf514ccd868cfabc69d7f6f4eMichel Danzer  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
634e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  case ISD::INTRINSIC_WO_CHAIN: {
635e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    unsigned IntrinsicID =
636e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                         cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
637e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    EVT VT = Op.getValueType();
638e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    SDLoc DL(Op);
639e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    //XXX: Hardcoded we only use two to store the pointer to the parameters.
640e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    unsigned NumUserSGPRs = 2;
641e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    switch (IntrinsicID) {
642e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
643e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_ngroups_x:
644dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 0, false);
645e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_ngroups_y:
646dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 4, false);
647e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_ngroups_z:
648dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 8, false);
649e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_global_size_x:
650dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 12, false);
651e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_global_size_y:
652dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 16, false);
653e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_global_size_z:
654dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 20, false);
655e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_local_size_x:
656dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 24, false);
657e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_local_size_y:
658dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 28, false);
659e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_local_size_z:
660dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return LowerParameter(DAG, VT, VT, DL, DAG.getEntryNode(), 32, false);
661e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tgid_x:
662e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
663e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 0), VT);
664e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tgid_y:
665e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
666e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 1), VT);
667e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tgid_z:
668e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
669e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                     AMDGPU::SReg_32RegClass.getRegister(NumUserSGPRs + 2), VT);
670e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tidig_x:
671e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
672e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                  AMDGPU::VGPR0, VT);
673e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tidig_y:
674e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
675e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                  AMDGPU::VGPR1, VT);
676e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    case Intrinsic::r600_read_tidig_z:
677e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard      return CreateLiveInRegister(DAG, &AMDGPU::VReg_32RegClass,
678e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                  AMDGPU::VGPR2, VT);
67968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_load_const: {
68068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      SDValue Ops [] = {
681dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Op.getOperand(1),
68268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard        Op.getOperand(2)
68368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      };
68468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard
685d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer      MachineMemOperand *MMO = MF.getMachineMemOperand(
686d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer          MachinePointerInfo(),
687d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer          MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
688d0cad1f3ed71bd98ef8017c31573abf22fac9588Benjamin Kramer          VT.getSizeInBits() / 8, 4);
68968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
690dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                     Op->getVTList(), Ops, VT, MMO);
69168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    }
69268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_sample:
69368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return LowerSampleIntrinsic(AMDGPUISD::SAMPLE, Op, DAG);
69468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_sampleb:
69568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return LowerSampleIntrinsic(AMDGPUISD::SAMPLEB, Op, DAG);
69668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_sampled:
69768db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return LowerSampleIntrinsic(AMDGPUISD::SAMPLED, Op, DAG);
69868db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_samplel:
69968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return LowerSampleIntrinsic(AMDGPUISD::SAMPLEL, Op, DAG);
70068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard    case AMDGPUIntrinsic::SI_vs_load_input:
70168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard      return DAG.getNode(AMDGPUISD::LOAD_INPUT, DL, VT,
702dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                         Op.getOperand(1),
70368db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                         Op.getOperand(2),
70468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                         Op.getOperand(3));
705e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard    }
706e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  }
707a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard
708a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard  case ISD::INTRINSIC_VOID:
709a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard    SDValue Chain = Op.getOperand(0);
710a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard    unsigned IntrinsicID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
711a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard
712a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard    switch (IntrinsicID) {
713a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard      case AMDGPUIntrinsic::SI_tbuffer_store: {
714a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        SDLoc DL(Op);
715a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        SDValue Ops [] = {
716a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Chain,
717dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          Op.getOperand(2),
718a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(3),
719a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(4),
720a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(5),
721a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(6),
722a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(7),
723a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(8),
724a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(9),
725a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(10),
726a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(11),
727a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(12),
728a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(13),
729a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard          Op.getOperand(14)
730a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        };
731a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        EVT VT = Op.getOperand(3).getValueType();
732a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard
733a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        MachineMemOperand *MMO = MF.getMachineMemOperand(
734a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard            MachinePointerInfo(),
735a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard            MachineMemOperand::MOStore,
736a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard            VT.getSizeInBits() / 8, 4);
737a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        return DAG.getMemIntrinsicNode(AMDGPUISD::TBUFFER_STORE_FORMAT, DL,
738dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                       Op->getVTList(), Ops, VT, MMO);
739a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard      }
740a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard      default:
741a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard        break;
742a3c2bcf0ee2f63584f7a1e9df9fa153a8b5dfea1Tom Stellard    }
743f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
744f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  return SDValue();
745f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
746f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
7476b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// \brief Helper function for LowerBRCOND
7486b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellardstatic SDNode *findUser(SDValue Value, unsigned Opcode) {
7496b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7506b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SDNode *Parent = Value.getNode();
7516b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  for (SDNode::use_iterator I = Parent->use_begin(), E = Parent->use_end();
7526b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard       I != E; ++I) {
7536b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7546b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    if (I.getUse().get() != Value)
7556b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      continue;
7566b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7576b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    if (I->getOpcode() == Opcode)
7586b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      return *I;
7596b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  }
760dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return nullptr;
7616b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard}
7626b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7636b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// This transforms the control flow intrinsics to get the branch destination as
7646b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard/// last parameter, also switches branch target with BR if the need arise
7656b7d99d47321ebb478b22afd2e317fe89d2149dbTom StellardSDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
7666b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard                                      SelectionDAG &DAG) const {
7676b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
768ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick  SDLoc DL(BRCOND);
7696b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7706b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SDNode *Intr = BRCOND.getOperand(1).getNode();
7716b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SDValue Target = BRCOND.getOperand(2);
772dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SDNode *BR = nullptr;
7736b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7746b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  if (Intr->getOpcode() == ISD::SETCC) {
7756b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    // As long as we negate the condition everything is fine
7766b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    SDNode *SetCC = Intr;
7776b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    assert(SetCC->getConstantOperandVal(1) == 1);
778e13a2a3fdeca6153314bb08c1b7161205d39d168NAKAMURA Takumi    assert(cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
779e13a2a3fdeca6153314bb08c1b7161205d39d168NAKAMURA Takumi           ISD::SETNE);
7806b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Intr = SetCC->getOperand(0).getNode();
7816b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7826b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  } else {
7836b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    // Get the target from BR if we don't negate the condition
7846b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    BR = findUser(BRCOND, ISD::BR);
7856b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Target = BR->getOperand(1);
7866b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  }
7876b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7886b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  assert(Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN);
7896b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7906b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  // Build the result and
7916b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SmallVector<EVT, 4> Res;
7926b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  for (unsigned i = 1, e = Intr->getNumValues(); i != e; ++i)
7936b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Res.push_back(Intr->getValueType(i));
7946b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
7956b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  // operands of the new intrinsic call
7966b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SmallVector<SDValue, 4> Ops;
7976b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  Ops.push_back(BRCOND.getOperand(0));
7986b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  for (unsigned i = 1, e = Intr->getNumOperands(); i != e; ++i)
7996b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Ops.push_back(Intr->getOperand(i));
8006b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  Ops.push_back(Target);
8016b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8026b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  // build the new intrinsic call
8036b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SDNode *Result = DAG.getNode(
8046b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
805dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    DAG.getVTList(Res), Ops).getNode();
8066b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8076b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  if (BR) {
8086b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    // Give the branch instruction our target
8096b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    SDValue Ops[] = {
8106b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      BR->getOperand(0),
8116b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      BRCOND.getOperand(2)
8126b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    };
813dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    DAG.MorphNodeTo(BR, ISD::BR, BR->getVTList(), Ops);
8146b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  }
8156b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8166b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  SDValue Chain = SDValue(Result, Result->getNumValues() - 1);
8176b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8186b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  // Copy the intrinsic results to registers
8196b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  for (unsigned i = 1, e = Intr->getNumValues() - 1; i != e; ++i) {
8206b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    SDNode *CopyToReg = findUser(SDValue(Intr, i), ISD::CopyToReg);
8216b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    if (!CopyToReg)
8226b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      continue;
8236b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8246b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Chain = DAG.getCopyToReg(
8256b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      Chain, DL,
8266b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      CopyToReg->getOperand(1),
8276b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      SDValue(Result, i - 1),
8286b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard      SDValue());
8296b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8306b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    DAG.ReplaceAllUsesWith(SDValue(CopyToReg, 0), CopyToReg->getOperand(0));
8316b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  }
8326b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8336b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  // Remove the old intrinsic from the chain
8346b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  DAG.ReplaceAllUsesOfValueWith(
8356b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    SDValue(Intr, Intr->getNumValues() - 1),
8366b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard    Intr->getOperand(0));
8376b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard
8386b7d99d47321ebb478b22afd2e317fe89d2149dbTom Stellard  return Chain;
839f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
840f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
841a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom StellardSDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
842a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  SDLoc DL(Op);
843a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  LoadSDNode *Load = cast<LoadSDNode>(Op);
844cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
845cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (Lowered.getNode())
846cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return Lowered;
847a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
84836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
849a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    return SDValue();
85036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
85136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
85236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  EVT MemVT = Load->getMemoryVT();
85336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
85436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  assert(!MemVT.isVector() && "Private loads should be scalarized");
85536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  assert(!MemVT.isFloatingPoint() && "FP loads should be promoted to int");
856a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
85736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
858a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                            DAG.getConstant(2, MVT::i32));
859cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
860cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // FIXME: REGISTER_LOAD should probably have a chain result.
861cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue Chain = Load->getChain();
862cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
863cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                               Chain, Ptr,
864cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                               DAG.getTargetConstant(0, MVT::i32),
865cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                               Op.getOperand(2));
866cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
867cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue Ret = LoLoad.getValue(0);
86836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (MemVT.getSizeInBits() == 64) {
869cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // TODO: This needs a test to make sure the right thing is happening with
870cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // the chain. That is hard without general function support.
871cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
87236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
87336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                 DAG.getConstant(1, MVT::i32));
87436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
875cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
876cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                 Chain, IncPtr,
877cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                 DAG.getTargetConstant(0, MVT::i32),
878cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                 Op.getOperand(2));
87936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
880cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad);
881cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
882cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //                     LoLoad.getValue(1), HiLoad.getValue(1));
88336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  }
884a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
885cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue Ops[] = {
886cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    Ret,
887cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    Chain
888cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  };
88968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard
890cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return DAG.getMergeValues(Ops, DL);
89168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard}
89268db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard
89368db37b952be497c94c7aa98cf26f3baadb5afd3Tom StellardSDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode,
89468db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                                               const SDValue &Op,
89568db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                                               SelectionDAG &DAG) const {
89668db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard  return DAG.getNode(Opcode, SDLoc(Op), Op.getValueType(), Op.getOperand(1),
89768db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                     Op.getOperand(2),
898dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                     Op.getOperand(3),
89968db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard                     Op.getOperand(4));
90068db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard}
90168db37b952be497c94c7aa98cf26f3baadb5afd3Tom Stellard
90236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen HinesSDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
90336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Op.getValueType() != MVT::i64)
90436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return SDValue();
90536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
90636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDLoc DL(Op);
90736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Cond = Op.getOperand(0);
90836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
90936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Zero = DAG.getConstant(0, MVT::i32);
91036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue One = DAG.getConstant(1, MVT::i32);
91136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
91236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue LHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(1));
91336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue RHS = DAG.getNode(ISD::BITCAST, DL, MVT::v2i32, Op.getOperand(2));
91436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
91536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Lo0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, Zero);
91636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Lo1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, Zero);
91736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
91836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Lo = DAG.getSelect(DL, MVT::i32, Cond, Lo0, Lo1);
91936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
92036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Hi0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, LHS, One);
92136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Hi1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, RHS, One);
92236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
92336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Hi = DAG.getSelect(DL, MVT::i32, Cond, Hi0, Hi1);
92436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
92536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i32, Lo, Hi);
92636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res);
92736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines}
92836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
929a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom StellardSDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
930a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  SDLoc DL(Op);
931a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  StoreSDNode *Store = cast<StoreSDNode>(Op);
932a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  EVT VT = Store->getMemoryVT();
933a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
934cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // These stores are legal.
935cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
936cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      VT.isVector() && VT.getVectorNumElements() == 2 &&
937cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      VT.getVectorElementType() == MVT::i32)
938cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return SDValue();
939cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
940a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
941a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  if (Ret.getNode())
942a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    return Ret;
943a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
944a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  if (VT.isVector() && VT.getVectorNumElements() >= 8)
945a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard      return SplitVectorStore(Op, DAG);
946a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
94736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (VT == MVT::i1)
94836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    return DAG.getTruncStore(Store->getChain(), DL,
94936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                        DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
95036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                        Store->getBasePtr(), MVT::i1, Store->getMemOperand());
95136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
952a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  if (Store->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
953a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    return SDValue();
954a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
95536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Store->getBasePtr(),
956a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                            DAG.getConstant(2, MVT::i32));
957a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  SDValue Chain = Store->getChain();
958a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  SmallVector<SDValue, 8> Values;
959a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
96036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  if (Store->isTruncatingStore()) {
96136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    unsigned Mask = 0;
96236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    if (Store->getMemoryVT() == MVT::i8) {
96336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Mask = 0xff;
96436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    } else if (Store->getMemoryVT() == MVT::i16) {
96536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines      Mask = 0xffff;
96636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    }
96736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
96836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              Chain, Store->getBasePtr(),
96936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                              DAG.getConstant(0, MVT::i32));
97036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
97136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                  DAG.getConstant(0x3, MVT::i32));
97236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
97336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                   DAG.getConstant(3, MVT::i32));
97436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
97536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                      DAG.getConstant(Mask, MVT::i32));
97636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
97736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                       MaskedValue, ShiftAmt);
97836b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
97936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
98036b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
98136b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                  DAG.getConstant(Mask, MVT::i32),
98236b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines                                  RotrAmt);
98336b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
98436b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
98536b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines
98636b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines    Values.push_back(Dst);
98736b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  } else if (VT == MVT::i64) {
988a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    for (unsigned i = 0; i < 2; ++i) {
989a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard      Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
990a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                       Store->getValue(), DAG.getConstant(i, MVT::i32)));
991a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    }
992a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  } else if (VT == MVT::i128) {
993a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    for (unsigned i = 0; i < 2; ++i) {
994a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard      for (unsigned j = 0; j < 2; ++j) {
995a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard        Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
996a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                           DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64,
997a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                           Store->getValue(), DAG.getConstant(i, MVT::i32)),
998a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                         DAG.getConstant(j, MVT::i32)));
999a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard      }
1000a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    }
1001a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  } else {
1002a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    Values.push_back(Store->getValue());
1003a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  }
1004a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
1005a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  for (unsigned i = 0; i < Values.size(); ++i) {
1006a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    SDValue PartPtr = DAG.getNode(ISD::ADD, DL, MVT::i32,
1007a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                                  Ptr, DAG.getConstant(i, MVT::i32));
1008a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
1009a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                        Chain, Values[i], PartPtr,
1010a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard                        DAG.getTargetConstant(0, MVT::i32));
1011a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  }
1012a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard  return Chain;
1013a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard}
1014a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
1015cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//===----------------------------------------------------------------------===//
1016cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines// Custom DAG optimizations
1017cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines//===----------------------------------------------------------------------===//
1018cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1019cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSDValue SITargetLowering::performUCharToFloatCombine(SDNode *N,
1020cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                                     DAGCombinerInfo &DCI) {
1021cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  EVT VT = N->getValueType(0);
1022cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  EVT ScalarVT = VT.getScalarType();
1023cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ScalarVT != MVT::f32)
1024cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return SDValue();
1025a2b4eb6d15a13de257319ac6231b5ab622cd02b1Tom Stellard
1026cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SelectionDAG &DAG = DCI.DAG;
1027cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDLoc DL(N);
10288cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard
1029cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SDValue Src = N->getOperand(0);
1030cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  EVT SrcVT = Src.getValueType();
1031cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1032cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // TODO: We could try to match extracting the higher bytes, which would be
1033cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
1034cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
1035cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // about in practice.
1036cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) {
1037cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) {
1038cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src);
1039cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DCI.AddToWorklist(Cvt.getNode());
1040cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return Cvt;
1041cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
1042cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
1043cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1044cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // We are primarily trying to catch operations on illegal vector types
1045cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // before they are expanded.
1046cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // For scalars, we can use the more flexible method of checking masked bits
1047cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // after legalization.
1048cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (!DCI.isBeforeLegalize() ||
1049cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      !SrcVT.isVector() ||
1050cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      SrcVT.getVectorElementType() != MVT::i8) {
10518cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard    return SDValue();
10528cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard  }
10538cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard
1054cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  assert(DCI.isBeforeLegalize() && "Unexpected legal type");
1055dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
1056cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Weird sized vectors are a pain to handle, but we know 3 is really the same
1057cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // size as 4.
1058cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  unsigned NElts = SrcVT.getVectorNumElements();
1059cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (!SrcVT.isSimple() && NElts != 3)
1060cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return SDValue();
10618cd70d3a5bbc5c2b02d288337748a4fd5ddc9f54Tom Stellard
1062cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to
1063cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // prevent a mess from expanding to v4i32 and repacking.
1064cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
1065cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT);
1066cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT);
1067cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts);
1068cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1069cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    LoadSDNode *Load = cast<LoadSDNode>(Src);
1070cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT,
1071cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                     Load->getChain(),
1072cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                     Load->getBasePtr(),
1073cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                     LoadVT,
1074cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                     Load->getMemOperand());
1075cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1076cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Make sure successors of the original load stay after it by updating
1077cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // them to use the new Chain.
1078cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1));
1079cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1080cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    SmallVector<SDValue, 4> Elts;
1081cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (RegVT.isVector())
1082cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DAG.ExtractVectorElements(NewLoad, Elts);
1083cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    else
1084cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Elts.push_back(NewLoad);
1085cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1086cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    SmallVector<SDValue, 4> Ops;
1087cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1088cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    unsigned EltIdx = 0;
1089cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    for (SDValue Elt : Elts) {
1090cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx);
1091cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      for (unsigned I = 0; I < ComponentsInElt; ++I) {
1092cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I;
1093cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt);
1094cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        DCI.AddToWorklist(Cvt.getNode());
1095cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        Ops.push_back(Cvt);
1096cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      }
1097cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1098cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      ++EltIdx;
1099cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
1100cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1101cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    assert(Ops.size() == NElts);
1102cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1103cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops);
1104cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
1105cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1106cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return SDValue();
1107cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines}
1108f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
1109f98f2ce29e6e2996fa58f38979143eceaa818335Tom StellardSDValue SITargetLowering::PerformDAGCombine(SDNode *N,
1110f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                                            DAGCombinerInfo &DCI) const {
1111f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  SelectionDAG &DAG = DCI.DAG;
1112ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick  SDLoc DL(N);
1113f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  EVT VT = N->getValueType(0);
1114f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
1115f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  switch (N->getOpcode()) {
1116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1117f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    case ISD::SELECT_CC: {
1118f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      ConstantSDNode *True, *False;
1119f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      // i1 selectcc(l, r, -1, 0, cc) -> i1 setcc(l, r, cc)
1120f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      if ((True = dyn_cast<ConstantSDNode>(N->getOperand(2)))
1121f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && (False = dyn_cast<ConstantSDNode>(N->getOperand(3)))
1122f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && True->isAllOnesValue()
1123f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && False->isNullValue()
1124f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && VT == MVT::i1) {
1125f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        return DAG.getNode(ISD::SETCC, DL, VT, N->getOperand(0),
1126f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                           N->getOperand(1), N->getOperand(4));
1127f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
1128f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      }
1129f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      break;
1130f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    }
1131f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    case ISD::SETCC: {
1132f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      SDValue Arg0 = N->getOperand(0);
1133f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      SDValue Arg1 = N->getOperand(1);
1134f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      SDValue CC = N->getOperand(2);
1135dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      ConstantSDNode * C = nullptr;
1136f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      ISD::CondCode CCOp = dyn_cast<CondCodeSDNode>(CC)->get();
1137f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard
1138f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      // i1 setcc (sext(i1), 0, setne) -> i1 setcc(i1, 0, setne)
1139f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      if (VT == MVT::i1
1140f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && Arg0.getOpcode() == ISD::SIGN_EXTEND
1141f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && Arg0.getOperand(0).getValueType() == MVT::i1
1142f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && (C = dyn_cast<ConstantSDNode>(Arg1))
1143f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && C->isNullValue()
1144f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard          && CCOp == ISD::SETNE) {
1145f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard        return SimplifySetCC(VT, Arg0.getOperand(0),
1146f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard                             DAG.getConstant(0, MVT::i1), CCOp, true, DCI, DL);
1147f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      }
1148f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard      break;
1149f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard    }
1150cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1151cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case AMDGPUISD::CVT_F32_UBYTE0:
1152cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case AMDGPUISD::CVT_F32_UBYTE1:
1153cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case AMDGPUISD::CVT_F32_UBYTE2:
1154cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case AMDGPUISD::CVT_F32_UBYTE3: {
1155cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
1156cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1157cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    SDValue Src = N->getOperand(0);
1158cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
1159cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1160cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    APInt KnownZero, KnownOne;
1161cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
1162cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                          !DCI.isBeforeLegalizeOps());
1163cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1164cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
1165cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
1166cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DCI.CommitTargetLoweringOpt(TLO);
1167cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
1168cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1169cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    break;
1170cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
1171cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
1172cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  case ISD::UINT_TO_FP: {
1173cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return performUCharToFloatCombine(N, DCI);
1174cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
1175f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard  }
1176dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
1177dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1178f98f2ce29e6e2996fa58f38979143eceaa818335Tom Stellard}
1179c018ecac2f2f475b6e1023e90d0e48fcf9bd6e1dChristian Konig
1180225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault/// \brief Test if RegClass is one of the VSrc classes
1181d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigstatic bool isVSrc(unsigned RegClass) {
1182d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  return AMDGPU::VSrc_32RegClassID == RegClass ||
1183d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig         AMDGPU::VSrc_64RegClassID == RegClass;
1184d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1185d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1186225ed7069caae9ece32d8bd3d15c6e41e21cc04bMatt Arsenault/// \brief Test if RegClass is one of the SSrc classes
1187d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigstatic bool isSSrc(unsigned RegClass) {
1188d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  return AMDGPU::SSrc_32RegClassID == RegClass ||
1189d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig         AMDGPU::SSrc_64RegClassID == RegClass;
1190d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1191d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1192d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Analyze the possible immediate value Op
1193d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig///
1194d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// Returns -1 if it isn't an immediate, 0 if it's and inline immediate
1195d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// and the immediate value if it's a literal immediate
1196d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigint32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
1197d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1198d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  union {
1199d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    int32_t I;
1200d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    float F;
1201d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  } Imm;
1202d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
12032fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard  if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
12042fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard    if (Node->getZExtValue() >> 32) {
12052fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard        return -1;
12062fc7443498aee66e0112ef65a8466fa98d46e712Tom Stellard    }
1207d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Imm.I = Node->getSExtValue();
1208dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  } else if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
1209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (N->getValueType(0) != MVT::f32)
1210dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      return -1;
1211d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Imm.F = Node->getValueAPF().convertToFloat();
1212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  } else
1213d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return -1; // It isn't an immediate
1214d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1215d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if ((Imm.I >= -16 && Imm.I <= 64) ||
1216d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      Imm.F == 0.5f || Imm.F == -0.5f ||
1217d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      Imm.F == 1.0f || Imm.F == -1.0f ||
1218d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      Imm.F == 2.0f || Imm.F == -2.0f ||
1219d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      Imm.F == 4.0f || Imm.F == -4.0f)
1220d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return 0; // It's an inline immediate
1221d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1222d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  return Imm.I; // It's a literal immediate
1223d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1224d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1225d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Try to fold an immediate directly into an instruction
1226d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigbool SITargetLowering::foldImm(SDValue &Operand, int32_t &Immediate,
1227d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig                               bool &ScalarSlotUsed) const {
1228d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1229d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  MachineSDNode *Mov = dyn_cast<MachineSDNode>(Operand);
1230b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling  const SIInstrInfo *TII =
1231b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
1232dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!Mov || !TII->isMov(Mov->getMachineOpcode()))
1233d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return false;
1234d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1235d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  const SDValue &Op = Mov->getOperand(0);
1236d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  int32_t Value = analyzeImmediate(Op.getNode());
1237d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if (Value == -1) {
1238d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    // Not an immediate at all
1239d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return false;
1240d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1241d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  } else if (Value == 0) {
1242d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    // Inline immediates can always be fold
1243d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Operand = Op;
1244d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return true;
1245d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1246d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  } else if (Value == Immediate) {
1247d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    // Already fold literal immediate
1248d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Operand = Op;
1249d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return true;
1250d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1251d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  } else if (!ScalarSlotUsed && !Immediate) {
1252d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    // Fold this literal immediate
1253d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    ScalarSlotUsed = true;
1254d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Immediate = Value;
1255d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Operand = Op;
1256d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return true;
1257d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1258d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  }
1259d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1260d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  return false;
1261d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1262d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
12633406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellardconst TargetRegisterClass *SITargetLowering::getRegClassForNode(
12643406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard                                   SelectionDAG &DAG, const SDValue &Op) const {
12653406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  const SIInstrInfo *TII =
12663406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
12673406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  const SIRegisterInfo &TRI = TII->getRegisterInfo();
12683406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard
12693406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  if (!Op->isMachineOpcode()) {
12703406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    switch(Op->getOpcode()) {
12713406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    case ISD::CopyFromReg: {
12723406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
12733406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      unsigned Reg = cast<RegisterSDNode>(Op->getOperand(1))->getReg();
12743406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      if (TargetRegisterInfo::isVirtualRegister(Reg)) {
12753406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard        return MRI.getRegClass(Reg);
12763406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      }
12773406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      return TRI.getPhysRegClass(Reg);
12783406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    }
1279dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    default:  return nullptr;
12803406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    }
12813406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  }
12823406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  const MCInstrDesc &Desc = TII->get(Op->getMachineOpcode());
12833406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  int OpClassID = Desc.OpInfo[Op.getResNo()].RegClass;
12843406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  if (OpClassID != -1) {
12853406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    return TRI.getRegClass(OpClassID);
12863406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  }
12873406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  switch(Op.getMachineOpcode()) {
12883406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  case AMDGPU::COPY_TO_REGCLASS:
12893406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    // Operand 1 is the register class id for COPY_TO_REGCLASS instructions.
12903406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    OpClassID = cast<ConstantSDNode>(Op->getOperand(1))->getZExtValue();
12913406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard
12923406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    // If the COPY_TO_REGCLASS instruction is copying to a VSrc register
12933406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    // class, then the register class for the value could be either a
12943406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    // VReg or and SReg.  In order to get a more accurate
12953406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    if (OpClassID == AMDGPU::VSrc_32RegClassID ||
12963406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard        OpClassID == AMDGPU::VSrc_64RegClassID) {
12973406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      return getRegClassForNode(DAG, Op.getOperand(0));
12983406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    }
12993406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    return TRI.getRegClass(OpClassID);
13003406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  case AMDGPU::EXTRACT_SUBREG: {
13013406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    int SubIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
13023406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    const TargetRegisterClass *SuperClass =
13033406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      getRegClassForNode(DAG, Op.getOperand(0));
13043406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    return TRI.getSubClassWithSubReg(SuperClass, SubIdx);
13053406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  }
13063406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  case AMDGPU::REG_SEQUENCE:
13073406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    // Operand 0 is the register class id for REG_SEQUENCE instructions.
13083406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    return TRI.getRegClass(
13093406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard      cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue());
13103406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  default:
13113406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard    return getRegClassFor(Op.getSimpleValueType());
13123406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  }
13133406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard}
13143406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard
1315d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Does "Op" fit into register class "RegClass" ?
13169bf4590aaa26ebb5afdbec079daeee8e0b268b47Tom Stellardbool SITargetLowering::fitsRegClass(SelectionDAG &DAG, const SDValue &Op,
1317d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig                                    unsigned RegClass) const {
1318b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
13193406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  const TargetRegisterClass *RC = getRegClassForNode(DAG, Op);
13203406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  if (!RC) {
1321d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return false;
13223406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  }
13233406d882c02a6cd1e16f4636351c23dcb68d785fTom Stellard  return TRI->getRegClass(RegClass)->hasSubClassEq(RC);
1324d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1325d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1326d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig/// \brief Make sure that we don't exeed the number of allowed scalars
1327d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konigvoid SITargetLowering::ensureSRegLimit(SelectionDAG &DAG, SDValue &Operand,
1328d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig                                       unsigned RegClass,
1329d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig                                       bool &ScalarSlotUsed) const {
1330d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1331d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // First map the operands register class to a destination class
1332d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if (RegClass == AMDGPU::VSrc_32RegClassID)
1333d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    RegClass = AMDGPU::VReg_32RegClassID;
1334d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  else if (RegClass == AMDGPU::VSrc_64RegClassID)
1335d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    RegClass = AMDGPU::VReg_64RegClassID;
1336d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  else
1337d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return;
1338d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
133936b56886974eae4f9c5ebc96befd3e7bfe5de338Stephen Hines  // Nothing to do if they fit naturally
1340d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if (fitsRegClass(DAG, Operand, RegClass))
1341d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return;
1342d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1343d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // If the scalar slot isn't used yet use it now
1344d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if (!ScalarSlotUsed) {
1345d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    ScalarSlotUsed = true;
1346d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return;
1347d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  }
1348d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
13496c066c044ed5b477cdec3eb3e95267783e6ce757Matt Arsenault  // This is a conservative aproach. It is possible that we can't determine the
13506c066c044ed5b477cdec3eb3e95267783e6ce757Matt Arsenault  // correct register class and copy too often, but better safe than sorry.
1351d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  SDValue RC = DAG.getTargetConstant(RegClass, MVT::i32);
1352ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick  SDNode *Node = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, SDLoc(),
1353d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig                                    Operand.getValueType(), Operand, RC);
1354d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  Operand = SDValue(Node, 0);
1355d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig}
1356d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1357c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard/// \returns true if \p Node's operands are different from the SDValue list
1358c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard/// \p Ops
1359c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellardstatic bool isNodeChanged(const SDNode *Node, const std::vector<SDValue> &Ops) {
1360c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard  for (unsigned i = 0, e = Node->getNumOperands(); i < e; ++i) {
1361c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard    if (Ops[i].getNode() != Node->getOperand(i).getNode()) {
1362c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard      return true;
1363c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard    }
1364c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard  }
1365c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard  return false;
1366c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard}
1367c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard
136884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Try to fold the Nodes operands into the Node
136984a775d8e3d5a3765e01db4b454f849ed8be99beChristian KonigSDNode *SITargetLowering::foldOperands(MachineSDNode *Node,
137084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig                                       SelectionDAG &DAG) const {
1371d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1372d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // Original encoding (either e32 or e64)
1373d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  int Opcode = Node->getMachineOpcode();
1374b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling  const SIInstrInfo *TII =
1375b5632b5b456db647b42239cbd4d8b58c82290c4eBill Wendling    static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
1376d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  const MCInstrDesc *Desc = &TII->get(Opcode);
1377d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1378d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  unsigned NumDefs = Desc->getNumDefs();
1379d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  unsigned NumOps = Desc->getNumOperands();
1380d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1381e49230895d9c666b84beaa748259fbf1f6715122Christian Konig  // Commuted opcode if available
1382e49230895d9c666b84beaa748259fbf1f6715122Christian Konig  int OpcodeRev = Desc->isCommutable() ? TII->commuteOpcode(Opcode) : -1;
1383dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const MCInstrDesc *DescRev = OpcodeRev == -1 ? nullptr : &TII->get(OpcodeRev);
1384e49230895d9c666b84beaa748259fbf1f6715122Christian Konig
1385e49230895d9c666b84beaa748259fbf1f6715122Christian Konig  assert(!DescRev || DescRev->getNumDefs() == NumDefs);
1386e49230895d9c666b84beaa748259fbf1f6715122Christian Konig  assert(!DescRev || DescRev->getNumOperands() == NumOps);
1387e49230895d9c666b84beaa748259fbf1f6715122Christian Konig
13883c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  // e64 version if available, -1 otherwise
13893c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  int OpcodeE64 = AMDGPU::getVOPe64(Opcode);
1390dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const MCInstrDesc *DescE64 = OpcodeE64 == -1 ? nullptr : &TII->get(OpcodeE64);
1391dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  int InputModifiers[3] = {0};
13923c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig
13933c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  assert(!DescE64 || DescE64->getNumDefs() == NumDefs);
13943c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig
1395d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  int32_t Immediate = Desc->getSize() == 4 ? 0 : -1;
1396d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  bool HaveVSrc = false, HaveSSrc = false;
1397d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1398cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // First figure out what we already have in this instruction.
1399d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
1400d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig       i != e && Op < NumOps; ++i, ++Op) {
1401d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1402d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    unsigned RegClass = Desc->OpInfo[Op].RegClass;
1403d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    if (isVSrc(RegClass))
1404d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      HaveVSrc = true;
1405d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    else if (isSSrc(RegClass))
1406d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      HaveSSrc = true;
1407d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    else
1408d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      continue;
1409d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1410d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    int32_t Imm = analyzeImmediate(Node->getOperand(i).getNode());
1411d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    if (Imm != -1 && Imm != 0) {
1412d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      // Literal immediate
1413d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      Immediate = Imm;
1414d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    }
1415d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  }
1416d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1417cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // If we neither have VSrc nor SSrc, it makes no sense to continue.
1418d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  if (!HaveVSrc && !HaveSSrc)
1419d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    return Node;
1420d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1421d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // No scalar allowed when we have both VSrc and SSrc
1422d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  bool ScalarSlotUsed = HaveVSrc && HaveSSrc;
1423d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1424d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // Second go over the operands and try to fold them
1425d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  std::vector<SDValue> Ops;
14263c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  bool Promote2e64 = false;
1427d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs;
1428d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig       i != e && Op < NumOps; ++i, ++Op) {
1429d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1430d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    const SDValue &Operand = Node->getOperand(i);
1431d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Ops.push_back(Operand);
1432d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1433cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Already folded immediate?
1434d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    if (isa<ConstantSDNode>(Operand.getNode()) ||
1435d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig        isa<ConstantFPSDNode>(Operand.getNode()))
1436d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig      continue;
1437d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1438cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Is this a VSrc or SSrc operand?
1439d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    unsigned RegClass = Desc->OpInfo[Op].RegClass;
14400c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig    if (isVSrc(RegClass) || isSSrc(RegClass)) {
14410c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      // Try to fold the immediates
14420c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) {
1443cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        // Folding didn't work, make sure we don't hit the SReg limit.
14440c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed);
14450c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      }
14460c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      continue;
14470c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig    }
1448b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig
1449e49230895d9c666b84beaa748259fbf1f6715122Christian Konig    if (i == 1 && DescRev && fitsRegClass(DAG, Ops[0], RegClass)) {
1450b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig
14510c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      unsigned OtherRegClass = Desc->OpInfo[NumDefs].RegClass;
14520c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      assert(isVSrc(OtherRegClass) || isSSrc(OtherRegClass));
14530c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig
14540c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      // Test if it makes sense to swap operands
14550c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      if (foldImm(Ops[1], Immediate, ScalarSlotUsed) ||
14560c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig          (!fitsRegClass(DAG, Ops[1], RegClass) &&
14570c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig           fitsRegClass(DAG, Ops[1], OtherRegClass))) {
1458b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig
1459b37afdcf3fa41596ab1f70eef915a8ade68ccc24Christian Konig        // Swap commutable operands
1460dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        std::swap(Ops[0], Ops[1]);
1461e49230895d9c666b84beaa748259fbf1f6715122Christian Konig
1462e49230895d9c666b84beaa748259fbf1f6715122Christian Konig        Desc = DescRev;
1463dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        DescRev = nullptr;
14640c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        continue;
14650c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      }
14660c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig    }
14673c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig
1468dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (Immediate)
1469dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      continue;
1470dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
1471dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (DescE64) {
14720c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      // Test if it makes sense to switch to e64 encoding
14730c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass;
14740c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass))
14750c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        continue;
14763c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig
14770c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      int32_t TmpImm = -1;
14780c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      if (foldImm(Ops[i], TmpImm, ScalarSlotUsed) ||
14790c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig          (!fitsRegClass(DAG, Ops[i], RegClass) &&
14800c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig           fitsRegClass(DAG, Ops[1], OtherRegClass))) {
1481d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
14820c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        // Switch to e64 encoding
14830c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        Immediate = -1;
14840c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        Promote2e64 = true;
14850c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig        Desc = DescE64;
1486dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        DescE64 = nullptr;
14870c4e61ff0a712927eead06a4730f33ddd1a1ca55Christian Konig      }
1488d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    }
1489dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
1490dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (!DescE64 && !Promote2e64)
1491dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      continue;
1492dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (!Operand.isMachineOpcode())
1493dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      continue;
1494dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (Operand.getMachineOpcode() == AMDGPU::FNEG_SI) {
1495dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.pop_back();
1496dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.push_back(Operand.getOperand(0));
1497dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      InputModifiers[i] = 1;
1498dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Promote2e64 = true;
1499dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!DescE64)
1500dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
1501dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Desc = DescE64;
1502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DescE64 = nullptr;
1503dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
1504dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) {
1505dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.pop_back();
1506dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.push_back(Operand.getOperand(0));
1507dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      InputModifiers[i] = 2;
1508dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Promote2e64 = true;
1509dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (!DescE64)
1510dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        continue;
1511dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Desc = DescE64;
1512cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DescE64 = nullptr;
1513dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
1514d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  }
1515d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
15163c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  if (Promote2e64) {
1517dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    std::vector<SDValue> OldOps(Ops);
1518dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    Ops.clear();
1519dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    for (unsigned i = 0; i < OldOps.size(); ++i) {
1520dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // src_modifier
1521dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.push_back(DAG.getTargetConstant(InputModifiers[i], MVT::i32));
1522dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Ops.push_back(OldOps[i]);
1523dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
15243c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig    // Add the modifier flags while promoting
1525dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    for (unsigned i = 0; i < 2; ++i)
15263c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig      Ops.push_back(DAG.getTargetConstant(0, MVT::i32));
15273c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig  }
15283c980d1632fa0a1cef065e558fbc96d83ebbdf40Christian Konig
1529d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  // Add optional chain and glue
1530d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig  for (unsigned i = NumOps - NumDefs, e = Node->getNumOperands(); i < e; ++i)
1531d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig    Ops.push_back(Node->getOperand(i));
1532d3b5509b8099b72104bd8a0d9a998a69eb56ab2aChristian Konig
1533051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard  // Nodes that have a glue result are not CSE'd by getMachineNode(), so in
1534051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard  // this case a brand new node is always be created, even if the operands
1535051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard  // are the same as before.  So, manually check if anything has been changed.
1536c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard  if (Desc->Opcode == Opcode && !isNodeChanged(Node, Ops)) {
1537c170230b3a8c1e0a43614a929061ad24888bfe52Tom Stellard    return Node;
1538051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard  }
1539051a28e0e8a5a6f41d9360a58079af6231557152Tom Stellard
1540e49230895d9c666b84beaa748259fbf1f6715122Christian Konig  // Create a complete new instruction
1541ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick  return DAG.getMachineNode(Desc->Opcode, SDLoc(Node), Node->getVTList(), Ops);
1542c018ecac2f2f475b6e1023e90d0e48fcf9bd6e1dChristian Konig}
154384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
154484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Helper function for adjustWritemask
1545879b071bf539163f90a5ef449d3e6a9ec73faa2fBenjamin Kramerstatic unsigned SubIdx2Lane(unsigned Idx) {
154684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  switch (Idx) {
154784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  default: return 0;
154884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  case AMDGPU::sub0: return 0;
154984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  case AMDGPU::sub1: return 1;
155084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  case AMDGPU::sub2: return 2;
155184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  case AMDGPU::sub3: return 3;
155284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  }
155384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig}
155484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
155584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig/// \brief Adjust the writemask of MIMG instructions
155684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konigvoid SITargetLowering::adjustWritemask(MachineSDNode *&Node,
155784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig                                       SelectionDAG &DAG) const {
155884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  SDNode *Users[4] = { };
155996b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  unsigned Lane = 0;
156096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  unsigned OldDmask = Node->getConstantOperandVal(0);
156196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  unsigned NewDmask = 0;
156284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
156384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  // Try to figure out the used register components
156484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  for (SDNode::use_iterator I = Node->use_begin(), E = Node->use_end();
156584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig       I != E; ++I) {
156684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
156784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    // Abort if we can't understand the usage
156884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    if (!I->isMachineOpcode() ||
156984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig        I->getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG)
157084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig      return;
157184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
157296b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    // Lane means which subreg of %VGPRa_VGPRb_VGPRc_VGPRd is used.
157396b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    // Note that subregs are packed, i.e. Lane==0 is the first bit set
157496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    // in OldDmask, so it can be any of X,Y,Z,W; Lane==1 is the second bit
157596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    // set, etc.
15764d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    Lane = SubIdx2Lane(I->getConstantOperandVal(1));
157784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
157896b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    // Set which texture component corresponds to the lane.
157996b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    unsigned Comp;
158096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    for (unsigned i = 0, Dmask = OldDmask; i <= Lane; i++) {
158196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard      assert(Dmask);
15829242b73286f050c53a26225b2a9acd14aeaa91daTom Stellard      Comp = countTrailingZeros(Dmask);
158396b5670cf454a586bee6d364fa91398c2e085852Tom Stellard      Dmask &= ~(1 << Comp);
158496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    }
158596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard
158684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    // Abort if we have more than one user per component
158784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    if (Users[Lane])
158884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig      return;
158984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
159084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    Users[Lane] = *I;
159196b5670cf454a586bee6d364fa91398c2e085852Tom Stellard    NewDmask |= 1 << Comp;
159284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  }
159384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
159496b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  // Abort if there's no change
159596b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  if (NewDmask == OldDmask)
159684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    return;
159784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
159884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  // Adjust the writemask in the node
159984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  std::vector<SDValue> Ops;
160096b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  Ops.push_back(DAG.getTargetConstant(NewDmask, MVT::i32));
160184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
160284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    Ops.push_back(Node->getOperand(i));
1603dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  Node = (MachineSDNode*)DAG.UpdateNodeOperands(Node, Ops);
160484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
16054d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  // If we only got one lane, replace it with a copy
160696b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  // (if NewDmask has only one bit set...)
160796b5670cf454a586bee6d364fa91398c2e085852Tom Stellard  if (NewDmask && (NewDmask & (NewDmask-1)) == 0) {
16084d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    SDValue RC = DAG.getTargetConstant(AMDGPU::VReg_32RegClassID, MVT::i32);
16094d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    SDNode *Copy = DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
1610ac6d9bec671252dd1e596fa71180ff6b39d06b5dAndrew Trick                                      SDLoc(), Users[Lane]->getValueType(0),
16114d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig                                      SDValue(Node, 0), RC);
16124d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    DAG.ReplaceAllUsesWith(Users[Lane], Copy);
16134d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    return;
16144d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  }
16154d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig
161684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  // Update the users of the node with the new indices
161784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  for (unsigned i = 0, Idx = AMDGPU::sub0; i < 4; ++i) {
161884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
161984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    SDNode *User = Users[i];
162084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    if (!User)
162184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig      continue;
162284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
162384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    SDValue Op = DAG.getTargetConstant(Idx, MVT::i32);
162484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    DAG.UpdateNodeOperands(User, User->getOperand(0), Op);
162584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
162684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    switch (Idx) {
162784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    default: break;
162884a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    case AMDGPU::sub0: Idx = AMDGPU::sub1; break;
162984a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    case AMDGPU::sub1: Idx = AMDGPU::sub2; break;
163084a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    case AMDGPU::sub2: Idx = AMDGPU::sub3; break;
163184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    }
163284a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  }
163384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig}
163484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
1635cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines/// \brief Fold the instructions after selecting them.
163684a775d8e3d5a3765e01db4b454f849ed8be99beChristian KonigSDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node,
163784a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig                                          SelectionDAG &DAG) const {
1638df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard  const SIInstrInfo *TII =
1639df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
164017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  Node = AdjustRegClass(Node, DAG);
164184a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
1642df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard  if (TII->isMIMG(Node->getMachineOpcode()))
164384a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig    adjustWritemask(Node, DAG);
164484a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig
164584a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig  return foldOperands(Node, DAG);
164684a775d8e3d5a3765e01db4b454f849ed8be99beChristian Konig}
16474d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig
16484d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig/// \brief Assign the register class depending on the number of
16494d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig/// bits set in the writemask
16504d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konigvoid SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI,
16514d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig                                                     SDNode *Node) const {
1652df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard  const SIInstrInfo *TII =
1653df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard      static_cast<const SIInstrInfo*>(getTargetMachine().getInstrInfo());
1654df4626ef15ba0eb5f571a3ee6314e5c388258927Tom Stellard  if (!TII->isMIMG(MI->getOpcode()))
16554d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    return;
16564d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig
16574d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  unsigned VReg = MI->getOperand(0).getReg();
16584d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  unsigned Writemask = MI->getOperand(1).getImm();
16594d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  unsigned BitsSet = 0;
16604d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  for (unsigned i = 0; i < 4; ++i)
16614d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig    BitsSet += Writemask & (1 << i) ? 1 : 0;
16624d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig
16634d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  const TargetRegisterClass *RC;
16644d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  switch (BitsSet) {
16654d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  default: return;
16664d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  case 1:  RC = &AMDGPU::VReg_32RegClass; break;
16674d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  case 2:  RC = &AMDGPU::VReg_64RegClass; break;
16684d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  case 3:  RC = &AMDGPU::VReg_96RegClass; break;
16694d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  }
16704d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig
16710f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  unsigned NewOpcode = TII->getMaskedMIMGOp(MI->getOpcode(), BitsSet);
16720f9eaaa8aa10bdb658e887782b86f03dbea79cb1Tom Stellard  MI->setDesc(TII->get(NewOpcode));
16734d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
16744d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig  MRI.setRegClass(VReg, RC);
16754d0e8a8a3e2e5b98f598acad4d57452b99d52e74Christian Konig}
167617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard
167717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom StellardMachineSDNode *SITargetLowering::AdjustRegClass(MachineSDNode *N,
167817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard                                                SelectionDAG &DAG) const {
167917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard
168017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  SDLoc DL(N);
168117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  unsigned NewOpcode = N->getMachineOpcode();
168217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard
168317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  switch (N->getMachineOpcode()) {
168417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  default: return N;
168517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  case AMDGPU::S_LOAD_DWORD_IMM:
168617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    NewOpcode = AMDGPU::BUFFER_LOAD_DWORD_ADDR64;
168717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    // Fall-through
168817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  case AMDGPU::S_LOAD_DWORDX2_SGPR:
168917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    if (NewOpcode == N->getMachineOpcode()) {
169017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64;
169117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    }
169217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    // Fall-through
169317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  case AMDGPU::S_LOAD_DWORDX4_IMM:
169417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  case AMDGPU::S_LOAD_DWORDX4_SGPR: {
169517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    if (NewOpcode == N->getMachineOpcode()) {
169617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      NewOpcode = AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64;
169717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    }
169817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    if (fitsRegClass(DAG, N->getOperand(0), AMDGPU::SReg_64RegClassID)) {
169917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      return N;
170017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    }
170117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    ConstantSDNode *Offset = cast<ConstantSDNode>(N->getOperand(1));
170217e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    SDValue Ops[] = {
170317e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      SDValue(DAG.getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::i128,
170417e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard                                 DAG.getConstant(0, MVT::i64)), 0),
170517e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      N->getOperand(0),
170617e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard      DAG.getConstant(Offset->getSExtValue() << 2, MVT::i32)
170717e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    };
170817e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard    return DAG.getMachineNode(NewOpcode, DL, N->getVTList(), Ops);
170917e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  }
171017e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard  }
171117e8ad67f0ba5e81e53ce21cd260fe3368d6231dTom Stellard}
1712e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard
1713e5fcc0dee4b41658986047f346201ad98757e7d5Tom StellardSDValue SITargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
1714e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                               const TargetRegisterClass *RC,
1715e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                                               unsigned Reg, EVT VT) const {
1716e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  SDValue VReg = AMDGPUTargetLowering::CreateLiveInRegister(DAG, RC, Reg, VT);
1717e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard
1718e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard  return DAG.getCopyFromReg(DAG.getEntryNode(), SDLoc(DAG.getEntryNode()),
1719e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard                            cast<RegisterSDNode>(VReg)->getReg(), VT);
1720e5fcc0dee4b41658986047f346201ad98757e7d5Tom Stellard}
1721