X86InstrInfo.cpp revision 71c25b7d7bdf9b49dd70965c7486ce930b846aac
//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86InstrInfo.h"
#include "X86.h"
#include "X86GenInstrInfo.inc"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"

#include <limits>

using namespace llvm;

static cl::opt<bool>
NoFusing("disable-spill-fusing",
         cl::desc("Disable fusing of spill code into instructions"));
static cl::opt<bool>
PrintFailedFusing("print-failed-fuse-candidates",
                  cl::desc("Print instructions that the allocator wants to"
                           " fuse, but the X86 backend currently can't"),
                  cl::Hidden);
static cl::opt<bool>
ReMatPICStubLoad("remat-pic-stub-load",
                 cl::desc("Re-materialize load from stub in PIC mode"),
                 cl::init(false), cl::Hidden);

X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
    TM(tm), RI(tm, *this) {
  SmallVector<unsigned,16> AmbEntries;
  static const unsigned OpTbl2Addr[][2] = {
    { X86::ADC32ri,     X86::ADC32mi },
    { X86::ADC32ri8,    X86::ADC32mi8 },
    { X86::ADC32rr,     X86::ADC32mr },
    { X86::ADC64ri32,   X86::ADC64mi32 },
    { X86::ADC64ri8,    X86::ADC64mi8 },
    { X86::ADC64rr,     X86::ADC64mr },
    { X86::ADD16ri,     X86::ADD16mi },
    { X86::ADD16ri8,    X86::ADD16mi8 },
    { X86::ADD16rr,     X86::ADD16mr },
    { X86::ADD32ri,     X86::ADD32mi },
    { X86::ADD32ri8,    X86::ADD32mi8 },
    { X86::ADD32rr,     X86::ADD32mr },
    { X86::ADD64ri32,   X86::ADD64mi32 },
    { X86::ADD64ri8,    X86::ADD64mi8 },
    { X86::ADD64rr,     X86::ADD64mr },
    { X86::ADD8ri,      X86::ADD8mi },
    { X86::ADD8rr,      X86::ADD8mr },
    { X86::AND16ri,     X86::AND16mi },
    { X86::AND16ri8,    X86::AND16mi8 },
    { X86::AND16rr,     X86::AND16mr },
    { X86::AND32ri,     X86::AND32mi },
    { X86::AND32ri8,    X86::AND32mi8 },
    { X86::AND32rr,     X86::AND32mr },
    { X86::AND64ri32,   X86::AND64mi32 },
    { X86::AND64ri8,    X86::AND64mi8 },
    { X86::AND64rr,     X86::AND64mr },
    { X86::AND8ri,      X86::AND8mi },
    { X86::AND8rr,      X86::AND8mr },
    { X86::DEC16r,      X86::DEC16m },
    { X86::DEC32r,      X86::DEC32m },
    { X86::DEC64_16r,   X86::DEC64_16m },
    { X86::DEC64_32r,   X86::DEC64_32m },
    { X86::DEC64r,      X86::DEC64m },
    { X86::DEC8r,       X86::DEC8m },
    { X86::INC16r,      X86::INC16m },
    { X86::INC32r,      X86::INC32m },
    { X86::INC64_16r,   X86::INC64_16m },
    { X86::INC64_32r,   X86::INC64_32m },
    { X86::INC64r,      X86::INC64m },
    { X86::INC8r,       X86::INC8m },
    { X86::NEG16r,      X86::NEG16m },
    { X86::NEG32r,      X86::NEG32m },
    { X86::NEG64r,      X86::NEG64m },
    { X86::NEG8r,       X86::NEG8m },
    { X86::NOT16r,      X86::NOT16m },
    { X86::NOT32r,      X86::NOT32m },
    { X86::NOT64r,      X86::NOT64m },
    { X86::NOT8r,       X86::NOT8m },
    { X86::OR16ri,      X86::OR16mi },
    { X86::OR16ri8,     X86::OR16mi8 },
    { X86::OR16rr,      X86::OR16mr },
    { X86::OR32ri,      X86::OR32mi },
    { X86::OR32ri8,     X86::OR32mi8 },
    { X86::OR32rr,      X86::OR32mr },
    { X86::OR64ri32,    X86::OR64mi32 },
    { X86::OR64ri8,     X86::OR64mi8 },
    { X86::OR64rr,      X86::OR64mr },
    { X86::OR8ri,       X86::OR8mi },
    { X86::OR8rr,       X86::OR8mr },
    { X86::ROL16r1,     X86::ROL16m1 },
    { X86::ROL16rCL,    X86::ROL16mCL },
    { X86::ROL16ri,     X86::ROL16mi },
    { X86::ROL32r1,     X86::ROL32m1 },
    { X86::ROL32rCL,    X86::ROL32mCL },
    { X86::ROL32ri,     X86::ROL32mi },
    { X86::ROL64r1,     X86::ROL64m1 },
    { X86::ROL64rCL,    X86::ROL64mCL },
    { X86::ROL64ri,     X86::ROL64mi },
    { X86::ROL8r1,      X86::ROL8m1 },
    { X86::ROL8rCL,     X86::ROL8mCL },
    { X86::ROL8ri,      X86::ROL8mi },
    { X86::ROR16r1,     X86::ROR16m1 },
    { X86::ROR16rCL,    X86::ROR16mCL },
    { X86::ROR16ri,     X86::ROR16mi },
    { X86::ROR32r1,     X86::ROR32m1 },
    { X86::ROR32rCL,    X86::ROR32mCL },
    { X86::ROR32ri,     X86::ROR32mi },
    { X86::ROR64r1,     X86::ROR64m1 },
    { X86::ROR64rCL,    X86::ROR64mCL },
    { X86::ROR64ri,     X86::ROR64mi },
    { X86::ROR8r1,      X86::ROR8m1 },
    { X86::ROR8rCL,     X86::ROR8mCL },
    { X86::ROR8ri,      X86::ROR8mi },
    { X86::SAR16r1,     X86::SAR16m1 },
    { X86::SAR16rCL,    X86::SAR16mCL },
    { X86::SAR16ri,     X86::SAR16mi },
    { X86::SAR32r1,     X86::SAR32m1 },
    { X86::SAR32rCL,    X86::SAR32mCL },
    { X86::SAR32ri,     X86::SAR32mi },
    { X86::SAR64r1,     X86::SAR64m1 },
    { X86::SAR64rCL,    X86::SAR64mCL },
    { X86::SAR64ri,     X86::SAR64mi },
    { X86::SAR8r1,      X86::SAR8m1 },
    { X86::SAR8rCL,     X86::SAR8mCL },
    { X86::SAR8ri,      X86::SAR8mi },
    { X86::SBB32ri,     X86::SBB32mi },
    { X86::SBB32ri8,    X86::SBB32mi8 },
    { X86::SBB32rr,     X86::SBB32mr },
    { X86::SBB64ri32,   X86::SBB64mi32 },
    { X86::SBB64ri8,    X86::SBB64mi8 },
    { X86::SBB64rr,     X86::SBB64mr },
    { X86::SHL16rCL,    X86::SHL16mCL },
    { X86::SHL16ri,     X86::SHL16mi },
    { X86::SHL32rCL,    X86::SHL32mCL },
    { X86::SHL32ri,     X86::SHL32mi },
    { X86::SHL64rCL,    X86::SHL64mCL },
    { X86::SHL64ri,     X86::SHL64mi },
    { X86::SHL8rCL,     X86::SHL8mCL },
    { X86::SHL8ri,      X86::SHL8mi },
    { X86::SHLD16rrCL,  X86::SHLD16mrCL },
    { X86::SHLD16rri8,  X86::SHLD16mri8 },
    { X86::SHLD32rrCL,  X86::SHLD32mrCL },
    { X86::SHLD32rri8,  X86::SHLD32mri8 },
    { X86::SHLD64rrCL,  X86::SHLD64mrCL },
    { X86::SHLD64rri8,  X86::SHLD64mri8 },
    { X86::SHR16r1,     X86::SHR16m1 },
    { X86::SHR16rCL,    X86::SHR16mCL },
    { X86::SHR16ri,     X86::SHR16mi },
    { X86::SHR32r1,     X86::SHR32m1 },
    { X86::SHR32rCL,    X86::SHR32mCL },
    { X86::SHR32ri,     X86::SHR32mi },
    { X86::SHR64r1,     X86::SHR64m1 },
    { X86::SHR64rCL,    X86::SHR64mCL },
    { X86::SHR64ri,     X86::SHR64mi },
    { X86::SHR8r1,      X86::SHR8m1 },
    { X86::SHR8rCL,     X86::SHR8mCL },
    { X86::SHR8ri,      X86::SHR8mi },
    { X86::SHRD16rrCL,  X86::SHRD16mrCL },
    { X86::SHRD16rri8,  X86::SHRD16mri8 },
    { X86::SHRD32rrCL,  X86::SHRD32mrCL },
    { X86::SHRD32rri8,  X86::SHRD32mri8 },
    { X86::SHRD64rrCL,  X86::SHRD64mrCL },
    { X86::SHRD64rri8,  X86::SHRD64mri8 },
    { X86::SUB16ri,     X86::SUB16mi },
    { X86::SUB16ri8,    X86::SUB16mi8 },
    { X86::SUB16rr,     X86::SUB16mr },
    { X86::SUB32ri,     X86::SUB32mi },
    { X86::SUB32ri8,    X86::SUB32mi8 },
    { X86::SUB32rr,     X86::SUB32mr },
    { X86::SUB64ri32,   X86::SUB64mi32 },
    { X86::SUB64ri8,    X86::SUB64mi8 },
    { X86::SUB64rr,     X86::SUB64mr },
    { X86::SUB8ri,      X86::SUB8mi },
    { X86::SUB8rr,      X86::SUB8mr },
    { X86::XOR16ri,     X86::XOR16mi },
    { X86::XOR16ri8,    X86::XOR16mi8 },
    { X86::XOR16rr,     X86::XOR16mr },
    { X86::XOR32ri,     X86::XOR32mi },
    { X86::XOR32ri8,    X86::XOR32mi8 },
    { X86::XOR32rr,     X86::XOR32mr },
    { X86::XOR64ri32,   X86::XOR64mi32 },
    { X86::XOR64ri8,    X86::XOR64mi8 },
    { X86::XOR64rr,     X86::XOR64mr },
    { X86::XOR8ri,      X86::XOR8mi },
    { X86::XOR8rr,      X86::XOR8mr }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
    unsigned RegOp = OpTbl2Addr[i][0];
    unsigned MemOp = OpTbl2Addr[i][1];
    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
                                                std::make_pair(MemOp,0))).second)
      assert(false && "Duplicated entries?");
    // Index 0, folded load and store, no alignment requirement.
    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                std::make_pair(RegOp,
                                                              AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }
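
  // Note on the AuxInfo encoding used by MemOp2RegOpTable: the low four bits
  // hold the operand index the memory reference is folded into, bit 4 is set
  // when a load is folded, and bit 5 when a store is folded. A lookup result
  // can thus be decoded as, e.g.:
  //   unsigned OpNum   = AuxInfo & 0xf;       // operand index of the fold
  //   bool FoldedLoad  = (AuxInfo >> 4) & 1;  // a load was folded
  //   bool FoldedStore = (AuxInfo >> 5) & 1;  // a store was folded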

  // The third value is 1 if the entry folds a load, 0 if it folds a store.
  static const unsigned OpTbl0[][4] = {
    { X86::BT16ri8,     X86::BT16mi8, 1, 0 },
    { X86::BT32ri8,     X86::BT32mi8, 1, 0 },
    { X86::BT64ri8,     X86::BT64mi8, 1, 0 },
    { X86::CALL32r,     X86::CALL32m, 1, 0 },
    { X86::CALL64r,     X86::CALL64m, 1, 0 },
    { X86::CMP16ri,     X86::CMP16mi, 1, 0 },
    { X86::CMP16ri8,    X86::CMP16mi8, 1, 0 },
    { X86::CMP16rr,     X86::CMP16mr, 1, 0 },
    { X86::CMP32ri,     X86::CMP32mi, 1, 0 },
    { X86::CMP32ri8,    X86::CMP32mi8, 1, 0 },
    { X86::CMP32rr,     X86::CMP32mr, 1, 0 },
    { X86::CMP64ri32,   X86::CMP64mi32, 1, 0 },
    { X86::CMP64ri8,    X86::CMP64mi8, 1, 0 },
    { X86::CMP64rr,     X86::CMP64mr, 1, 0 },
    { X86::CMP8ri,      X86::CMP8mi, 1, 0 },
    { X86::CMP8rr,      X86::CMP8mr, 1, 0 },
    { X86::DIV16r,      X86::DIV16m, 1, 0 },
    { X86::DIV32r,      X86::DIV32m, 1, 0 },
    { X86::DIV64r,      X86::DIV64m, 1, 0 },
    { X86::DIV8r,       X86::DIV8m, 1, 0 },
    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0, 16 },
    { X86::FsMOVAPDrr,  X86::MOVSDmr, 0, 0 },
    { X86::FsMOVAPSrr,  X86::MOVSSmr, 0, 0 },
    { X86::IDIV16r,     X86::IDIV16m, 1, 0 },
    { X86::IDIV32r,     X86::IDIV32m, 1, 0 },
    { X86::IDIV64r,     X86::IDIV64m, 1, 0 },
    { X86::IDIV8r,      X86::IDIV8m, 1, 0 },
    { X86::IMUL16r,     X86::IMUL16m, 1, 0 },
    { X86::IMUL32r,     X86::IMUL32m, 1, 0 },
    { X86::IMUL64r,     X86::IMUL64m, 1, 0 },
    { X86::IMUL8r,      X86::IMUL8m, 1, 0 },
    { X86::JMP32r,      X86::JMP32m, 1, 0 },
    { X86::JMP64r,      X86::JMP64m, 1, 0 },
    { X86::MOV16ri,     X86::MOV16mi, 0, 0 },
    { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
    { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
    { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
    { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
    { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
    { X86::MOV8rr,      X86::MOV8mr, 0, 0 },
    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0, 0 },
    { X86::MOVAPDrr,    X86::MOVAPDmr, 0, 16 },
    { X86::MOVAPSrr,    X86::MOVAPSmr, 0, 16 },
    { X86::MOVDQArr,    X86::MOVDQAmr, 0, 16 },
    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
    { X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
    { X86::MOVPS2SSrr,  X86::MOVPS2SSmr, 0, 0 },
    { X86::MOVSDrr,     X86::MOVSDmr, 0, 0 },
    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
    { X86::MOVSS2DIrr,  X86::MOVSS2DImr, 0, 0 },
    { X86::MOVSSrr,     X86::MOVSSmr, 0, 0 },
    { X86::MOVUPDrr,    X86::MOVUPDmr, 0, 0 },
    { X86::MOVUPSrr,    X86::MOVUPSmr, 0, 0 },
    { X86::MUL16r,      X86::MUL16m, 1, 0 },
    { X86::MUL32r,      X86::MUL32m, 1, 0 },
    { X86::MUL64r,      X86::MUL64m, 1, 0 },
    { X86::MUL8r,       X86::MUL8m, 1, 0 },
    { X86::SETAEr,      X86::SETAEm, 0, 0 },
    { X86::SETAr,       X86::SETAm, 0, 0 },
    { X86::SETBEr,      X86::SETBEm, 0, 0 },
    { X86::SETBr,       X86::SETBm, 0, 0 },
    { X86::SETEr,       X86::SETEm, 0, 0 },
    { X86::SETGEr,      X86::SETGEm, 0, 0 },
    { X86::SETGr,       X86::SETGm, 0, 0 },
    { X86::SETLEr,      X86::SETLEm, 0, 0 },
    { X86::SETLr,       X86::SETLm, 0, 0 },
    { X86::SETNEr,      X86::SETNEm, 0, 0 },
    { X86::SETNOr,      X86::SETNOm, 0, 0 },
    { X86::SETNPr,      X86::SETNPm, 0, 0 },
    { X86::SETNSr,      X86::SETNSm, 0, 0 },
    { X86::SETOr,       X86::SETOm, 0, 0 },
    { X86::SETPr,       X86::SETPm, 0, 0 },
    { X86::SETSr,       X86::SETSm, 0, 0 },
    { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
    { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
    { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
    { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
    { X86::TEST8ri,     X86::TEST8mi, 1, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
    unsigned RegOp = OpTbl0[i][0];
    unsigned MemOp = OpTbl0[i][1];
    unsigned Align = OpTbl0[i][3];
    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    unsigned FoldedLoad = OpTbl0[i][2];
    // Index 0, folded load or store.
    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                          std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl1[][3] = {
    { X86::CMP16rr,         X86::CMP16rm, 0 },
    { X86::CMP32rr,         X86::CMP32rm, 0 },
    { X86::CMP64rr,         X86::CMP64rm, 0 },
    { X86::CMP8rr,          X86::CMP8rm, 0 },
    { X86::CVTSD2SSrr,      X86::CVTSD2SSrm, 0 },
    { X86::CVTSI2SD64rr,    X86::CVTSI2SD64rm, 0 },
    { X86::CVTSI2SDrr,      X86::CVTSI2SDrm, 0 },
    { X86::CVTSI2SS64rr,    X86::CVTSI2SS64rm, 0 },
    { X86::CVTSI2SSrr,      X86::CVTSI2SSrm, 0 },
    { X86::CVTSS2SDrr,      X86::CVTSS2SDrm, 0 },
    { X86::CVTTSD2SI64rr,   X86::CVTTSD2SI64rm, 0 },
    { X86::CVTTSD2SIrr,     X86::CVTTSD2SIrm, 0 },
    { X86::CVTTSS2SI64rr,   X86::CVTTSS2SI64rm, 0 },
    { X86::CVTTSS2SIrr,     X86::CVTTSS2SIrm, 0 },
    { X86::FsMOVAPDrr,      X86::MOVSDrm, 0 },
    { X86::FsMOVAPSrr,      X86::MOVSSrm, 0 },
    { X86::IMUL16rri,       X86::IMUL16rmi, 0 },
    { X86::IMUL16rri8,      X86::IMUL16rmi8, 0 },
    { X86::IMUL32rri,       X86::IMUL32rmi, 0 },
    { X86::IMUL32rri8,      X86::IMUL32rmi8, 0 },
    { X86::IMUL64rri32,     X86::IMUL64rmi32, 0 },
    { X86::IMUL64rri8,      X86::IMUL64rmi8, 0 },
    { X86::Int_CMPSDrr,     X86::Int_CMPSDrm, 0 },
    { X86::Int_CMPSSrr,     X86::Int_CMPSSrm, 0 },
    { X86::Int_COMISDrr,    X86::Int_COMISDrm, 0 },
    { X86::Int_COMISSrr,    X86::Int_COMISSrm, 0 },
    { X86::Int_CVTDQ2PDrr,  X86::Int_CVTDQ2PDrm, 16 },
    { X86::Int_CVTDQ2PSrr,  X86::Int_CVTDQ2PSrm, 16 },
    { X86::Int_CVTPD2DQrr,  X86::Int_CVTPD2DQrm, 16 },
    { X86::Int_CVTPD2PSrr,  X86::Int_CVTPD2PSrm, 16 },
    { X86::Int_CVTPS2DQrr,  X86::Int_CVTPS2DQrm, 16 },
    { X86::Int_CVTPS2PDrr,  X86::Int_CVTPS2PDrm, 0 },
    { X86::Int_CVTSD2SI64rr,X86::Int_CVTSD2SI64rm, 0 },
    { X86::Int_CVTSD2SIrr,  X86::Int_CVTSD2SIrm, 0 },
    { X86::Int_CVTSD2SSrr,  X86::Int_CVTSD2SSrm, 0 },
    { X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
    { X86::Int_CVTSI2SDrr,  X86::Int_CVTSI2SDrm, 0 },
    { X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
    { X86::Int_CVTSI2SSrr,  X86::Int_CVTSI2SSrm, 0 },
    { X86::Int_CVTSS2SDrr,  X86::Int_CVTSS2SDrm, 0 },
    { X86::Int_CVTSS2SI64rr,X86::Int_CVTSS2SI64rm, 0 },
    { X86::Int_CVTSS2SIrr,  X86::Int_CVTSS2SIrm, 0 },
    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm, 16 },
    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm, 16 },
    { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
    { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
    { X86::Int_UCOMISDrr,   X86::Int_UCOMISDrm, 0 },
    { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
    { X86::MOV16rr,         X86::MOV16rm, 0 },
    { X86::MOV32rr,         X86::MOV32rm, 0 },
    { X86::MOV64rr,         X86::MOV64rm, 0 },
    { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
    { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
    { X86::MOV8rr,          X86::MOV8rm, 0 },
    { X86::MOVAPDrr,        X86::MOVAPDrm, 16 },
    { X86::MOVAPSrr,        X86::MOVAPSrm, 16 },
    { X86::MOVDDUPrr,       X86::MOVDDUPrm, 0 },
    { X86::MOVDI2PDIrr,     X86::MOVDI2PDIrm, 0 },
    { X86::MOVDI2SSrr,      X86::MOVDI2SSrm, 0 },
    { X86::MOVDQArr,        X86::MOVDQArm, 16 },
    { X86::MOVSD2PDrr,      X86::MOVSD2PDrm, 0 },
    { X86::MOVSDrr,         X86::MOVSDrm, 0 },
    { X86::MOVSHDUPrr,      X86::MOVSHDUPrm, 16 },
    { X86::MOVSLDUPrr,      X86::MOVSLDUPrm, 16 },
    { X86::MOVSS2PSrr,      X86::MOVSS2PSrm, 0 },
    { X86::MOVSSrr,         X86::MOVSSrm, 0 },
    { X86::MOVSX16rr8,      X86::MOVSX16rm8, 0 },
    { X86::MOVSX32rr16,     X86::MOVSX32rm16, 0 },
    { X86::MOVSX32rr8,      X86::MOVSX32rm8, 0 },
    { X86::MOVSX64rr16,     X86::MOVSX64rm16, 0 },
    { X86::MOVSX64rr32,     X86::MOVSX64rm32, 0 },
    { X86::MOVSX64rr8,      X86::MOVSX64rm8, 0 },
    { X86::MOVUPDrr,        X86::MOVUPDrm, 16 },
    { X86::MOVUPSrr,        X86::MOVUPSrm, 16 },
    { X86::MOVZDI2PDIrr,    X86::MOVZDI2PDIrm, 0 },
    { X86::MOVZQI2PQIrr,    X86::MOVZQI2PQIrm, 0 },
    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, 16 },
    { X86::MOVZX16rr8,      X86::MOVZX16rm8, 0 },
    { X86::MOVZX32rr16,     X86::MOVZX32rm16, 0 },
    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
    { X86::MOVZX32rr8,      X86::MOVZX32rm8, 0 },
    { X86::MOVZX64rr16,     X86::MOVZX64rm16, 0 },
    { X86::MOVZX64rr32,     X86::MOVZX64rm32, 0 },
    { X86::MOVZX64rr8,      X86::MOVZX64rm8, 0 },
    { X86::PSHUFDri,        X86::PSHUFDmi, 16 },
    { X86::PSHUFHWri,       X86::PSHUFHWmi, 16 },
    { X86::PSHUFLWri,       X86::PSHUFLWmi, 16 },
    { X86::RCPPSr,          X86::RCPPSm, 16 },
    { X86::RCPPSr_Int,      X86::RCPPSm_Int, 16 },
    { X86::RSQRTPSr,        X86::RSQRTPSm, 16 },
    { X86::RSQRTPSr_Int,    X86::RSQRTPSm_Int, 16 },
    { X86::RSQRTSSr,        X86::RSQRTSSm, 0 },
    { X86::RSQRTSSr_Int,    X86::RSQRTSSm_Int, 0 },
    { X86::SQRTPDr,         X86::SQRTPDm, 16 },
    { X86::SQRTPDr_Int,     X86::SQRTPDm_Int, 16 },
    { X86::SQRTPSr,         X86::SQRTPSm, 16 },
    { X86::SQRTPSr_Int,     X86::SQRTPSm_Int, 16 },
    { X86::SQRTSDr,         X86::SQRTSDm, 0 },
    { X86::SQRTSDr_Int,     X86::SQRTSDm_Int, 0 },
    { X86::SQRTSSr,         X86::SQRTSSm, 0 },
    { X86::SQRTSSr_Int,     X86::SQRTSSm_Int, 0 },
    { X86::TEST16rr,        X86::TEST16rm, 0 },
    { X86::TEST32rr,        X86::TEST32rm, 0 },
    { X86::TEST64rr,        X86::TEST64rm, 0 },
    { X86::TEST8rr,         X86::TEST8rm, 0 },
    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
    { X86::UCOMISDrr,       X86::UCOMISDrm, 0 },
    { X86::UCOMISSrr,       X86::UCOMISSrm, 0 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
    unsigned RegOp = OpTbl1[i][0];
    unsigned MemOp = OpTbl1[i][1];
    unsigned Align = OpTbl1[i][2];
    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 1, folded load
    unsigned AuxInfo = 1 | (1 << 4);
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                          std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl2[][3] = {
    { X86::ADC32rr,         X86::ADC32rm, 0 },
    { X86::ADC64rr,         X86::ADC64rm, 0 },
    { X86::ADD16rr,         X86::ADD16rm, 0 },
    { X86::ADD32rr,         X86::ADD32rm, 0 },
    { X86::ADD64rr,         X86::ADD64rm, 0 },
    { X86::ADD8rr,          X86::ADD8rm, 0 },
    { X86::ADDPDrr,         X86::ADDPDrm, 16 },
    { X86::ADDPSrr,         X86::ADDPSrm, 16 },
    { X86::ADDSDrr,         X86::ADDSDrm, 0 },
    { X86::ADDSSrr,         X86::ADDSSrm, 0 },
    { X86::ADDSUBPDrr,      X86::ADDSUBPDrm, 16 },
    { X86::ADDSUBPSrr,      X86::ADDSUBPSrm, 16 },
    { X86::AND16rr,         X86::AND16rm, 0 },
    { X86::AND32rr,         X86::AND32rm, 0 },
    { X86::AND64rr,         X86::AND64rm, 0 },
    { X86::AND8rr,          X86::AND8rm, 0 },
    { X86::ANDNPDrr,        X86::ANDNPDrm, 16 },
    { X86::ANDNPSrr,        X86::ANDNPSrm, 16 },
    { X86::ANDPDrr,         X86::ANDPDrm, 16 },
    { X86::ANDPSrr,         X86::ANDPSrm, 16 },
    { X86::CMOVA16rr,       X86::CMOVA16rm, 0 },
    { X86::CMOVA32rr,       X86::CMOVA32rm, 0 },
    { X86::CMOVA64rr,       X86::CMOVA64rm, 0 },
    { X86::CMOVAE16rr,      X86::CMOVAE16rm, 0 },
    { X86::CMOVAE32rr,      X86::CMOVAE32rm, 0 },
    { X86::CMOVAE64rr,      X86::CMOVAE64rm, 0 },
    { X86::CMOVB16rr,       X86::CMOVB16rm, 0 },
    { X86::CMOVB32rr,       X86::CMOVB32rm, 0 },
    { X86::CMOVB64rr,       X86::CMOVB64rm, 0 },
    { X86::CMOVBE16rr,      X86::CMOVBE16rm, 0 },
    { X86::CMOVBE32rr,      X86::CMOVBE32rm, 0 },
    { X86::CMOVBE64rr,      X86::CMOVBE64rm, 0 },
    { X86::CMOVE16rr,       X86::CMOVE16rm, 0 },
    { X86::CMOVE32rr,       X86::CMOVE32rm, 0 },
    { X86::CMOVE64rr,       X86::CMOVE64rm, 0 },
    { X86::CMOVG16rr,       X86::CMOVG16rm, 0 },
    { X86::CMOVG32rr,       X86::CMOVG32rm, 0 },
    { X86::CMOVG64rr,       X86::CMOVG64rm, 0 },
    { X86::CMOVGE16rr,      X86::CMOVGE16rm, 0 },
    { X86::CMOVGE32rr,      X86::CMOVGE32rm, 0 },
    { X86::CMOVGE64rr,      X86::CMOVGE64rm, 0 },
    { X86::CMOVL16rr,       X86::CMOVL16rm, 0 },
    { X86::CMOVL32rr,       X86::CMOVL32rm, 0 },
    { X86::CMOVL64rr,       X86::CMOVL64rm, 0 },
    { X86::CMOVLE16rr,      X86::CMOVLE16rm, 0 },
    { X86::CMOVLE32rr,      X86::CMOVLE32rm, 0 },
    { X86::CMOVLE64rr,      X86::CMOVLE64rm, 0 },
    { X86::CMOVNE16rr,      X86::CMOVNE16rm, 0 },
    { X86::CMOVNE32rr,      X86::CMOVNE32rm, 0 },
    { X86::CMOVNE64rr,      X86::CMOVNE64rm, 0 },
    { X86::CMOVNO16rr,      X86::CMOVNO16rm, 0 },
    { X86::CMOVNO32rr,      X86::CMOVNO32rm, 0 },
    { X86::CMOVNO64rr,      X86::CMOVNO64rm, 0 },
    { X86::CMOVNP16rr,      X86::CMOVNP16rm, 0 },
    { X86::CMOVNP32rr,      X86::CMOVNP32rm, 0 },
    { X86::CMOVNP64rr,      X86::CMOVNP64rm, 0 },
    { X86::CMOVNS16rr,      X86::CMOVNS16rm, 0 },
    { X86::CMOVNS32rr,      X86::CMOVNS32rm, 0 },
    { X86::CMOVNS64rr,      X86::CMOVNS64rm, 0 },
    { X86::CMOVO16rr,       X86::CMOVO16rm, 0 },
    { X86::CMOVO32rr,       X86::CMOVO32rm, 0 },
    { X86::CMOVO64rr,       X86::CMOVO64rm, 0 },
    { X86::CMOVP16rr,       X86::CMOVP16rm, 0 },
    { X86::CMOVP32rr,       X86::CMOVP32rm, 0 },
    { X86::CMOVP64rr,       X86::CMOVP64rm, 0 },
    { X86::CMOVS16rr,       X86::CMOVS16rm, 0 },
    { X86::CMOVS32rr,       X86::CMOVS32rm, 0 },
    { X86::CMOVS64rr,       X86::CMOVS64rm, 0 },
    { X86::CMPPDrri,        X86::CMPPDrmi, 16 },
    { X86::CMPPSrri,        X86::CMPPSrmi, 16 },
    { X86::CMPSDrr,         X86::CMPSDrm, 0 },
    { X86::CMPSSrr,         X86::CMPSSrm, 0 },
    { X86::DIVPDrr,         X86::DIVPDrm, 16 },
    { X86::DIVPSrr,         X86::DIVPSrm, 16 },
    { X86::DIVSDrr,         X86::DIVSDrm, 0 },
    { X86::DIVSSrr,         X86::DIVSSrm, 0 },
    { X86::FsANDNPDrr,      X86::FsANDNPDrm, 16 },
    { X86::FsANDNPSrr,      X86::FsANDNPSrm, 16 },
    { X86::FsANDPDrr,       X86::FsANDPDrm, 16 },
    { X86::FsANDPSrr,       X86::FsANDPSrm, 16 },
    { X86::FsORPDrr,        X86::FsORPDrm, 16 },
    { X86::FsORPSrr,        X86::FsORPSrm, 16 },
    { X86::FsXORPDrr,       X86::FsXORPDrm, 16 },
    { X86::FsXORPSrr,       X86::FsXORPSrm, 16 },
    { X86::HADDPDrr,        X86::HADDPDrm, 16 },
    { X86::HADDPSrr,        X86::HADDPSrm, 16 },
    { X86::HSUBPDrr,        X86::HSUBPDrm, 16 },
    { X86::HSUBPSrr,        X86::HSUBPSrm, 16 },
    { X86::IMUL16rr,        X86::IMUL16rm, 0 },
    { X86::IMUL32rr,        X86::IMUL32rm, 0 },
    { X86::IMUL64rr,        X86::IMUL64rm, 0 },
    { X86::MAXPDrr,         X86::MAXPDrm, 16 },
    { X86::MAXPDrr_Int,     X86::MAXPDrm_Int, 16 },
    { X86::MAXPSrr,         X86::MAXPSrm, 16 },
    { X86::MAXPSrr_Int,     X86::MAXPSrm_Int, 16 },
    { X86::MAXSDrr,         X86::MAXSDrm, 0 },
    { X86::MAXSDrr_Int,     X86::MAXSDrm_Int, 0 },
    { X86::MAXSSrr,         X86::MAXSSrm, 0 },
    { X86::MAXSSrr_Int,     X86::MAXSSrm_Int, 0 },
    { X86::MINPDrr,         X86::MINPDrm, 16 },
    { X86::MINPDrr_Int,     X86::MINPDrm_Int, 16 },
    { X86::MINPSrr,         X86::MINPSrm, 16 },
    { X86::MINPSrr_Int,     X86::MINPSrm_Int, 16 },
    { X86::MINSDrr,         X86::MINSDrm, 0 },
    { X86::MINSDrr_Int,     X86::MINSDrm_Int, 0 },
    { X86::MINSSrr,         X86::MINSSrm, 0 },
    { X86::MINSSrr_Int,     X86::MINSSrm_Int, 0 },
    { X86::MULPDrr,         X86::MULPDrm, 16 },
    { X86::MULPSrr,         X86::MULPSrm, 16 },
    { X86::MULSDrr,         X86::MULSDrm, 0 },
    { X86::MULSSrr,         X86::MULSSrm, 0 },
    { X86::OR16rr,          X86::OR16rm, 0 },
    { X86::OR32rr,          X86::OR32rm, 0 },
    { X86::OR64rr,          X86::OR64rm, 0 },
    { X86::OR8rr,           X86::OR8rm, 0 },
    { X86::ORPDrr,          X86::ORPDrm, 16 },
    { X86::ORPSrr,          X86::ORPSrm, 16 },
    { X86::PACKSSDWrr,      X86::PACKSSDWrm, 16 },
    { X86::PACKSSWBrr,      X86::PACKSSWBrm, 16 },
    { X86::PACKUSWBrr,      X86::PACKUSWBrm, 16 },
    { X86::PADDBrr,         X86::PADDBrm, 16 },
    { X86::PADDDrr,         X86::PADDDrm, 16 },
    { X86::PADDQrr,         X86::PADDQrm, 16 },
    { X86::PADDSBrr,        X86::PADDSBrm, 16 },
    { X86::PADDSWrr,        X86::PADDSWrm, 16 },
    { X86::PADDWrr,         X86::PADDWrm, 16 },
    { X86::PANDNrr,         X86::PANDNrm, 16 },
    { X86::PANDrr,          X86::PANDrm, 16 },
    { X86::PAVGBrr,         X86::PAVGBrm, 16 },
    { X86::PAVGWrr,         X86::PAVGWrm, 16 },
    { X86::PCMPEQBrr,       X86::PCMPEQBrm, 16 },
    { X86::PCMPEQDrr,       X86::PCMPEQDrm, 16 },
    { X86::PCMPEQWrr,       X86::PCMPEQWrm, 16 },
    { X86::PCMPGTBrr,       X86::PCMPGTBrm, 16 },
    { X86::PCMPGTDrr,       X86::PCMPGTDrm, 16 },
    { X86::PCMPGTWrr,       X86::PCMPGTWrm, 16 },
    { X86::PINSRWrri,       X86::PINSRWrmi, 16 },
    { X86::PMADDWDrr,       X86::PMADDWDrm, 16 },
    { X86::PMAXSWrr,        X86::PMAXSWrm, 16 },
    { X86::PMAXUBrr,        X86::PMAXUBrm, 16 },
    { X86::PMINSWrr,        X86::PMINSWrm, 16 },
    { X86::PMINUBrr,        X86::PMINUBrm, 16 },
    { X86::PMULDQrr,        X86::PMULDQrm, 16 },
    { X86::PMULHUWrr,       X86::PMULHUWrm, 16 },
    { X86::PMULHWrr,        X86::PMULHWrm, 16 },
    { X86::PMULLDrr,        X86::PMULLDrm, 16 },
    { X86::PMULLDrr_int,    X86::PMULLDrm_int, 16 },
    { X86::PMULLWrr,        X86::PMULLWrm, 16 },
    { X86::PMULUDQrr,       X86::PMULUDQrm, 16 },
    { X86::PORrr,           X86::PORrm, 16 },
    { X86::PSADBWrr,        X86::PSADBWrm, 16 },
    { X86::PSLLDrr,         X86::PSLLDrm, 16 },
    { X86::PSLLQrr,         X86::PSLLQrm, 16 },
    { X86::PSLLWrr,         X86::PSLLWrm, 16 },
    { X86::PSRADrr,         X86::PSRADrm, 16 },
    { X86::PSRAWrr,         X86::PSRAWrm, 16 },
    { X86::PSRLDrr,         X86::PSRLDrm, 16 },
    { X86::PSRLQrr,         X86::PSRLQrm, 16 },
    { X86::PSRLWrr,         X86::PSRLWrm, 16 },
    { X86::PSUBBrr,         X86::PSUBBrm, 16 },
    { X86::PSUBDrr,         X86::PSUBDrm, 16 },
    { X86::PSUBSBrr,        X86::PSUBSBrm, 16 },
    { X86::PSUBSWrr,        X86::PSUBSWrm, 16 },
    { X86::PSUBWrr,         X86::PSUBWrm, 16 },
    { X86::PUNPCKHBWrr,     X86::PUNPCKHBWrm, 16 },
    { X86::PUNPCKHDQrr,     X86::PUNPCKHDQrm, 16 },
    { X86::PUNPCKHQDQrr,    X86::PUNPCKHQDQrm, 16 },
    { X86::PUNPCKHWDrr,     X86::PUNPCKHWDrm, 16 },
    { X86::PUNPCKLBWrr,     X86::PUNPCKLBWrm, 16 },
    { X86::PUNPCKLDQrr,     X86::PUNPCKLDQrm, 16 },
    { X86::PUNPCKLQDQrr,    X86::PUNPCKLQDQrm, 16 },
    { X86::PUNPCKLWDrr,     X86::PUNPCKLWDrm, 16 },
    { X86::PXORrr,          X86::PXORrm, 16 },
    { X86::SBB32rr,         X86::SBB32rm, 0 },
    { X86::SBB64rr,         X86::SBB64rm, 0 },
    { X86::SHUFPDrri,       X86::SHUFPDrmi, 16 },
    { X86::SHUFPSrri,       X86::SHUFPSrmi, 16 },
    { X86::SUB16rr,         X86::SUB16rm, 0 },
    { X86::SUB32rr,         X86::SUB32rm, 0 },
    { X86::SUB64rr,         X86::SUB64rm, 0 },
    { X86::SUB8rr,          X86::SUB8rm, 0 },
    { X86::SUBPDrr,         X86::SUBPDrm, 16 },
    { X86::SUBPSrr,         X86::SUBPSrm, 16 },
    { X86::SUBSDrr,         X86::SUBSDrm, 0 },
    { X86::SUBSSrr,         X86::SUBSSrm, 0 },
    // FIXME: TEST*rr -> swapped operand of TEST*mr.
    { X86::UNPCKHPDrr,      X86::UNPCKHPDrm, 16 },
    { X86::UNPCKHPSrr,      X86::UNPCKHPSrm, 16 },
    { X86::UNPCKLPDrr,      X86::UNPCKLPDrm, 16 },
    { X86::UNPCKLPSrr,      X86::UNPCKLPSrm, 16 },
    { X86::XOR16rr,         X86::XOR16rm, 0 },
    { X86::XOR32rr,         X86::XOR32rm, 0 },
    { X86::XOR64rr,         X86::XOR64rm, 0 },
    { X86::XOR8rr,          X86::XOR8rm, 0 },
    { X86::XORPDrr,         X86::XORPDrm, 16 },
    { X86::XORPSrr,         X86::XORPSrm, 16 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
    unsigned RegOp = OpTbl2[i][0];
    unsigned MemOp = OpTbl2[i][1];
    unsigned Align = OpTbl2[i][2];
    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
                                           std::make_pair(MemOp,Align))).second)
      assert(false && "Duplicated entries?");
    // Index 2, folded load
    unsigned AuxInfo = 2 | (1 << 4);
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                        std::make_pair(RegOp, AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // Remove ambiguous entries.
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::MOV8rr:
  case X86::MOV8rr_NOREX:
  case X86::MOV16rr:
  case X86::MOV32rr:
  case X86::MOV64rr:
  case X86::MOVSSrr:
  case X86::MOVSDrr:

  // FP Stack register class copies
  case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
  case X86::MOV_Fp3264: case X86::MOV_Fp3280:
  case X86::MOV_Fp6432: case X86::MOV_Fp8032:

  case X86::FsMOVAPSrr:
  case X86::FsMOVAPDrr:
  case X86::MOVAPSrr:
  case X86::MOVAPDrr:
  case X86::MOVDQArr:
  case X86::MOVSS2PSrr:
  case X86::MOVSD2PDrr:
  case X86::MOVPS2SSrr:
  case X86::MOVPD2SDrr:
  case X86::MMX_MOVQ64rr:
    assert(MI.getNumOperands() >= 2 &&
           MI.getOperand(0).isReg() &&
           MI.getOperand(1).isReg() &&
           "invalid register-register move instruction");
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SrcSubIdx = MI.getOperand(1).getSubReg();
    DstSubIdx = MI.getOperand(0).getSubReg();
    return true;
  }
}

/// isFrameOperand - Return true and the FrameIndex if the specified
/// operand and the following operands form a reference to the stack frame.
bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
                                  int &FrameIndex) const {
  if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
      MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
      MI->getOperand(Op+1).getImm() == 1 &&
      MI->getOperand(Op+2).getReg() == 0 &&
      MI->getOperand(Op+3).getImm() == 0) {
    FrameIndex = MI->getOperand(Op).getIndex();
    return true;
  }
  return false;
}
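
// For reference, the operands checked above are the x86 address components
// <base, scale, index, displacement>, so a direct stack-slot access has the
// shape:
//   <fi#N>, 1, %noreg, 0    ; frame-index base, scale 1, no index, disp 0
// Any other shape (an index register, a scale, or a nonzero displacement) is
// not treated as a simple frame reference.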

static bool isFrameLoadOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    return true;
  }
  return false;
}

static bool isFrameStoreOpcode(int Opcode) {
  switch (Opcode) {
  default: break;
  case X86::MOV8mr:
  case X86::MOV16mr:
  case X86::MOV32mr:
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::MOVAPSmr:
  case X86::MOVAPDmr:
  case X86::MOVDQAmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
    return true;
  }
  return false;
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 1, FrameIndex))
      return MI->getOperand(0).getReg();
  return 0;
}

unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
                                                 int &FrameIndex) const {
  if (isFrameLoadOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
                                        const MachineMemOperand *&MMO,
                                        int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isLoad() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                          int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode()))
    if (isFrameOperand(MI, 0, FrameIndex))
      return MI->getOperand(X86AddrNumOperands).getReg();
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
                                                int &FrameIndex) const {
  if (isFrameStoreOpcode(MI->getOpcode())) {
    unsigned Reg;
    if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
      return Reg;
    // Check for post-frame index elimination operations
    const MachineMemOperand *Dummy;
    return hasStoreToStackSlot(MI, Dummy, FrameIndex);
  }
  return 0;
}

bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
                                       const MachineMemOperand *&MMO,
                                       int &FrameIndex) const {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end();
       o != oe;
       ++o) {
    if ((*o)->isStore() && (*o)->getValue())
      if (const FixedStackPseudoSourceValue *Value =
          dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
        FrameIndex = Value->getFrameIndex();
        MMO = *o;
        return true;
      }
  }
  return false;
}

/// regIsPICBase - Return true if register is PIC base (i.e., defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  bool isPICBase = false;
  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
         E = MRI.def_end(); I != E; ++I) {
    MachineInstr *DefMI = I.getOperand().getParent();
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}
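
// A PIC base is normally materialized exactly once per function by MOVPC32r,
// which lowers to a call/pop sequence that leaves the current PC in a
// register, roughly:
//   call  1f
// 1: popl  %reg
// Any register with a different defining instruction is rejected above.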

bool
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
                                                AliasAnalysis *AA) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVUPSrm:
  case X86::MOVUPSrm_Int:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
  case X86::FsMOVAPSrm:
  case X86::FsMOVAPDrm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI->getOperand(1).isReg() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        MI->isInvariantLoad(AA)) {
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
        return false;
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isPICBase = false;
      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
             E = MRI.def_end(); I != E; ++I) {
        MachineInstr *DefMI = I.getOperand().getParent();
        if (DefMI->getOpcode() != X86::MOVPC32r)
          return false;
        assert(!isPICBase && "More than one PIC base?");
        isPICBase = true;
      }
      return isPICBase;
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        !MI->getOperand(4).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
      if (!MI->getOperand(1).isReg())
        return true;
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}
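
// As an illustration of the checks above: a load such as
//   movss <constant-pool>, %xmm0
// reads memory that never changes, so rather than spilling and reloading
// %xmm0 the register allocator may simply re-execute the load right before
// each use.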

/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
/// that would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
/// a few instructions in each direction it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  // It's always safe to clobber EFLAGS at the end of a block.
  if (I == MBB.end())
    return true;

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 4 instructions in each direction, we will assume
  // it's not safe.
  MachineBasicBlock::iterator Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++Iter;

    // If we make it to the end of the block, it's safe to clobber EFLAGS.
    if (Iter == MBB.end())
      return true;
  }

  Iter = I;
  for (unsigned i = 0; i < 4; ++i) {
    // If we make it to the beginning of the block, it's safe to clobber
    // EFLAGS iff EFLAGS is not live-in.
    if (Iter == MBB.begin())
      return !MBB.isLiveIn(X86::EFLAGS);

    --Iter;
    bool SawKill = false;
    for (unsigned j = 0, e = Iter->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = Iter->getOperand(j);
      if (MO.isReg() && MO.getReg() == X86::EFLAGS) {
        if (MO.isDef()) return MO.isDead();
        if (MO.isKill()) SawKill = true;
      }
    }

    if (SawKill)
      // This instruction kills EFLAGS and doesn't redefine it, so
      // there's no need to look further.
      return true;
  }

  // Conservative answer.
  return false;
}
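
// reMaterialize below relies on this check: the MOV*r0 pseudo-instructions
// are implemented with a self-xor, which writes EFLAGS. At points where
// EFLAGS cannot safely be clobbered,
//   xorl %eax, %eax     ; sets EFLAGS
// must be re-materialized instead as
//   movl $0, %eax       ; leaves EFLAGS untouched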

void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg, unsigned SubIdx,
                                 const MachineInstr *Orig,
                                 const TargetRegisterInfo *TRI) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (I != MBB.end()) DL = I->getDebugLoc();

  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg = TRI->getSubReg(DestReg, SubIdx);
    SubIdx = 0;
  }

  // MOV32r0 etc. are implemented with xor which clobbers condition code.
  // Re-materialize them as movri instructions to avoid side effects.
  bool Clone = true;
  unsigned Opc = Orig->getOpcode();
  switch (Opc) {
  default: break;
  case X86::MOV8r0:
  case X86::MOV16r0:
  case X86::MOV32r0:
  case X86::MOV64r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      switch (Opc) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
      case X86::MOV64r0: Opc = X86::MOV64ri; break;
      }
      Clone = false;
    }
    break;
  }
  }

  if (Clone) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
    MBB.insert(I, MI);
  } else {
    BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
  }

  MachineInstr *NewMI = prior(I);
  NewMI->getOperand(0).setSubReg(SubIdx);
}

/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
/// is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
/// 16-bit LEA is disabled: form 3-address code with a 32-bit LEA by promoting
/// the operands to a 32-bit superregister and then truncating back down to a
/// 16-bit subregister.
MachineInstr *
X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
                                           MachineFunction::iterator &MFI,
                                           MachineBasicBlock::iterator &MBBI,
                                           LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
    ? X86::LEA64_32r : X86::LEA32r;
  MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
  unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
  unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

  // Build and insert into an implicit UNDEF value. This is OK because
  // we'll be shifting and then extracting the lower 16-bits.
  // This has the potential to cause a partial register stall, e.g.
  //   movw (%rbp,%rcx,2), %dx
  //   leal -65(%rdx), %esi
  // But testing has shown this *does* help performance in 64-bit mode (at
  // least on modern x86 machines).
  BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
  MachineInstr *InsMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
    .addReg(leaInReg)
    .addReg(Src, getKillRegState(isKill))
    .addImm(X86::SUBREG_16BIT);

  MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
                                    get(Opc), leaOutReg);
  switch (MIOpc) {
  default:
    llvm_unreachable(0);
    break;
  case X86::SHL16ri: {
    unsigned ShAmt = MI->getOperand(2).getImm();
    MIB.addReg(0).addImm(1 << ShAmt)
       .addReg(leaInReg, RegState::Kill).addImm(0);
    break;
  }
  case X86::INC16r:
  case X86::INC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, 1);
    break;
  case X86::DEC16r:
  case X86::DEC64_16r:
    addLeaRegOffset(MIB, leaInReg, true, -1);
    break;
  case X86::ADD16ri:
  case X86::ADD16ri8:
    addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
    break;
  case X86::ADD16rr: {
    unsigned Src2 = MI->getOperand(2).getReg();
    bool isKill2 = MI->getOperand(2).isKill();
    unsigned leaInReg2 = 0;
    MachineInstr *InsMI2 = 0;
    if (Src == Src2) {
      // ADD16rr %reg1028<kill>, %reg1028
      // just a single insert_subreg.
      addRegReg(MIB, leaInReg, true, leaInReg, false);
    } else {
      leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
      BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
      InsMI2 =
        BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
        .addReg(leaInReg2)
        .addReg(Src2, getKillRegState(isKill2))
        .addImm(X86::SUBREG_16BIT);
      addRegReg(MIB, leaInReg, true, leaInReg2, true);
    }
    if (LV && isKill2 && InsMI2)
      LV->replaceKillInstruction(Src2, MI, InsMI2);
    break;
  }
  }

  MachineInstr *NewMI = MIB;
  MachineInstr *ExtMI =
    BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
    .addReg(Dest, RegState::Define | getDeadRegState(isDead))
    .addReg(leaOutReg, RegState::Kill)
    .addImm(X86::SUBREG_16BIT);

  if (LV) {
    // Update live variables
    LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
    LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
    if (isKill)
      LV->replaceKillInstruction(Src, MI, InsMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, ExtMI);
  }

  return ExtMI;
}

/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
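/// For example, a two-address add such as
///   %reg1 = ADD32rr %reg2<kill>, %reg3
/// would first need a copy of %reg2 into %reg1, whereas the three-address
///   %reg1 = LEA32r %reg2, 1, %reg3, 0
/// computes the same sum (base + 1*index + 0) without tying an operand to
/// the destination.
///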
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions. Get the known operands.
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEAs are really slow on Athlons, but not bad on P4s. When
  // we have better subtarget support, enable the 16-bit LEA generation here.
  // 16-bit LEA is also slow on Core2.
  bool DisableLEA16 = true;
  bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();

  unsigned MIOpc = MI->getOpcode();
  switch (MIOpc) {
  case X86::SHUFPSrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned A = MI->getOperand(0).getReg();
    unsigned M = MI->getOperand(3).getImm();
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addReg(A, RegState::Define | getDeadRegState(isDead))
      .addReg(B, getKillRegState(isKill)).addImm(M);
    break;
  }
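  // The SHUFPS case above is possible because when both sources are the same
  // register the shuffle can be expressed as, e.g.,
  //   pshufd $mask, %xmm1, %xmm0
  // and PSHUFD's destination is independent of its source, so the two-address
  // tie disappears (PSHUFD requires SSE2, hence the check).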
  case X86::SHL64ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
    // use the flags produced by a shift, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
    // use the flags produced by a shift, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill)).addImm(0);
    break;
  }
  case X86::SHL16ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM doesn't yet
    // use the flags produced by a shift, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    if (DisableLEA16)
      return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0);
    break;
  }
  default: {
    // The following opcodes also set the condition code register(s). Only
    // convert them to an equivalent LEA if the condition code register defs
    // are dead!
    if (hasLiveCondCodeDef(MI))
      return 0;

    switch (MIOpc) {
    default: return 0;
    case X86::INC64r:
    case X86::INC32r:
    case X86::INC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, 1);
      break;
    }
    case X86::INC16r:
    case X86::INC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, 1);
      break;
    case X86::DEC64r:
    case X86::DEC32r:
    case X86::DEC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, -1);
      break;
    }
    case X86::DEC16r:
    case X86::DEC64_16r:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, -1);
      break;
    case X86::ADD64rr:
    case X86::ADD32rr: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD16rr: {
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD64ri32:
    case X86::ADD64ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::ADD32ri:
    case X86::ADD32ri8: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, MI->getOperand(2).getImm());
      break;
    }
    case X86::ADD16ri:
    case X86::ADD16ri8:
      if (DisableLEA16)
        return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, MI->getOperand(2).getImm());
      break;
    }
  }
  }

  if (!NewMI) return 0;

  if (LV) {  // Update live variables
    if (isKill)
      LV->replaceKillInstruction(Src, MI, NewMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, NewMI);
  }

  MFI->insert(MBBI, NewMI);          // Insert the new inst
  return NewMI;
}

/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
///
MachineInstr *
X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const {
  switch (MI->getOpcode()) {
  case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I)
  case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I)
  case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I)
  case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I)
  case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I)
  case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I)
    unsigned Opc;
    unsigned Size;
    switch (MI->getOpcode()) {
    default: llvm_unreachable("Unreachable!");
    case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break;
    case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break;
    case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break;
    case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break;
    case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break;
    case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break;
    }
    unsigned Amt = MI->getOperand(3).getImm();
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    MI->getOperand(3).setImm(Size-Amt);
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
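  // For instance, commuting
  //   A = SHRD32rri8 B, C, 7
  // yields
  //   A = SHLD32rri8 C, B, 25
  // since swapping the sources mirrors the double shift and the amount
  // becomes Size - Amt (32 - 7 here).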
  case X86::CMOVB16rr:
  case X86::CMOVB32rr:
  case X86::CMOVB64rr:
  case X86::CMOVAE16rr:
  case X86::CMOVAE32rr:
  case X86::CMOVAE64rr:
  case X86::CMOVE16rr:
  case X86::CMOVE32rr:
  case X86::CMOVE64rr:
  case X86::CMOVNE16rr:
  case X86::CMOVNE32rr:
  case X86::CMOVNE64rr:
  case X86::CMOVBE16rr:
  case X86::CMOVBE32rr:
  case X86::CMOVBE64rr:
  case X86::CMOVA16rr:
  case X86::CMOVA32rr:
  case X86::CMOVA64rr:
  case X86::CMOVL16rr:
  case X86::CMOVL32rr:
  case X86::CMOVL64rr:
  case X86::CMOVGE16rr:
  case X86::CMOVGE32rr:
  case X86::CMOVGE64rr:
  case X86::CMOVLE16rr:
  case X86::CMOVLE32rr:
  case X86::CMOVLE64rr:
  case X86::CMOVG16rr:
  case X86::CMOVG32rr:
  case X86::CMOVG64rr:
  case X86::CMOVS16rr:
  case X86::CMOVS32rr:
  case X86::CMOVS64rr:
  case X86::CMOVNS16rr:
  case X86::CMOVNS32rr:
  case X86::CMOVNS64rr:
  case X86::CMOVP16rr:
  case X86::CMOVP32rr:
  case X86::CMOVP64rr:
  case X86::CMOVNP16rr:
  case X86::CMOVNP32rr:
  case X86::CMOVNP64rr:
  case X86::CMOVO16rr:
  case X86::CMOVO32rr:
  case X86::CMOVO64rr:
  case X86::CMOVNO16rr:
  case X86::CMOVNO32rr:
  case X86::CMOVNO64rr: {
    unsigned Opc = 0;
    switch (MI->getOpcode()) {
    default: break;
    case X86::CMOVB16rr:  Opc = X86::CMOVAE16rr; break;
    case X86::CMOVB32rr:  Opc = X86::CMOVAE32rr; break;
    case X86::CMOVB64rr:  Opc = X86::CMOVAE64rr; break;
    case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break;
    case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break;
    case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break;
    case X86::CMOVE16rr:  Opc = X86::CMOVNE16rr; break;
    case X86::CMOVE32rr:  Opc = X86::CMOVNE32rr; break;
    case X86::CMOVE64rr:  Opc = X86::CMOVNE64rr; break;
    case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break;
    case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break;
    case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break;
    case X86::CMOVBE16rr: Opc = X86::CMOVA16rr; break;
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
1526 } 1527 default: 1528 return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1529 } 1530} 1531 1532static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) { 1533 switch (BrOpc) { 1534 default: return X86::COND_INVALID; 1535 case X86::JE: return X86::COND_E; 1536 case X86::JNE: return X86::COND_NE; 1537 case X86::JL: return X86::COND_L; 1538 case X86::JLE: return X86::COND_LE; 1539 case X86::JG: return X86::COND_G; 1540 case X86::JGE: return X86::COND_GE; 1541 case X86::JB: return X86::COND_B; 1542 case X86::JBE: return X86::COND_BE; 1543 case X86::JA: return X86::COND_A; 1544 case X86::JAE: return X86::COND_AE; 1545 case X86::JS: return X86::COND_S; 1546 case X86::JNS: return X86::COND_NS; 1547 case X86::JP: return X86::COND_P; 1548 case X86::JNP: return X86::COND_NP; 1549 case X86::JO: return X86::COND_O; 1550 case X86::JNO: return X86::COND_NO; 1551 } 1552} 1553 1554unsigned X86::GetCondBranchFromCond(X86::CondCode CC) { 1555 switch (CC) { 1556 default: llvm_unreachable("Illegal condition code!"); 1557 case X86::COND_E: return X86::JE; 1558 case X86::COND_NE: return X86::JNE; 1559 case X86::COND_L: return X86::JL; 1560 case X86::COND_LE: return X86::JLE; 1561 case X86::COND_G: return X86::JG; 1562 case X86::COND_GE: return X86::JGE; 1563 case X86::COND_B: return X86::JB; 1564 case X86::COND_BE: return X86::JBE; 1565 case X86::COND_A: return X86::JA; 1566 case X86::COND_AE: return X86::JAE; 1567 case X86::COND_S: return X86::JS; 1568 case X86::COND_NS: return X86::JNS; 1569 case X86::COND_P: return X86::JP; 1570 case X86::COND_NP: return X86::JNP; 1571 case X86::COND_O: return X86::JO; 1572 case X86::COND_NO: return X86::JNO; 1573 } 1574} 1575 1576/// GetOppositeBranchCondition - Return the inverse of the specified condition, 1577/// e.g. turning COND_E to COND_NE. 1578X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) { 1579 switch (CC) { 1580 default: llvm_unreachable("Illegal condition code!"); 1581 case X86::COND_E: return X86::COND_NE; 1582 case X86::COND_NE: return X86::COND_E; 1583 case X86::COND_L: return X86::COND_GE; 1584 case X86::COND_LE: return X86::COND_G; 1585 case X86::COND_G: return X86::COND_LE; 1586 case X86::COND_GE: return X86::COND_L; 1587 case X86::COND_B: return X86::COND_AE; 1588 case X86::COND_BE: return X86::COND_A; 1589 case X86::COND_A: return X86::COND_BE; 1590 case X86::COND_AE: return X86::COND_B; 1591 case X86::COND_S: return X86::COND_NS; 1592 case X86::COND_NS: return X86::COND_S; 1593 case X86::COND_P: return X86::COND_NP; 1594 case X86::COND_NP: return X86::COND_P; 1595 case X86::COND_O: return X86::COND_NO; 1596 case X86::COND_NO: return X86::COND_O; 1597 } 1598} 1599 1600bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { 1601 const TargetInstrDesc &TID = MI->getDesc(); 1602 if (!TID.isTerminator()) return false; 1603 1604 // Conditional branch is a special case. 1605 if (TID.isBranch() && !TID.isBarrier()) 1606 return true; 1607 if (!TID.isPredicable()) 1608 return true; 1609 return !isPredicated(MI); 1610} 1611 1612// For purposes of branch analysis do not count FP_REG_KILL as a terminator. 
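// FP_REG_KILL merely marks the x87 stack registers as dead at the end of a
// block; it transfers no control, so the bottom-up scan below must look
// through it to find the real branches.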
static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
                                               const X86InstrInfo &TII) {
  if (MI->getOpcode() == X86::FP_REG_KILL)
    return false;
  return TII.isUnpredicatedTerminator(MI);
}

bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;

    // Working from the bottom, when we see a non-terminator instruction, we're
    // done.
    if (!isBrAnalysisUnpredicatedTerminator(I, *this))
      break;

    // A terminator that isn't a branch can't easily be handled by this
    // analysis.
    if (!I->getDesc().isBranch())
      return true;

    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (llvm::next(I) != MBB.end())
        llvm::next(I)->eraseFromParent();

      Cond.clear();
      FBB = 0;

      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }

      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }

    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.

    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }

    // Handle subsequent conditional branches. Only handle the case where all
    // conditional branches branch to the same destination and their condition
    // opcodes fit one of the special multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);

    // Only handle the case where all conditional branches branch to the same
    // destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;

    // If the conditions are the same, we can leave them alone.
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    if (OldBranchCode == BranchCode)
      continue;

    // If they differ, see if they fit one of the known patterns. Theoretically,
    // we could handle more patterns here, but we shouldn't expect to see them
    // if instruction selection has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;

    // Update the MachineOperand.
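    // (A merged code like COND_NE_OR_P arises, for instance, from a
    // floating-point 'unordered or not equal' compare, which selects to
    //   jne L
    //   jp  L
    // with both branches targeting the same block.)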
    Cond[0].setImm(BranchCode);
  }

  return false;
}

unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator I = MBB.end();
  unsigned Count = 0;

  while (I != MBB.begin()) {
    --I;
    if (I->getOpcode() != X86::JMP &&
        GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID)
      break;
    // Remove the branch.
    I->eraseFromParent();
    I = MBB.end();
    ++Count;
  }

  return Count;
}

unsigned
X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                           MachineBasicBlock *FBB,
                           const SmallVectorImpl<MachineOperand> &Cond) const {
  // FIXME this should probably have a DebugLoc operand
  DebugLoc dl = DebugLoc::getUnknownLoc();
  // Shouldn't be a fall through.
  assert(TBB && "InsertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 1 || Cond.size() == 0) &&
         "X86 branch conditions have one component!");

  if (Cond.empty()) {
    // Unconditional branch?
    assert(!FBB && "Unconditional branch with multiple successors!");
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB);
    return 1;
  }

  // Conditional branch.
  unsigned Count = 0;
  X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
  switch (CC) {
  case X86::COND_NP_OR_E:
    // Synthesize NP_OR_E with two branches.
    BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB);
    ++Count;
    break;
  case X86::COND_NE_OR_P:
    // Synthesize NE_OR_P with two branches.
    BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB);
    ++Count;
    BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB);
    ++Count;
    break;
  default: {
    unsigned Opc = GetCondBranchFromCond(CC);
    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
    ++Count;
  }
  }
  if (FBB) {
    // Two-way Conditional branch. Insert the second branch.
    BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB);
    ++Count;
  }
  return Count;
}

/// isHReg - Test if the given register is a physical h register.
static bool isHReg(unsigned Reg) {
  return X86::GR8_ABCD_HRegClass.contains(Reg);
}

bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MI,
                                unsigned DestReg, unsigned SrcReg,
                                const TargetRegisterClass *DestRC,
                                const TargetRegisterClass *SrcRC) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MI != MBB.end()) DL = MI->getDebugLoc();

  // Determine if DestRC and SrcRC have a common superclass.
  const TargetRegisterClass *CommonRC = DestRC;
  if (DestRC == SrcRC)
    /* Source and destination have the same register class. */;
  else if (CommonRC->hasSuperClass(SrcRC))
    CommonRC = SrcRC;
  else if (!DestRC->hasSubClass(SrcRC)) {
    // Neither GR64_NOREX nor GR64_NOSP is a superclass of the other,
    // but we want to copy them as GR64. Similarly, for GR32_NOREX and
    // GR32_NOSP, copy as GR32.
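    // e.g. a GR64_NOREX -> GR64_NOSP copy: neither class is a subclass of
    // the other, but both are contained in GR64, so a plain MOV64rr works.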
1811 if (SrcRC->hasSuperClass(&X86::GR64RegClass) && 1812 DestRC->hasSuperClass(&X86::GR64RegClass)) 1813 CommonRC = &X86::GR64RegClass; 1814 else if (SrcRC->hasSuperClass(&X86::GR32RegClass) && 1815 DestRC->hasSuperClass(&X86::GR32RegClass)) 1816 CommonRC = &X86::GR32RegClass; 1817 else 1818 CommonRC = 0; 1819 } 1820 1821 if (CommonRC) { 1822 unsigned Opc; 1823 if (CommonRC == &X86::GR64RegClass || CommonRC == &X86::GR64_NOSPRegClass) { 1824 Opc = X86::MOV64rr; 1825 } else if (CommonRC == &X86::GR32RegClass || 1826 CommonRC == &X86::GR32_NOSPRegClass) { 1827 Opc = X86::MOV32rr; 1828 } else if (CommonRC == &X86::GR16RegClass) { 1829 Opc = X86::MOV16rr; 1830 } else if (CommonRC == &X86::GR8RegClass) { 1831 // Copying to or from a physical H register on x86-64 requires a NOREX 1832 // move. Otherwise use a normal move. 1833 if ((isHReg(DestReg) || isHReg(SrcReg)) && 1834 TM.getSubtarget<X86Subtarget>().is64Bit()) 1835 Opc = X86::MOV8rr_NOREX; 1836 else 1837 Opc = X86::MOV8rr; 1838 } else if (CommonRC == &X86::GR64_ABCDRegClass) { 1839 Opc = X86::MOV64rr; 1840 } else if (CommonRC == &X86::GR32_ABCDRegClass) { 1841 Opc = X86::MOV32rr; 1842 } else if (CommonRC == &X86::GR16_ABCDRegClass) { 1843 Opc = X86::MOV16rr; 1844 } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { 1845 Opc = X86::MOV8rr; 1846 } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { 1847 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1848 Opc = X86::MOV8rr_NOREX; 1849 else 1850 Opc = X86::MOV8rr; 1851 } else if (CommonRC == &X86::GR64_NOREXRegClass || 1852 CommonRC == &X86::GR64_NOREX_NOSPRegClass) { 1853 Opc = X86::MOV64rr; 1854 } else if (CommonRC == &X86::GR32_NOREXRegClass) { 1855 Opc = X86::MOV32rr; 1856 } else if (CommonRC == &X86::GR16_NOREXRegClass) { 1857 Opc = X86::MOV16rr; 1858 } else if (CommonRC == &X86::GR8_NOREXRegClass) { 1859 Opc = X86::MOV8rr; 1860 } else if (CommonRC == &X86::RFP32RegClass) { 1861 Opc = X86::MOV_Fp3232; 1862 } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { 1863 Opc = X86::MOV_Fp6464; 1864 } else if (CommonRC == &X86::RFP80RegClass) { 1865 Opc = X86::MOV_Fp8080; 1866 } else if (CommonRC == &X86::FR32RegClass) { 1867 Opc = X86::FsMOVAPSrr; 1868 } else if (CommonRC == &X86::FR64RegClass) { 1869 Opc = X86::FsMOVAPDrr; 1870 } else if (CommonRC == &X86::VR128RegClass) { 1871 Opc = X86::MOVAPSrr; 1872 } else if (CommonRC == &X86::VR64RegClass) { 1873 Opc = X86::MMX_MOVQ64rr; 1874 } else { 1875 return false; 1876 } 1877 BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); 1878 return true; 1879 } 1880 1881 // Moving EFLAGS to / from another register requires a push and a pop. 
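  // For example, GR32 <- EFLAGS is emitted as
  //   pushfd
  //   pop  %dst
  // and EFLAGS <- GR32 as
  //   push %src
  //   popfd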
  if (SrcRC == &X86::CCRRegClass) {
    if (SrcReg != X86::EFLAGS)
      return false;
    if (DestRC == &X86::GR64RegClass || DestRC == &X86::GR64_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFQ64));
      BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg);
      return true;
    } else if (DestRC == &X86::GR32RegClass ||
               DestRC == &X86::GR32_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSHFD));
      BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg);
      return true;
    }
  } else if (DestRC == &X86::CCRRegClass) {
    if (DestReg != X86::EFLAGS)
      return false;
    if (SrcRC == &X86::GR64RegClass || SrcRC == &X86::GR64_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg);
      BuildMI(MBB, MI, DL, get(X86::POPFQ));
      return true;
    } else if (SrcRC == &X86::GR32RegClass ||
               SrcRC == &X86::GR32_NOSPRegClass) {
      BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg);
      BuildMI(MBB, MI, DL, get(X86::POPFD));
      return true;
    }
  }

  // Moving from ST(0) turns into FpGET_ST0_32 etc.
  if (SrcRC == &X86::RSTRegClass) {
    // Copying from ST(0)/ST(1).
    if (SrcReg != X86::ST0 && SrcReg != X86::ST1)
      // Can only copy from ST(0)/ST(1) right now
      return false;
    bool isST0 = SrcReg == X86::ST0;
    unsigned Opc;
    if (DestRC == &X86::RFP32RegClass)
      Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32;
    else if (DestRC == &X86::RFP64RegClass)
      Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64;
    else {
      if (DestRC != &X86::RFP80RegClass)
        return false;
      Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80;
    }
    BuildMI(MBB, MI, DL, get(Opc), DestReg);
    return true;
  }

  // Moving to ST(0) turns into FpSET_ST0_32 etc.
  if (DestRC == &X86::RSTRegClass) {
    // Copying to ST(0) / ST(1).
    if (DestReg != X86::ST0 && DestReg != X86::ST1)
      // Can only copy to TOS right now
      return false;
    bool isST0 = DestReg == X86::ST0;
    unsigned Opc;
    if (SrcRC == &X86::RFP32RegClass)
      Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32;
    else if (SrcRC == &X86::RFP64RegClass)
      Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64;
    else {
      if (SrcRC != &X86::RFP80RegClass)
        return false;
      Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80;
    }
    BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg);
    return true;
  }

  // Not yet supported!
  return false;
}

static unsigned getStoreRegOpcode(unsigned SrcReg,
                                  const TargetRegisterClass *RC,
                                  bool isStackAligned,
                                  TargetMachine &TM) {
  unsigned Opc = 0;
  if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) {
    Opc = X86::MOV64mr;
  } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) {
    Opc = X86::MOV32mr;
  } else if (RC == &X86::GR16RegClass) {
    Opc = X86::MOV16mr;
  } else if (RC == &X86::GR8RegClass) {
    // Copying to or from a physical H register on x86-64 requires a NOREX
    // move. Otherwise use a normal move.
1970 if (isHReg(SrcReg) && 1971 TM.getSubtarget<X86Subtarget>().is64Bit()) 1972 Opc = X86::MOV8mr_NOREX; 1973 else 1974 Opc = X86::MOV8mr; 1975 } else if (RC == &X86::GR64_ABCDRegClass) { 1976 Opc = X86::MOV64mr; 1977 } else if (RC == &X86::GR32_ABCDRegClass) { 1978 Opc = X86::MOV32mr; 1979 } else if (RC == &X86::GR16_ABCDRegClass) { 1980 Opc = X86::MOV16mr; 1981 } else if (RC == &X86::GR8_ABCD_LRegClass) { 1982 Opc = X86::MOV8mr; 1983 } else if (RC == &X86::GR8_ABCD_HRegClass) { 1984 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1985 Opc = X86::MOV8mr_NOREX; 1986 else 1987 Opc = X86::MOV8mr; 1988 } else if (RC == &X86::GR64_NOREXRegClass || 1989 RC == &X86::GR64_NOREX_NOSPRegClass) { 1990 Opc = X86::MOV64mr; 1991 } else if (RC == &X86::GR32_NOREXRegClass) { 1992 Opc = X86::MOV32mr; 1993 } else if (RC == &X86::GR16_NOREXRegClass) { 1994 Opc = X86::MOV16mr; 1995 } else if (RC == &X86::GR8_NOREXRegClass) { 1996 Opc = X86::MOV8mr; 1997 } else if (RC == &X86::RFP80RegClass) { 1998 Opc = X86::ST_FpP80m; // pops 1999 } else if (RC == &X86::RFP64RegClass) { 2000 Opc = X86::ST_Fp64m; 2001 } else if (RC == &X86::RFP32RegClass) { 2002 Opc = X86::ST_Fp32m; 2003 } else if (RC == &X86::FR32RegClass) { 2004 Opc = X86::MOVSSmr; 2005 } else if (RC == &X86::FR64RegClass) { 2006 Opc = X86::MOVSDmr; 2007 } else if (RC == &X86::VR128RegClass) { 2008 // If stack is realigned we can use aligned stores. 2009 Opc = isStackAligned ? X86::MOVAPSmr : X86::MOVUPSmr; 2010 } else if (RC == &X86::VR64RegClass) { 2011 Opc = X86::MMX_MOVQ64mr; 2012 } else { 2013 llvm_unreachable("Unknown regclass"); 2014 } 2015 2016 return Opc; 2017} 2018 2019void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 2020 MachineBasicBlock::iterator MI, 2021 unsigned SrcReg, bool isKill, int FrameIdx, 2022 const TargetRegisterClass *RC) const { 2023 const MachineFunction &MF = *MBB.getParent(); 2024 bool isAligned = (RI.getStackAlignment() >= 16) || 2025 RI.needsStackRealignment(MF); 2026 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2027 DebugLoc DL = DebugLoc::getUnknownLoc(); 2028 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2029 addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 2030 .addReg(SrcReg, getKillRegState(isKill)); 2031} 2032 2033void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 2034 bool isKill, 2035 SmallVectorImpl<MachineOperand> &Addr, 2036 const TargetRegisterClass *RC, 2037 MachineInstr::mmo_iterator MMOBegin, 2038 MachineInstr::mmo_iterator MMOEnd, 2039 SmallVectorImpl<MachineInstr*> &NewMIs) const { 2040 bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2041 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 2042 DebugLoc DL = DebugLoc::getUnknownLoc(); 2043 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 2044 for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2045 MIB.addOperand(Addr[i]); 2046 MIB.addReg(SrcReg, getKillRegState(isKill)); 2047 (*MIB).setMemRefs(MMOBegin, MMOEnd); 2048 NewMIs.push_back(MIB); 2049} 2050 2051static unsigned getLoadRegOpcode(unsigned DestReg, 2052 const TargetRegisterClass *RC, 2053 bool isStackAligned, 2054 const TargetMachine &TM) { 2055 unsigned Opc = 0; 2056 if (RC == &X86::GR64RegClass || RC == &X86::GR64_NOSPRegClass) { 2057 Opc = X86::MOV64rm; 2058 } else if (RC == &X86::GR32RegClass || RC == &X86::GR32_NOSPRegClass) { 2059 Opc = X86::MOV32rm; 2060 } else if (RC == &X86::GR16RegClass) { 2061 Opc = X86::MOV16rm; 2062 } else if (RC == &X86::GR8RegClass) { 2063 // Copying to or from a physical H register on x86-64 
requires a NOREX 2064 // move. Otherwise use a normal move. 2065 if (isHReg(DestReg) && 2066 TM.getSubtarget<X86Subtarget>().is64Bit()) 2067 Opc = X86::MOV8rm_NOREX; 2068 else 2069 Opc = X86::MOV8rm; 2070 } else if (RC == &X86::GR64_ABCDRegClass) { 2071 Opc = X86::MOV64rm; 2072 } else if (RC == &X86::GR32_ABCDRegClass) { 2073 Opc = X86::MOV32rm; 2074 } else if (RC == &X86::GR16_ABCDRegClass) { 2075 Opc = X86::MOV16rm; 2076 } else if (RC == &X86::GR8_ABCD_LRegClass) { 2077 Opc = X86::MOV8rm; 2078 } else if (RC == &X86::GR8_ABCD_HRegClass) { 2079 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2080 Opc = X86::MOV8rm_NOREX; 2081 else 2082 Opc = X86::MOV8rm; 2083 } else if (RC == &X86::GR64_NOREXRegClass || 2084 RC == &X86::GR64_NOREX_NOSPRegClass) { 2085 Opc = X86::MOV64rm; 2086 } else if (RC == &X86::GR32_NOREXRegClass) { 2087 Opc = X86::MOV32rm; 2088 } else if (RC == &X86::GR16_NOREXRegClass) { 2089 Opc = X86::MOV16rm; 2090 } else if (RC == &X86::GR8_NOREXRegClass) { 2091 Opc = X86::MOV8rm; 2092 } else if (RC == &X86::RFP80RegClass) { 2093 Opc = X86::LD_Fp80m; 2094 } else if (RC == &X86::RFP64RegClass) { 2095 Opc = X86::LD_Fp64m; 2096 } else if (RC == &X86::RFP32RegClass) { 2097 Opc = X86::LD_Fp32m; 2098 } else if (RC == &X86::FR32RegClass) { 2099 Opc = X86::MOVSSrm; 2100 } else if (RC == &X86::FR64RegClass) { 2101 Opc = X86::MOVSDrm; 2102 } else if (RC == &X86::VR128RegClass) { 2103 // If stack is realigned we can use aligned loads. 2104 Opc = isStackAligned ? X86::MOVAPSrm : X86::MOVUPSrm; 2105 } else if (RC == &X86::VR64RegClass) { 2106 Opc = X86::MMX_MOVQ64rm; 2107 } else { 2108 llvm_unreachable("Unknown regclass"); 2109 } 2110 2111 return Opc; 2112} 2113 2114void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 2115 MachineBasicBlock::iterator MI, 2116 unsigned DestReg, int FrameIdx, 2117 const TargetRegisterClass *RC) const{ 2118 const MachineFunction &MF = *MBB.getParent(); 2119 bool isAligned = (RI.getStackAlignment() >= 16) || 2120 RI.needsStackRealignment(MF); 2121 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2122 DebugLoc DL = DebugLoc::getUnknownLoc(); 2123 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2124 addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 2125} 2126 2127void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 2128 SmallVectorImpl<MachineOperand> &Addr, 2129 const TargetRegisterClass *RC, 2130 MachineInstr::mmo_iterator MMOBegin, 2131 MachineInstr::mmo_iterator MMOEnd, 2132 SmallVectorImpl<MachineInstr*> &NewMIs) const { 2133 bool isAligned = (*MMOBegin)->getAlignment() >= 16; 2134 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2135 DebugLoc DL = DebugLoc::getUnknownLoc(); 2136 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2137 for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2138 MIB.addOperand(Addr[i]); 2139 (*MIB).setMemRefs(MMOBegin, MMOEnd); 2140 NewMIs.push_back(MIB); 2141} 2142 2143bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2144 MachineBasicBlock::iterator MI, 2145 const std::vector<CalleeSavedInfo> &CSI) const { 2146 if (CSI.empty()) 2147 return false; 2148 2149 DebugLoc DL = DebugLoc::getUnknownLoc(); 2150 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2151 2152 bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2153 bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2154 unsigned SlotSize = is64Bit ? 
8 : 4; 2155 2156 MachineFunction &MF = *MBB.getParent(); 2157 unsigned FPReg = RI.getFrameRegister(MF); 2158 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2159 unsigned CalleeFrameSize = 0; 2160 2161 unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; 2162 for (unsigned i = CSI.size(); i != 0; --i) { 2163 unsigned Reg = CSI[i-1].getReg(); 2164 const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); 2165 // Add the callee-saved register as live-in. It's killed at the spill. 2166 MBB.addLiveIn(Reg); 2167 if (Reg == FPReg) 2168 // X86RegisterInfo::emitPrologue will handle spilling of frame register. 2169 continue; 2170 if (RegClass != &X86::VR128RegClass && !isWin64) { 2171 CalleeFrameSize += SlotSize; 2172 BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); 2173 } else { 2174 storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); 2175 } 2176 } 2177 2178 X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 2179 return true; 2180} 2181 2182bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2183 MachineBasicBlock::iterator MI, 2184 const std::vector<CalleeSavedInfo> &CSI) const { 2185 if (CSI.empty()) 2186 return false; 2187 2188 DebugLoc DL = DebugLoc::getUnknownLoc(); 2189 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2190 2191 MachineFunction &MF = *MBB.getParent(); 2192 unsigned FPReg = RI.getFrameRegister(MF); 2193 bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2194 bool isWin64 = TM.getSubtarget<X86Subtarget>().isTargetWin64(); 2195 unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; 2196 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2197 unsigned Reg = CSI[i].getReg(); 2198 if (Reg == FPReg) 2199 // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 2200 continue; 2201 const TargetRegisterClass *RegClass = CSI[i].getRegClass(); 2202 if (RegClass != &X86::VR128RegClass && !isWin64) { 2203 BuildMI(MBB, MI, DL, get(Opc), Reg); 2204 } else { 2205 loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); 2206 } 2207 } 2208 return true; 2209} 2210 2211static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2212 const SmallVectorImpl<MachineOperand> &MOs, 2213 MachineInstr *MI, 2214 const TargetInstrInfo &TII) { 2215 // Create the base instruction with the memory operand as the first part. 2216 MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2217 MI->getDebugLoc(), true); 2218 MachineInstrBuilder MIB(NewMI); 2219 unsigned NumAddrOps = MOs.size(); 2220 for (unsigned i = 0; i != NumAddrOps; ++i) 2221 MIB.addOperand(MOs[i]); 2222 if (NumAddrOps < 4) // FrameIndex only 2223 addOffset(MIB, 0); 2224 2225 // Loop over the rest of the ri operands, converting them over. 
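  // e.g. fusing a frame index into the tied operands of an add:
  //   %reg = ADD32rr %reg, %src  ->  ADD32mr <fi#n>, %src
  // The memory operand replaces both the tied def and use, so only the
  // remaining source operands are copied here.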
2226 unsigned NumOps = MI->getDesc().getNumOperands()-2; 2227 for (unsigned i = 0; i != NumOps; ++i) { 2228 MachineOperand &MO = MI->getOperand(i+2); 2229 MIB.addOperand(MO); 2230 } 2231 for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2232 MachineOperand &MO = MI->getOperand(i); 2233 MIB.addOperand(MO); 2234 } 2235 return MIB; 2236} 2237 2238static MachineInstr *FuseInst(MachineFunction &MF, 2239 unsigned Opcode, unsigned OpNo, 2240 const SmallVectorImpl<MachineOperand> &MOs, 2241 MachineInstr *MI, const TargetInstrInfo &TII) { 2242 MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2243 MI->getDebugLoc(), true); 2244 MachineInstrBuilder MIB(NewMI); 2245 2246 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2247 MachineOperand &MO = MI->getOperand(i); 2248 if (i == OpNo) { 2249 assert(MO.isReg() && "Expected to fold into reg operand!"); 2250 unsigned NumAddrOps = MOs.size(); 2251 for (unsigned i = 0; i != NumAddrOps; ++i) 2252 MIB.addOperand(MOs[i]); 2253 if (NumAddrOps < 4) // FrameIndex only 2254 addOffset(MIB, 0); 2255 } else { 2256 MIB.addOperand(MO); 2257 } 2258 } 2259 return MIB; 2260} 2261 2262static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2263 const SmallVectorImpl<MachineOperand> &MOs, 2264 MachineInstr *MI) { 2265 MachineFunction &MF = *MI->getParent()->getParent(); 2266 MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2267 2268 unsigned NumAddrOps = MOs.size(); 2269 for (unsigned i = 0; i != NumAddrOps; ++i) 2270 MIB.addOperand(MOs[i]); 2271 if (NumAddrOps < 4) // FrameIndex only 2272 addOffset(MIB, 0); 2273 return MIB.addImm(0); 2274} 2275 2276MachineInstr* 2277X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2278 MachineInstr *MI, unsigned i, 2279 const SmallVectorImpl<MachineOperand> &MOs, 2280 unsigned Size, unsigned Align) const { 2281 const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2282 bool isTwoAddrFold = false; 2283 unsigned NumOps = MI->getDesc().getNumOperands(); 2284 bool isTwoAddr = NumOps > 1 && 2285 MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2286 2287 MachineInstr *NewMI = NULL; 2288 // Folding a memory location into the two-address part of a two-address 2289 // instruction is different than folding it other places. It requires 2290 // replacing the *two* registers with the memory location. 2291 if (isTwoAddr && NumOps >= 2 && i < 2 && 2292 MI->getOperand(0).isReg() && 2293 MI->getOperand(1).isReg() && 2294 MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2295 OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2296 isTwoAddrFold = true; 2297 } else if (i == 0) { // If operand 0 2298 if (MI->getOpcode() == X86::MOV64r0) 2299 NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); 2300 else if (MI->getOpcode() == X86::MOV32r0) 2301 NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2302 else if (MI->getOpcode() == X86::MOV16r0) 2303 NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); 2304 else if (MI->getOpcode() == X86::MOV8r0) 2305 NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2306 if (NewMI) 2307 return NewMI; 2308 2309 OpcodeTablePtr = &RegOp2MemOpTable0; 2310 } else if (i == 1) { 2311 OpcodeTablePtr = &RegOp2MemOpTable1; 2312 } else if (i == 2) { 2313 OpcodeTablePtr = &RegOp2MemOpTable2; 2314 } 2315 2316 // If table selected... 
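  // Each table entry maps a register-form opcode to its memory form and
  // records the minimum alignment the memory form requires (e.g. most SSE
  // memory forms need their operand 16-byte aligned).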
  if (OpcodeTablePtr) {
    // Find the Opcode to fuse
    DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
      OpcodeTablePtr->find((unsigned*)MI->getOpcode());
    if (I != OpcodeTablePtr->end()) {
      unsigned Opcode = I->second.first;
      unsigned MinAlign = I->second.second;
      if (Align < MinAlign)
        return NULL;
      bool NarrowToMOV32rm = false;
      if (Size) {
        unsigned RCSize = MI->getDesc().OpInfo[i].getRegClass(&RI)->getSize();
        if (Size < RCSize) {
          // Check if it's safe to fold the load. If the size of the object is
          // narrower than the load width, then it's not.
          if (Opcode != X86::MOV64rm || RCSize != 8 || Size != 4)
            return NULL;
          // If this is a 64-bit load, but the spill slot is 32 bits, we can do
          // a 32-bit load which is implicitly zero-extended. This likely is due
          // to liveintervalanalysis remat'ing a load from stack slot.
          if (MI->getOperand(0).getSubReg() || MI->getOperand(1).getSubReg())
            return NULL;
          Opcode = X86::MOV32rm;
          NarrowToMOV32rm = true;
        }
      }

      if (isTwoAddrFold)
        NewMI = FuseTwoAddrInst(MF, Opcode, MOs, MI, *this);
      else
        NewMI = FuseInst(MF, Opcode, i, MOs, MI, *this);

      if (NarrowToMOV32rm) {
        // This is the special case where we use a MOV32rm to load a 32-bit
        // value and zero-extend the top bits; change the destination register
        // to a 32-bit one.
        unsigned DstReg = NewMI->getOperand(0).getReg();
        if (TargetRegisterInfo::isPhysicalRegister(DstReg))
          NewMI->getOperand(0).setReg(RI.getSubReg(DstReg,
                                                   4/*x86_subreg_32bit*/));
        else
          NewMI->getOperand(0).setSubReg(4/*x86_subreg_32bit*/);
      }
      return NewMI;
    }
  }

  // No fusion
  if (PrintFailedFusing)
    dbgs() << "We failed to fuse operand " << i << " in " << *MI;
  return NULL;
}


MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
                                                  MachineInstr *MI,
                                                  const SmallVectorImpl<unsigned> &Ops,
                                                  int FrameIndex) const {
  // Check switch flag
  if (NoFusing) return NULL;

  if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    switch (MI->getOpcode()) {
    case X86::CVTSD2SSrr:
    case X86::Int_CVTSD2SSrr:
    case X86::CVTSS2SDrr:
    case X86::Int_CVTSS2SDrr:
    case X86::RCPSSr:
    case X86::RCPSSr_Int:
    case X86::ROUNDSDr_Int:
    case X86::ROUNDSSr_Int:
    case X86::RSQRTSSr:
    case X86::RSQRTSSr_Int:
    case X86::SQRTSSr:
    case X86::SQRTSSr_Int:
      return 0;
    }

  const MachineFrameInfo *MFI = MF.getFrameInfo();
  unsigned Size = MFI->getObjectSize(FrameIndex);
  unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
  if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
    unsigned NewOpc = 0;
    unsigned RCSize = 0;
    switch (MI->getOpcode()) {
    default: return NULL;
    case X86::TEST8rr:  NewOpc = X86::CMP8ri;    RCSize = 1; break;
    case X86::TEST16rr: NewOpc = X86::CMP16ri;   RCSize = 2; break;
    case X86::TEST32rr: NewOpc = X86::CMP32ri;   RCSize = 4; break;
    case X86::TEST64rr: NewOpc = X86::CMP64ri32; RCSize = 8; break;
    }
    // Check if it's safe to fold the load. If the size of the object is
    // narrower than the load width, then it's not.
    if (Size < RCSize)
      return NULL;
    // Change to CMPXXri r, 0 first.
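    // e.g. TEST32rr %r, %r becomes CMP32ri %r, 0, whose register operand
    // can then be folded below to produce CMP32mi <fi#n>, 0.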
2413 MI->setDesc(get(NewOpc)); 2414 MI->getOperand(1).ChangeToImmediate(0); 2415 } else if (Ops.size() != 1) 2416 return NULL; 2417 2418 SmallVector<MachineOperand,4> MOs; 2419 MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2420 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, Size, Alignment); 2421} 2422 2423MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2424 MachineInstr *MI, 2425 const SmallVectorImpl<unsigned> &Ops, 2426 MachineInstr *LoadMI) const { 2427 // Check switch flag 2428 if (NoFusing) return NULL; 2429 2430 if (!MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) 2431 switch (MI->getOpcode()) { 2432 case X86::CVTSD2SSrr: 2433 case X86::Int_CVTSD2SSrr: 2434 case X86::CVTSS2SDrr: 2435 case X86::Int_CVTSS2SDrr: 2436 case X86::RCPSSr: 2437 case X86::RCPSSr_Int: 2438 case X86::ROUNDSDr_Int: 2439 case X86::ROUNDSSr_Int: 2440 case X86::RSQRTSSr: 2441 case X86::RSQRTSSr_Int: 2442 case X86::SQRTSSr: 2443 case X86::SQRTSSr_Int: 2444 return 0; 2445 } 2446 2447 // Determine the alignment of the load. 2448 unsigned Alignment = 0; 2449 if (LoadMI->hasOneMemOperand()) 2450 Alignment = (*LoadMI->memoperands_begin())->getAlignment(); 2451 else 2452 switch (LoadMI->getOpcode()) { 2453 case X86::V_SET0: 2454 case X86::V_SETALLONES: 2455 Alignment = 16; 2456 break; 2457 case X86::FsFLD0SD: 2458 Alignment = 8; 2459 break; 2460 case X86::FsFLD0SS: 2461 Alignment = 4; 2462 break; 2463 default: 2464 llvm_unreachable("Don't know how to fold this instruction!"); 2465 } 2466 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2467 unsigned NewOpc = 0; 2468 switch (MI->getOpcode()) { 2469 default: return NULL; 2470 case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2471 case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2472 case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2473 case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2474 } 2475 // Change to CMPXXri r, 0 first. 2476 MI->setDesc(get(NewOpc)); 2477 MI->getOperand(1).ChangeToImmediate(0); 2478 } else if (Ops.size() != 1) 2479 return NULL; 2480 2481 SmallVector<MachineOperand,X86AddrNumOperands> MOs; 2482 switch (LoadMI->getOpcode()) { 2483 case X86::V_SET0: 2484 case X86::V_SETALLONES: 2485 case X86::FsFLD0SD: 2486 case X86::FsFLD0SS: { 2487 // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 2488 // Create a constant-pool entry and operands to load from it. 2489 2490 // x86-32 PIC requires a PIC base register for constant pools. 2491 unsigned PICBase = 0; 2492 if (TM.getRelocationModel() == Reloc::PIC_) { 2493 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 2494 PICBase = X86::RIP; 2495 else 2496 // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); 2497 // This doesn't work for several reasons. 2498 // 1. GlobalBaseReg may have been spilled. 2499 // 2. It may not be live at MI. 2500 return NULL; 2501 } 2502 2503 // Create a constant-pool entry. 2504 MachineConstantPool &MCP = *MF.getConstantPool(); 2505 const Type *Ty; 2506 if (LoadMI->getOpcode() == X86::FsFLD0SS) 2507 Ty = Type::getFloatTy(MF.getFunction()->getContext()); 2508 else if (LoadMI->getOpcode() == X86::FsFLD0SD) 2509 Ty = Type::getDoubleTy(MF.getFunction()->getContext()); 2510 else 2511 Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()), 4); 2512 Constant *C = LoadMI->getOpcode() == X86::V_SETALLONES ? 2513 Constant::getAllOnesValue(Ty) : 2514 Constant::getNullValue(Ty); 2515 unsigned CPI = MCP.getConstantPoolIndex(C, Alignment); 2516 2517 // Create operands to load from the constant pool entry. 
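    // An X86 memory reference is always five operands: base register, scale,
    // index register, displacement, and segment register. Here that is
    // [PICBase + 1*%noreg + CP entry] with no segment override.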
2518 MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2519 MOs.push_back(MachineOperand::CreateImm(1)); 2520 MOs.push_back(MachineOperand::CreateReg(0, false)); 2521 MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2522 MOs.push_back(MachineOperand::CreateReg(0, false)); 2523 break; 2524 } 2525 default: { 2526 // Folding a normal load. Just copy the load's address operands. 2527 unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2528 for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) 2529 MOs.push_back(LoadMI->getOperand(i)); 2530 break; 2531 } 2532 } 2533 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs, 0, Alignment); 2534} 2535 2536 2537bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2538 const SmallVectorImpl<unsigned> &Ops) const { 2539 // Check switch flag 2540 if (NoFusing) return 0; 2541 2542 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2543 switch (MI->getOpcode()) { 2544 default: return false; 2545 case X86::TEST8rr: 2546 case X86::TEST16rr: 2547 case X86::TEST32rr: 2548 case X86::TEST64rr: 2549 return true; 2550 } 2551 } 2552 2553 if (Ops.size() != 1) 2554 return false; 2555 2556 unsigned OpNum = Ops[0]; 2557 unsigned Opc = MI->getOpcode(); 2558 unsigned NumOps = MI->getDesc().getNumOperands(); 2559 bool isTwoAddr = NumOps > 1 && 2560 MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2561 2562 // Folding a memory location into the two-address part of a two-address 2563 // instruction is different than folding it other places. It requires 2564 // replacing the *two* registers with the memory location. 2565 const DenseMap<unsigned*, std::pair<unsigned,unsigned> > *OpcodeTablePtr=NULL; 2566 if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2567 OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2568 } else if (OpNum == 0) { // If operand 0 2569 switch (Opc) { 2570 case X86::MOV8r0: 2571 case X86::MOV16r0: 2572 case X86::MOV32r0: 2573 case X86::MOV64r0: 2574 return true; 2575 default: break; 2576 } 2577 OpcodeTablePtr = &RegOp2MemOpTable0; 2578 } else if (OpNum == 1) { 2579 OpcodeTablePtr = &RegOp2MemOpTable1; 2580 } else if (OpNum == 2) { 2581 OpcodeTablePtr = &RegOp2MemOpTable2; 2582 } 2583 2584 if (OpcodeTablePtr) { 2585 // Find the Opcode to fuse 2586 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2587 OpcodeTablePtr->find((unsigned*)Opc); 2588 if (I != OpcodeTablePtr->end()) 2589 return true; 2590 } 2591 return false; 2592} 2593 2594bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2595 unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2596 SmallVectorImpl<MachineInstr*> &NewMIs) const { 2597 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2598 MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); 2599 if (I == MemOp2RegOpTable.end()) 2600 return false; 2601 unsigned Opc = I->second.first; 2602 unsigned Index = I->second.second & 0xf; 2603 bool FoldedLoad = I->second.second & (1 << 4); 2604 bool FoldedStore = I->second.second & (1 << 5); 2605 if (UnfoldLoad && !FoldedLoad) 2606 return false; 2607 UnfoldLoad &= FoldedLoad; 2608 if (UnfoldStore && !FoldedStore) 2609 return false; 2610 UnfoldStore &= FoldedStore; 2611 2612 const TargetInstrDesc &TID = get(Opc); 2613 const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2614 const TargetRegisterClass *RC = TOI.getRegClass(&RI); 2615 SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; 2616 SmallVector<MachineOperand,2> BeforeOps; 2617 SmallVector<MachineOperand,2> AfterOps; 2618 SmallVector<MachineOperand,4> ImpOps; 
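  // Partition the operands of the folded instruction: the X86AddrNumOperands
  // operands starting at Index form the address, implicit register operands
  // are collected separately, and the rest are split into those before and
  // after the address.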
2619 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2620 MachineOperand &Op = MI->getOperand(i); 2621 if (i >= Index && i < Index + X86AddrNumOperands) 2622 AddrOps.push_back(Op); 2623 else if (Op.isReg() && Op.isImplicit()) 2624 ImpOps.push_back(Op); 2625 else if (i < Index) 2626 BeforeOps.push_back(Op); 2627 else if (i > Index) 2628 AfterOps.push_back(Op); 2629 } 2630 2631 // Emit the load instruction. 2632 if (UnfoldLoad) { 2633 std::pair<MachineInstr::mmo_iterator, 2634 MachineInstr::mmo_iterator> MMOs = 2635 MF.extractLoadMemRefs(MI->memoperands_begin(), 2636 MI->memoperands_end()); 2637 loadRegFromAddr(MF, Reg, AddrOps, RC, MMOs.first, MMOs.second, NewMIs); 2638 if (UnfoldStore) { 2639 // Address operands cannot be marked isKill. 2640 for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { 2641 MachineOperand &MO = NewMIs[0]->getOperand(i); 2642 if (MO.isReg()) 2643 MO.setIsKill(false); 2644 } 2645 } 2646 } 2647 2648 // Emit the data processing instruction. 2649 MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); 2650 MachineInstrBuilder MIB(DataMI); 2651 2652 if (FoldedStore) 2653 MIB.addReg(Reg, RegState::Define); 2654 for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2655 MIB.addOperand(BeforeOps[i]); 2656 if (FoldedLoad) 2657 MIB.addReg(Reg); 2658 for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2659 MIB.addOperand(AfterOps[i]); 2660 for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2661 MachineOperand &MO = ImpOps[i]; 2662 MIB.addReg(MO.getReg(), 2663 getDefRegState(MO.isDef()) | 2664 RegState::Implicit | 2665 getKillRegState(MO.isKill()) | 2666 getDeadRegState(MO.isDead()) | 2667 getUndefRegState(MO.isUndef())); 2668 } 2669 // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2670 unsigned NewOpc = 0; 2671 switch (DataMI->getOpcode()) { 2672 default: break; 2673 case X86::CMP64ri32: 2674 case X86::CMP32ri: 2675 case X86::CMP16ri: 2676 case X86::CMP8ri: { 2677 MachineOperand &MO0 = DataMI->getOperand(0); 2678 MachineOperand &MO1 = DataMI->getOperand(1); 2679 if (MO1.getImm() == 0) { 2680 switch (DataMI->getOpcode()) { 2681 default: break; 2682 case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2683 case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2684 case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2685 case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2686 } 2687 DataMI->setDesc(get(NewOpc)); 2688 MO1.ChangeToRegister(MO0.getReg(), false); 2689 } 2690 } 2691 } 2692 NewMIs.push_back(DataMI); 2693 2694 // Emit the store instruction. 
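  // e.g. when unfolding ADD32mr <addr>, %src (both a folded load and a
  // folded store), the reload and the ADD32rr were emitted above; the
  // result is now written back through the original address.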
2695 if (UnfoldStore) { 2696 const TargetRegisterClass *DstRC = TID.OpInfo[0].getRegClass(&RI); 2697 std::pair<MachineInstr::mmo_iterator, 2698 MachineInstr::mmo_iterator> MMOs = 2699 MF.extractStoreMemRefs(MI->memoperands_begin(), 2700 MI->memoperands_end()); 2701 storeRegToAddr(MF, Reg, true, AddrOps, DstRC, MMOs.first, MMOs.second, NewMIs); 2702 } 2703 2704 return true; 2705} 2706 2707bool 2708X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2709 SmallVectorImpl<SDNode*> &NewNodes) const { 2710 if (!N->isMachineOpcode()) 2711 return false; 2712 2713 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I = 2714 MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2715 if (I == MemOp2RegOpTable.end()) 2716 return false; 2717 unsigned Opc = I->second.first; 2718 unsigned Index = I->second.second & 0xf; 2719 bool FoldedLoad = I->second.second & (1 << 4); 2720 bool FoldedStore = I->second.second & (1 << 5); 2721 const TargetInstrDesc &TID = get(Opc); 2722 const TargetRegisterClass *RC = TID.OpInfo[Index].getRegClass(&RI); 2723 unsigned NumDefs = TID.NumDefs; 2724 std::vector<SDValue> AddrOps; 2725 std::vector<SDValue> BeforeOps; 2726 std::vector<SDValue> AfterOps; 2727 DebugLoc dl = N->getDebugLoc(); 2728 unsigned NumOps = N->getNumOperands(); 2729 for (unsigned i = 0; i != NumOps-1; ++i) { 2730 SDValue Op = N->getOperand(i); 2731 if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) 2732 AddrOps.push_back(Op); 2733 else if (i < Index-NumDefs) 2734 BeforeOps.push_back(Op); 2735 else if (i > Index-NumDefs) 2736 AfterOps.push_back(Op); 2737 } 2738 SDValue Chain = N->getOperand(NumOps-1); 2739 AddrOps.push_back(Chain); 2740 2741 // Emit the load instruction. 2742 SDNode *Load = 0; 2743 MachineFunction &MF = DAG.getMachineFunction(); 2744 if (FoldedLoad) { 2745 EVT VT = *RC->vt_begin(); 2746 std::pair<MachineInstr::mmo_iterator, 2747 MachineInstr::mmo_iterator> MMOs = 2748 MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(), 2749 cast<MachineSDNode>(N)->memoperands_end()); 2750 bool isAligned = (*MMOs.first)->getAlignment() >= 16; 2751 Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2752 VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2753 NewNodes.push_back(Load); 2754 2755 // Preserve memory reference information. 2756 cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second); 2757 } 2758 2759 // Emit the data processing instruction. 2760 std::vector<EVT> VTs; 2761 const TargetRegisterClass *DstRC = 0; 2762 if (TID.getNumDefs() > 0) { 2763 DstRC = TID.OpInfo[0].getRegClass(&RI); 2764 VTs.push_back(*DstRC->vt_begin()); 2765 } 2766 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2767 EVT VT = N->getValueType(i); 2768 if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2769 VTs.push_back(VT); 2770 } 2771 if (Load) 2772 BeforeOps.push_back(SDValue(Load, 0)); 2773 std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2774 SDNode *NewNode= DAG.getMachineNode(Opc, dl, VTs, &BeforeOps[0], 2775 BeforeOps.size()); 2776 NewNodes.push_back(NewNode); 2777 2778 // Emit the store instruction. 
  if (FoldedStore) {
    AddrOps.pop_back();
    AddrOps.push_back(SDValue(NewNode, 0));
    AddrOps.push_back(Chain);
    std::pair<MachineInstr::mmo_iterator,
              MachineInstr::mmo_iterator> MMOs =
      MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
                             cast<MachineSDNode>(N)->memoperands_end());
    bool isAligned = (*MMOs.first)->getAlignment() >= 16;
    SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
                                                         isAligned, TM),
                                       dl, MVT::Other,
                                       &AddrOps[0], AddrOps.size());
    NewNodes.push_back(Store);

    // Preserve memory reference information.
    cast<MachineSDNode>(Store)->setMemRefs(MMOs.first, MMOs.second);
  }

  return true;
}

unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
                                      bool UnfoldLoad, bool UnfoldStore,
                                      unsigned *LoadRegIndex) const {
  DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
    MemOp2RegOpTable.find((unsigned*)Opc);
  if (I == MemOp2RegOpTable.end())
    return 0;
  bool FoldedLoad = I->second.second & (1 << 4);
  bool FoldedStore = I->second.second & (1 << 5);
  if (UnfoldLoad && !FoldedLoad)
    return 0;
  if (UnfoldStore && !FoldedStore)
    return 0;
  if (LoadRegIndex)
    *LoadRegIndex = I->second.second & 0xf;
  return I->second.first;
}

bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  assert(Cond.size() == 1 && "Invalid X86 branch condition!");
  X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
  if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E)
    return true;
  Cond[0].setImm(GetOppositeBranchCondition(CC));
  return false;
}

bool X86InstrInfo::
isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const {
  // FIXME: Return false for x87 stack register classes for now. We can't
  // allow any loads of these registers before FpGET_ST0_80.
  return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass ||
           RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass);
}

unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) {
  switch (Desc->TSFlags & X86II::ImmMask) {
  case X86II::Imm8:   return 1;
  case X86II::Imm16:  return 2;
  case X86II::Imm32:  return 4;
  case X86II::Imm64:  return 8;
  default: llvm_unreachable("Immediate size not set!");
    return 0;
  }
}

/// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register?
/// e.g. r8, xmm8, etc.
2850bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { 2851 if (!MO.isReg()) return false; 2852 switch (MO.getReg()) { 2853 default: break; 2854 case X86::R8: case X86::R9: case X86::R10: case X86::R11: 2855 case X86::R12: case X86::R13: case X86::R14: case X86::R15: 2856 case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 2857 case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 2858 case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 2859 case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 2860 case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 2861 case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 2862 case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 2863 case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 2864 return true; 2865 } 2866 return false; 2867} 2868 2869 2870/// determineREX - Determine if the MachineInstr has to be encoded with a X86-64 2871/// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand 2872/// size, and 3) use of X86-64 extended registers. 2873unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { 2874 unsigned REX = 0; 2875 const TargetInstrDesc &Desc = MI.getDesc(); 2876 2877 // Pseudo instructions do not need REX prefix byte. 2878 if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) 2879 return 0; 2880 if (Desc.TSFlags & X86II::REX_W) 2881 REX |= 1 << 3; 2882 2883 unsigned NumOps = Desc.getNumOperands(); 2884 if (NumOps) { 2885 bool isTwoAddr = NumOps > 1 && 2886 Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; 2887 2888 // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. 2889 unsigned i = isTwoAddr ? 1 : 0; 2890 for (unsigned e = NumOps; i != e; ++i) { 2891 const MachineOperand& MO = MI.getOperand(i); 2892 if (MO.isReg()) { 2893 unsigned Reg = MO.getReg(); 2894 if (isX86_64NonExtLowByteReg(Reg)) 2895 REX |= 0x40; 2896 } 2897 } 2898 2899 switch (Desc.TSFlags & X86II::FormMask) { 2900 case X86II::MRMInitReg: 2901 if (isX86_64ExtendedReg(MI.getOperand(0))) 2902 REX |= (1 << 0) | (1 << 2); 2903 break; 2904 case X86II::MRMSrcReg: { 2905 if (isX86_64ExtendedReg(MI.getOperand(0))) 2906 REX |= 1 << 2; 2907 i = isTwoAddr ? 2 : 1; 2908 for (unsigned e = NumOps; i != e; ++i) { 2909 const MachineOperand& MO = MI.getOperand(i); 2910 if (isX86_64ExtendedReg(MO)) 2911 REX |= 1 << 0; 2912 } 2913 break; 2914 } 2915 case X86II::MRMSrcMem: { 2916 if (isX86_64ExtendedReg(MI.getOperand(0))) 2917 REX |= 1 << 2; 2918 unsigned Bit = 0; 2919 i = isTwoAddr ? 2 : 1; 2920 for (; i != NumOps; ++i) { 2921 const MachineOperand& MO = MI.getOperand(i); 2922 if (MO.isReg()) { 2923 if (isX86_64ExtendedReg(MO)) 2924 REX |= 1 << Bit; 2925 Bit++; 2926 } 2927 } 2928 break; 2929 } 2930 case X86II::MRM0m: case X86II::MRM1m: 2931 case X86II::MRM2m: case X86II::MRM3m: 2932 case X86II::MRM4m: case X86II::MRM5m: 2933 case X86II::MRM6m: case X86II::MRM7m: 2934 case X86II::MRMDestMem: { 2935 unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); 2936 i = isTwoAddr ? 1 : 0; 2937 if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) 2938 REX |= 1 << 2; 2939 unsigned Bit = 0; 2940 for (; i != e; ++i) { 2941 const MachineOperand& MO = MI.getOperand(i); 2942 if (MO.isReg()) { 2943 if (isX86_64ExtendedReg(MO)) 2944 REX |= 1 << Bit; 2945 Bit++; 2946 } 2947 } 2948 break; 2949 } 2950 default: { 2951 if (isX86_64ExtendedReg(MI.getOperand(0))) 2952 REX |= 1 << 0; 2953 i = isTwoAddr ? 
2 : 1; 2954 for (unsigned e = NumOps; i != e; ++i) { 2955 const MachineOperand& MO = MI.getOperand(i); 2956 if (isX86_64ExtendedReg(MO)) 2957 REX |= 1 << 2; 2958 } 2959 break; 2960 } 2961 } 2962 } 2963 return REX; 2964} 2965 2966/// sizePCRelativeBlockAddress - This method returns the size of a PC 2967/// relative block address instruction 2968/// 2969static unsigned sizePCRelativeBlockAddress() { 2970 return 4; 2971} 2972 2973/// sizeGlobalAddress - Give the size of the emission of this global address 2974/// 2975static unsigned sizeGlobalAddress(bool dword) { 2976 return dword ? 8 : 4; 2977} 2978 2979/// sizeConstPoolAddress - Give the size of the emission of this constant 2980/// pool address 2981/// 2982static unsigned sizeConstPoolAddress(bool dword) { 2983 return dword ? 8 : 4; 2984} 2985 2986/// sizeExternalSymbolAddress - Give the size of the emission of this external 2987/// symbol 2988/// 2989static unsigned sizeExternalSymbolAddress(bool dword) { 2990 return dword ? 8 : 4; 2991} 2992 2993/// sizeJumpTableAddress - Give the size of the emission of this jump 2994/// table address 2995/// 2996static unsigned sizeJumpTableAddress(bool dword) { 2997 return dword ? 8 : 4; 2998} 2999 3000static unsigned sizeConstant(unsigned Size) { 3001 return Size; 3002} 3003 3004static unsigned sizeRegModRMByte(){ 3005 return 1; 3006} 3007 3008static unsigned sizeSIBByte(){ 3009 return 1; 3010} 3011 3012static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { 3013 unsigned FinalSize = 0; 3014 // If this is a simple integer displacement that doesn't require a relocation. 3015 if (!RelocOp) { 3016 FinalSize += sizeConstant(4); 3017 return FinalSize; 3018 } 3019 3020 // Otherwise, this is something that requires a relocation. 3021 if (RelocOp->isGlobal()) { 3022 FinalSize += sizeGlobalAddress(false); 3023 } else if (RelocOp->isCPI()) { 3024 FinalSize += sizeConstPoolAddress(false); 3025 } else if (RelocOp->isJTI()) { 3026 FinalSize += sizeJumpTableAddress(false); 3027 } else { 3028 llvm_unreachable("Unknown value to relocate!"); 3029 } 3030 return FinalSize; 3031} 3032 3033static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, 3034 bool IsPIC, bool Is64BitMode) { 3035 const MachineOperand &Op3 = MI.getOperand(Op+3); 3036 int DispVal = 0; 3037 const MachineOperand *DispForReloc = 0; 3038 unsigned FinalSize = 0; 3039 3040 // Figure out what sort of displacement we have to handle here. 3041 if (Op3.isGlobal()) { 3042 DispForReloc = &Op3; 3043 } else if (Op3.isCPI()) { 3044 if (Is64BitMode || IsPIC) { 3045 DispForReloc = &Op3; 3046 } else { 3047 DispVal = 1; 3048 } 3049 } else if (Op3.isJTI()) { 3050 if (Is64BitMode || IsPIC) { 3051 DispForReloc = &Op3; 3052 } else { 3053 DispVal = 1; 3054 } 3055 } else { 3056 DispVal = 1; 3057 } 3058 3059 const MachineOperand &Base = MI.getOperand(Op); 3060 const MachineOperand &IndexReg = MI.getOperand(Op+2); 3061 3062 unsigned BaseReg = Base.getReg(); 3063 3064 // Is a SIB byte needed? 3065 if ((!Is64BitMode || DispForReloc || BaseReg != 0) && 3066 IndexReg.getReg() == 0 && 3067 (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { 3068 if (BaseReg == 0) { // Just a displacement? 3069 // Emit special case [disp32] encoding 3070 ++FinalSize; 3071 FinalSize += getDisplacementFieldSize(DispForReloc); 3072 } else { 3073 unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); 3074 if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { 3075 // Emit simple indirect register encoding... [EAX] f.e. 
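        // (A bare [EAX]-style reference needs only the ModR/M byte.)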
3076 ++FinalSize; 3077 // Be pessimistic and assume it's a disp32, not a disp8 3078 } else { 3079 // Emit the most general non-SIB encoding: [REG+disp32] 3080 ++FinalSize; 3081 FinalSize += getDisplacementFieldSize(DispForReloc); 3082 } 3083 } 3084 3085 } else { // We need a SIB byte, so start by outputting the ModR/M byte first 3086 assert(IndexReg.getReg() != X86::ESP && 3087 IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); 3088 3089 bool ForceDisp32 = false; 3090 if (BaseReg == 0 || DispForReloc) { 3091 // Emit the normal disp32 encoding. 3092 ++FinalSize; 3093 ForceDisp32 = true; 3094 } else { 3095 ++FinalSize; 3096 } 3097 3098 FinalSize += sizeSIBByte(); 3099 3100 // Do we need to output a displacement? 3101 if (DispVal != 0 || ForceDisp32) { 3102 FinalSize += getDisplacementFieldSize(DispForReloc); 3103 } 3104 } 3105 return FinalSize; 3106} 3107 3108 3109static unsigned GetInstSizeWithDesc(const MachineInstr &MI, 3110 const TargetInstrDesc *Desc, 3111 bool IsPIC, bool Is64BitMode) { 3112 3113 unsigned Opcode = Desc->Opcode; 3114 unsigned FinalSize = 0; 3115 3116 // Emit the lock opcode prefix as needed. 3117 if (Desc->TSFlags & X86II::LOCK) ++FinalSize; 3118 3119 // Emit segment override opcode prefix as needed. 3120 switch (Desc->TSFlags & X86II::SegOvrMask) { 3121 case X86II::FS: 3122 case X86II::GS: 3123 ++FinalSize; 3124 break; 3125 default: llvm_unreachable("Invalid segment!"); 3126 case 0: break; // No segment override! 3127 } 3128 3129 // Emit the repeat opcode prefix as needed. 3130 if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; 3131 3132 // Emit the operand size opcode prefix as needed. 3133 if (Desc->TSFlags & X86II::OpSize) ++FinalSize; 3134 3135 // Emit the address size opcode prefix as needed. 3136 if (Desc->TSFlags & X86II::AdSize) ++FinalSize; 3137 3138 bool Need0FPrefix = false; 3139 switch (Desc->TSFlags & X86II::Op0Mask) { 3140 case X86II::TB: // Two-byte opcode prefix 3141 case X86II::T8: // 0F 38 3142 case X86II::TA: // 0F 3A 3143 Need0FPrefix = true; 3144 break; 3145 case X86II::TF: // F2 0F 38 3146 ++FinalSize; 3147 Need0FPrefix = true; 3148 break; 3149 case X86II::REP: break; // already handled. 3150 case X86II::XS: // F3 0F 3151 ++FinalSize; 3152 Need0FPrefix = true; 3153 break; 3154 case X86II::XD: // F2 0F 3155 ++FinalSize; 3156 Need0FPrefix = true; 3157 break; 3158 case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: 3159 case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: 3160 ++FinalSize; 3161 break; // Two-byte opcode prefix 3162 default: llvm_unreachable("Invalid prefix!"); 3163 case 0: break; // No prefix! 3164 } 3165 3166 if (Is64BitMode) { 3167 // REX prefix 3168 unsigned REX = X86InstrInfo::determineREX(MI); 3169 if (REX) 3170 ++FinalSize; 3171 } 3172 3173 // 0x0F escape code must be emitted just before the opcode. 3174 if (Need0FPrefix) 3175 ++FinalSize; 3176 3177 switch (Desc->TSFlags & X86II::Op0Mask) { 3178 case X86II::T8: // 0F 38 3179 ++FinalSize; 3180 break; 3181 case X86II::TA: // 0F 3A 3182 ++FinalSize; 3183 break; 3184 case X86II::TF: // F2 0F 38 3185 ++FinalSize; 3186 break; 3187 } 3188 3189 // If this is a two-address instruction, skip one of the register operands. 3190 unsigned NumOps = Desc->getNumOperands(); 3191 unsigned CurOp = 0; 3192 if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) 3193 CurOp++; 3194 else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) 3195 // Skip the last source operand that is tied_to the dest reg. e.g. 
static unsigned GetInstSizeWithDesc(const MachineInstr &MI,
                                    const TargetInstrDesc *Desc,
                                    bool IsPIC, bool Is64BitMode) {
  unsigned Opcode = Desc->Opcode;
  unsigned FinalSize = 0;

  // Count the lock opcode prefix as needed.
  if (Desc->TSFlags & X86II::LOCK) ++FinalSize;

  // Count the segment override opcode prefix as needed.
  switch (Desc->TSFlags & X86II::SegOvrMask) {
  case X86II::FS:
  case X86II::GS:
    ++FinalSize;
    break;
  default: llvm_unreachable("Invalid segment!");
  case 0: break;  // No segment override!
  }

  // Count the repeat opcode prefix as needed.
  if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize;

  // Count the operand size opcode prefix as needed.
  if (Desc->TSFlags & X86II::OpSize) ++FinalSize;

  // Count the address size opcode prefix as needed.
  if (Desc->TSFlags & X86II::AdSize) ++FinalSize;

  bool Need0FPrefix = false;
  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::TB:  // Two-byte opcode prefix
  case X86II::T8:  // 0F 38
  case X86II::TA:  // 0F 3A
    Need0FPrefix = true;
    break;
  case X86II::TF:  // F2 0F 38
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::REP: break;  // already handled.
  case X86II::XS:  // F3 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::XD:  // F2 0F
    ++FinalSize;
    Need0FPrefix = true;
    break;
  case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB:
  case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF:
    ++FinalSize;
    break;  // Two-byte opcode prefix
  default: llvm_unreachable("Invalid prefix!");
  case 0: break;  // No prefix!
  }

  if (Is64BitMode) {
    // REX prefix
    unsigned REX = X86InstrInfo::determineREX(MI);
    if (REX)
      ++FinalSize;
  }

  // 0x0F escape code must be emitted just before the opcode.
  if (Need0FPrefix)
    ++FinalSize;

  switch (Desc->TSFlags & X86II::Op0Mask) {
  case X86II::T8:  // 0F 38
    ++FinalSize;
    break;
  case X86II::TA:  // 0F 3A
    ++FinalSize;
    break;
  case X86II::TF:  // F2 0F 38
    ++FinalSize;
    break;
  }

  // If this is a two-address instruction, skip one of the register operands.
  unsigned NumOps = Desc->getNumOperands();
  unsigned CurOp = 0;
  if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1)
    CurOp++;
  else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO) == 0)
    // Skip the last source operand that is tied_to the dest reg, e.g. LXADD32.
    --NumOps;

  switch (Desc->TSFlags & X86II::FormMask) {
  default: llvm_unreachable("Unknown FormMask value in X86 MachineCodeEmitter!");
  case X86II::Pseudo:
    // Most pseudo instructions expand to no machine code at all; size the
    // exceptions below.
    switch (Opcode) {
    default:
      break;
    case TargetInstrInfo::INLINEASM: {
      const MachineFunction *MF = MI.getParent()->getParent();
      const TargetInstrInfo &TII = *MF->getTarget().getInstrInfo();
      FinalSize += TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
                                          *MF->getTarget().getMCAsmInfo());
      break;
    }
    case TargetInstrInfo::DBG_LABEL:
    case TargetInstrInfo::EH_LABEL:
      break;
    case TargetInstrInfo::IMPLICIT_DEF:
    case TargetInstrInfo::KILL:
    case X86::FP_REG_KILL:
      break;
    case X86::MOVPC32r: {
      // This counts the "call" portion of this pseudo instruction.
      ++FinalSize;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      break;
    }
    }
    CurOp = NumOps;
    break;
  case X86II::RawFrm:
    ++FinalSize;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      if (MO.isMBB()) {
        FinalSize += sizePCRelativeBlockAddress();
      } else if (MO.isGlobal()) {
        FinalSize += sizeGlobalAddress(false);
      } else if (MO.isSymbol()) {
        FinalSize += sizeExternalSymbolAddress(false);
      } else if (MO.isImm()) {
        FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
      } else {
        llvm_unreachable("Unknown RawFrm operand!");
      }
    }
    break;

  case X86II::AddRegFrm:
    ++FinalSize;
    ++CurOp;

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64ri)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRMDestReg: {
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }
  case X86II::MRMDestMem: {
    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRMSrcReg:
    ++FinalSize;
    FinalSize += sizeRegModRMByte();
    CurOp += 2;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;

  case X86II::MRMSrcMem: {
    int AddrOperands;
    if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r ||
        Opcode == X86::LEA16r || Opcode == X86::LEA32r)
      AddrOperands = X86AddrNumOperands - 1;  // No segment register
    else
      AddrOperands = X86AddrNumOperands;

    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode);
    CurOp += AddrOperands + 1;
    if (CurOp != NumOps) {
      ++CurOp;
      FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc));
    }
    break;
  }

  case X86II::MRM0r: case X86II::MRM1r:
  case X86II::MRM2r: case X86II::MRM3r:
  case X86II::MRM4r: case X86II::MRM5r:
  case X86II::MRM6r: case X86II::MRM7r:
    ++FinalSize;
    if (Desc->getOpcode() == X86::LFENCE ||
        Desc->getOpcode() == X86::MFENCE) {
      // Special handling of lfence and mfence.
      FinalSize += sizeRegModRMByte();
    } else if (Desc->getOpcode() == X86::MONITOR ||
               Desc->getOpcode() == X86::MWAIT) {
      // Special handling of monitor and mwait.
      FinalSize += sizeRegModRMByte() + 1;  // +1 for the opcode.
    } else {
      ++CurOp;
      FinalSize += sizeRegModRMByte();
    }

    if (CurOp != NumOps) {
      const MachineOperand &MO1 = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO1.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64ri32)
          dword = true;
        if (MO1.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO1.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO1.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO1.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;

  case X86II::MRM0m: case X86II::MRM1m:
  case X86II::MRM2m: case X86II::MRM3m:
  case X86II::MRM4m: case X86II::MRM5m:
  case X86II::MRM6m: case X86II::MRM7m: {
    ++FinalSize;
    FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode);
    CurOp += X86AddrNumOperands;

    if (CurOp != NumOps) {
      const MachineOperand &MO = MI.getOperand(CurOp++);
      unsigned Size = X86InstrInfo::sizeOfImm(Desc);
      if (MO.isImm())
        FinalSize += sizeConstant(Size);
      else {
        bool dword = false;
        if (Opcode == X86::MOV64mi32)
          dword = true;
        if (MO.isGlobal()) {
          FinalSize += sizeGlobalAddress(dword);
        } else if (MO.isSymbol())
          FinalSize += sizeExternalSymbolAddress(dword);
        else if (MO.isCPI())
          FinalSize += sizeConstPoolAddress(dword);
        else if (MO.isJTI())
          FinalSize += sizeJumpTableAddress(dword);
      }
    }
    break;
  }

  case X86II::MRMInitReg:
    ++FinalSize;
    // Duplicate register, used by things like MOV8r0 (aka xor reg,reg).
    FinalSize += sizeRegModRMByte();
    ++CurOp;
    break;
  }

  if (!Desc->isVariadic() && CurOp != NumOps) {
    std::string msg;
    raw_string_ostream Msg(msg);
    Msg << "Cannot determine size: " << MI;
    llvm_report_error(Msg.str());
  }

  return FinalSize;
}
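// Note that for MOVPC32r the switch above counts only the "call" half of the
// pseudo, i.e. the opcode plus its imm32, which should come to 5 bytes;
// GetInstSizeInBytes below adds the size of the matching POP32r, for 6 bytes
// in total.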
unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
  const TargetInstrDesc &Desc = MI->getDesc();
  bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;
  bool Is64BitMode = TM.getSubtargetImpl()->is64Bit();
  unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode);
  if (Desc.getOpcode() == X86::MOVPC32r)
    Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode);
  return Size;
}
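// A minimal usage sketch (hypothetical client code; TII and MBB are assumed
// to be in scope): summing the byte size of a basic block with this hook.
//
//   unsigned Bytes = 0;
//   for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
//        I != E; ++I)
//     Bytes += TII->GetInstSizeInBytes(&*I);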
/// getGlobalBaseReg - Return a virtual register initialized with the
/// global base register value. Output instructions required to
/// initialize the register in the function entry block, if necessary.
///
unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
  assert(!TM.getSubtarget<X86Subtarget>().is64Bit() &&
         "X86-64 PIC uses RIP relative addressing");

  X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
  unsigned GlobalBaseReg = X86FI->getGlobalBaseReg();
  if (GlobalBaseReg != 0)
    return GlobalBaseReg;

  // Insert the code that sets GlobalBaseReg into the first MBB of the
  // function.
  MachineBasicBlock &FirstMBB = MF->front();
  MachineBasicBlock::iterator MBBI = FirstMBB.begin();
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc();
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass);

  const TargetInstrInfo *TII = TM.getInstrInfo();
  // The operand of MOVPC32r is completely ignored by the asm printer; it is
  // only used in JIT code emission as a displacement relative to the PC.
  BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0);

  // If we're using vanilla 'GOT' PIC style, we should use relative addressing
  // not to the PC, but to the external symbol _GLOBAL_OFFSET_TABLE_.
  if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) {
    GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass);
    // Generate addl $_GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register.
    BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg)
      .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_",
                                    X86II::MO_GOT_ABSOLUTE_ADDRESS);
  } else {
    GlobalBaseReg = PC;
  }

  X86FI->setGlobalBaseReg(GlobalBaseReg);
  return GlobalBaseReg;
}
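// For GOT-style PIC, the instructions built above correspond roughly to the
// following assembly (label name illustrative; the asm printer picks the
// actual spelling):
//
//   call .Lpiclabel
// .Lpiclabel:
//   popl %reg
//   addl $_GLOBAL_OFFSET_TABLE_ + [.-.Lpiclabel], %reg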