X86InstrInfo.cpp revision 07406346ebbf8a958a956eb05c1e04faedfe1e63
//===- X86InstrInfo.cpp - X86 Instruction Information -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the X86 implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "X86InstrInfo.h"
#include "X86.h"
#include "X86GenInstrInfo.inc"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/GlobalVariable.h"
#include "llvm/DerivedTypes.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetAsmInfo.h"
using namespace llvm;

namespace {
  cl::opt<bool>
  NoFusing("disable-spill-fusing",
           cl::desc("Disable fusing of spill code into instructions"));
  cl::opt<bool>
  PrintFailedFusing("print-failed-fuse-candidates",
                    cl::desc("Print instructions that the allocator wants to"
                             " fuse, but the X86 backend currently can't"),
                    cl::Hidden);
  cl::opt<bool>
  ReMatPICStubLoad("remat-pic-stub-load",
                   cl::desc("Re-materialize load from stub in PIC mode"),
                   cl::init(false), cl::Hidden);
}

X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
  : TargetInstrInfoImpl(X86Insts, array_lengthof(X86Insts)),
    TM(tm), RI(tm, *this) {
  SmallVector<unsigned,16> AmbEntries;
  static const unsigned OpTbl2Addr[][2] = {
    { X86::ADC32ri, X86::ADC32mi },
    { X86::ADC32ri8, X86::ADC32mi8 },
    { X86::ADC32rr, X86::ADC32mr },
    { X86::ADC64ri32, X86::ADC64mi32 },
    { X86::ADC64ri8, X86::ADC64mi8 },
    { X86::ADC64rr, X86::ADC64mr },
    { X86::ADD16ri, X86::ADD16mi },
    { X86::ADD16ri8, X86::ADD16mi8 },
    { X86::ADD16rr, X86::ADD16mr },
    { X86::ADD32ri, X86::ADD32mi },
    { X86::ADD32ri8, X86::ADD32mi8 },
    { X86::ADD32rr, X86::ADD32mr },
    { X86::ADD64ri32, X86::ADD64mi32 },
    { X86::ADD64ri8, X86::ADD64mi8 },
    { X86::ADD64rr, X86::ADD64mr },
    { X86::ADD8ri, X86::ADD8mi },
    { X86::ADD8rr, X86::ADD8mr },
    { X86::AND16ri, X86::AND16mi },
    { X86::AND16ri8, X86::AND16mi8 },
    { X86::AND16rr, X86::AND16mr },
    { X86::AND32ri, X86::AND32mi },
    { X86::AND32ri8, X86::AND32mi8 },
    { X86::AND32rr, X86::AND32mr },
    { X86::AND64ri32, X86::AND64mi32 },
    { X86::AND64ri8, X86::AND64mi8 },
    { X86::AND64rr, X86::AND64mr },
    { X86::AND8ri, X86::AND8mi },
    { X86::AND8rr, X86::AND8mr },
    { X86::DEC16r, X86::DEC16m },
    { X86::DEC32r, X86::DEC32m },
    { X86::DEC64_16r, X86::DEC64_16m },
    { X86::DEC64_32r, X86::DEC64_32m },
    { X86::DEC64r, X86::DEC64m },
    { X86::DEC8r, X86::DEC8m },
    { X86::INC16r, X86::INC16m },
    { X86::INC32r, X86::INC32m },
    { X86::INC64_16r, X86::INC64_16m },
    { X86::INC64_32r, X86::INC64_32m },
    { X86::INC64r, X86::INC64m },
    { X86::INC8r, X86::INC8m },
    { X86::NEG16r, X86::NEG16m },
    { X86::NEG32r, X86::NEG32m },
    { X86::NEG64r, X86::NEG64m },
    { X86::NEG8r, X86::NEG8m },
    { X86::NOT16r, X86::NOT16m },
    { X86::NOT32r, X86::NOT32m },
    { X86::NOT64r, X86::NOT64m },
    { X86::NOT8r, X86::NOT8m },
    { X86::OR16ri, X86::OR16mi },
    { X86::OR16ri8, X86::OR16mi8 },
    { X86::OR16rr, X86::OR16mr },
    { X86::OR32ri, X86::OR32mi },
    { X86::OR32ri8, X86::OR32mi8 },
    { X86::OR32rr, X86::OR32mr },
    { X86::OR64ri32, X86::OR64mi32 },
    { X86::OR64ri8, X86::OR64mi8 },
    { X86::OR64rr, X86::OR64mr },
    { X86::OR8ri, X86::OR8mi },
    { X86::OR8rr, X86::OR8mr },
    { X86::ROL16r1, X86::ROL16m1 },
    { X86::ROL16rCL, X86::ROL16mCL },
    { X86::ROL16ri, X86::ROL16mi },
    { X86::ROL32r1, X86::ROL32m1 },
    { X86::ROL32rCL, X86::ROL32mCL },
    { X86::ROL32ri, X86::ROL32mi },
    { X86::ROL64r1, X86::ROL64m1 },
    { X86::ROL64rCL, X86::ROL64mCL },
    { X86::ROL64ri, X86::ROL64mi },
    { X86::ROL8r1, X86::ROL8m1 },
    { X86::ROL8rCL, X86::ROL8mCL },
    { X86::ROL8ri, X86::ROL8mi },
    { X86::ROR16r1, X86::ROR16m1 },
    { X86::ROR16rCL, X86::ROR16mCL },
    { X86::ROR16ri, X86::ROR16mi },
    { X86::ROR32r1, X86::ROR32m1 },
    { X86::ROR32rCL, X86::ROR32mCL },
    { X86::ROR32ri, X86::ROR32mi },
    { X86::ROR64r1, X86::ROR64m1 },
    { X86::ROR64rCL, X86::ROR64mCL },
    { X86::ROR64ri, X86::ROR64mi },
    { X86::ROR8r1, X86::ROR8m1 },
    { X86::ROR8rCL, X86::ROR8mCL },
    { X86::ROR8ri, X86::ROR8mi },
    { X86::SAR16r1, X86::SAR16m1 },
    { X86::SAR16rCL, X86::SAR16mCL },
    { X86::SAR16ri, X86::SAR16mi },
    { X86::SAR32r1, X86::SAR32m1 },
    { X86::SAR32rCL, X86::SAR32mCL },
    { X86::SAR32ri, X86::SAR32mi },
    { X86::SAR64r1, X86::SAR64m1 },
    { X86::SAR64rCL, X86::SAR64mCL },
    { X86::SAR64ri, X86::SAR64mi },
    { X86::SAR8r1, X86::SAR8m1 },
    { X86::SAR8rCL, X86::SAR8mCL },
    { X86::SAR8ri, X86::SAR8mi },
    { X86::SBB32ri, X86::SBB32mi },
    { X86::SBB32ri8, X86::SBB32mi8 },
    { X86::SBB32rr, X86::SBB32mr },
    { X86::SBB64ri32, X86::SBB64mi32 },
    { X86::SBB64ri8, X86::SBB64mi8 },
    { X86::SBB64rr, X86::SBB64mr },
    { X86::SHL16rCL, X86::SHL16mCL },
    { X86::SHL16ri, X86::SHL16mi },
    { X86::SHL32rCL, X86::SHL32mCL },
    { X86::SHL32ri, X86::SHL32mi },
    { X86::SHL64rCL, X86::SHL64mCL },
    { X86::SHL64ri, X86::SHL64mi },
    { X86::SHL8rCL, X86::SHL8mCL },
    { X86::SHL8ri, X86::SHL8mi },
    { X86::SHLD16rrCL, X86::SHLD16mrCL },
    { X86::SHLD16rri8, X86::SHLD16mri8 },
    { X86::SHLD32rrCL, X86::SHLD32mrCL },
    { X86::SHLD32rri8, X86::SHLD32mri8 },
    { X86::SHLD64rrCL, X86::SHLD64mrCL },
    { X86::SHLD64rri8, X86::SHLD64mri8 },
    { X86::SHR16r1, X86::SHR16m1 },
    { X86::SHR16rCL, X86::SHR16mCL },
    { X86::SHR16ri, X86::SHR16mi },
    { X86::SHR32r1, X86::SHR32m1 },
    { X86::SHR32rCL, X86::SHR32mCL },
    { X86::SHR32ri, X86::SHR32mi },
    { X86::SHR64r1, X86::SHR64m1 },
    { X86::SHR64rCL, X86::SHR64mCL },
    { X86::SHR64ri, X86::SHR64mi },
    { X86::SHR8r1, X86::SHR8m1 },
    { X86::SHR8rCL, X86::SHR8mCL },
    { X86::SHR8ri, X86::SHR8mi },
    { X86::SHRD16rrCL, X86::SHRD16mrCL },
    { X86::SHRD16rri8, X86::SHRD16mri8 },
    { X86::SHRD32rrCL, X86::SHRD32mrCL },
    { X86::SHRD32rri8, X86::SHRD32mri8 },
    { X86::SHRD64rrCL, X86::SHRD64mrCL },
    { X86::SHRD64rri8, X86::SHRD64mri8 },
    { X86::SUB16ri, X86::SUB16mi },
    { X86::SUB16ri8, X86::SUB16mi8 },
    { X86::SUB16rr, X86::SUB16mr },
    { X86::SUB32ri, X86::SUB32mi },
    { X86::SUB32ri8, X86::SUB32mi8 },
    { X86::SUB32rr, X86::SUB32mr },
    { X86::SUB64ri32, X86::SUB64mi32 },
    { X86::SUB64ri8, X86::SUB64mi8 },
    { X86::SUB64rr, X86::SUB64mr },
    { X86::SUB8ri, X86::SUB8mi },
    { X86::SUB8rr, X86::SUB8mr },
    { X86::XOR16ri, X86::XOR16mi },
    { X86::XOR16ri8, X86::XOR16mi8 },
    { X86::XOR16rr, X86::XOR16mr },
    { X86::XOR32ri, X86::XOR32mi },
    { X86::XOR32ri8, X86::XOR32mi8 },
    { X86::XOR32rr, X86::XOR32mr },
    { X86::XOR64ri32, X86::XOR64mi32 },
    { X86::XOR64ri8, X86::XOR64mi8 },
    { X86::XOR64rr, X86::XOR64mr },
    { X86::XOR8ri, X86::XOR8mi },
    { X86::XOR8rr, X86::XOR8mr }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2Addr); i != e; ++i) {
    unsigned RegOp = OpTbl2Addr[i][0];
    unsigned MemOp = OpTbl2Addr[i][1];
    if (!RegOp2MemOpTable2Addr.insert(std::make_pair((unsigned*)RegOp,
                                                     MemOp)).second)
      assert(false && "Duplicated entries?");
    unsigned AuxInfo = 0 | (1 << 4) | (1 << 5); // Index 0, folded load and store
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                                std::make_pair(RegOp,
                                                               AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // If the third value is 1, then it's folding either a load or a store.
  static const unsigned OpTbl0[][3] = {
    { X86::BT16ri8, X86::BT16mi8, 1 },
    { X86::BT32ri8, X86::BT32mi8, 1 },
    { X86::BT64ri8, X86::BT64mi8, 1 },
    { X86::CALL32r, X86::CALL32m, 1 },
    { X86::CALL64r, X86::CALL64m, 1 },
    { X86::CMP16ri, X86::CMP16mi, 1 },
    { X86::CMP16ri8, X86::CMP16mi8, 1 },
    { X86::CMP16rr, X86::CMP16mr, 1 },
    { X86::CMP32ri, X86::CMP32mi, 1 },
    { X86::CMP32ri8, X86::CMP32mi8, 1 },
    { X86::CMP32rr, X86::CMP32mr, 1 },
    { X86::CMP64ri32, X86::CMP64mi32, 1 },
    { X86::CMP64ri8, X86::CMP64mi8, 1 },
    { X86::CMP64rr, X86::CMP64mr, 1 },
    { X86::CMP8ri, X86::CMP8mi, 1 },
    { X86::CMP8rr, X86::CMP8mr, 1 },
    { X86::DIV16r, X86::DIV16m, 1 },
    { X86::DIV32r, X86::DIV32m, 1 },
    { X86::DIV64r, X86::DIV64m, 1 },
    { X86::DIV8r, X86::DIV8m, 1 },
    { X86::EXTRACTPSrr, X86::EXTRACTPSmr, 0 },
    { X86::FsMOVAPDrr, X86::MOVSDmr, 0 },
    { X86::FsMOVAPSrr, X86::MOVSSmr, 0 },
    { X86::IDIV16r, X86::IDIV16m, 1 },
    { X86::IDIV32r, X86::IDIV32m, 1 },
    { X86::IDIV64r, X86::IDIV64m, 1 },
    { X86::IDIV8r, X86::IDIV8m, 1 },
    { X86::IMUL16r, X86::IMUL16m, 1 },
    { X86::IMUL32r, X86::IMUL32m, 1 },
    { X86::IMUL64r, X86::IMUL64m, 1 },
    { X86::IMUL8r, X86::IMUL8m, 1 },
    { X86::JMP32r, X86::JMP32m, 1 },
    { X86::JMP64r, X86::JMP64m, 1 },
    { X86::MOV16ri, X86::MOV16mi, 0 },
    { X86::MOV16rr, X86::MOV16mr, 0 },
    { X86::MOV32ri, X86::MOV32mi, 0 },
    { X86::MOV32rr, X86::MOV32mr, 0 },
    { X86::MOV64ri32, X86::MOV64mi32, 0 },
    { X86::MOV64rr, X86::MOV64mr, 0 },
    { X86::MOV8ri, X86::MOV8mi, 0 },
    { X86::MOV8rr, X86::MOV8mr, 0 },
    { X86::MOV8rr_NOREX, X86::MOV8mr_NOREX, 0 },
    { X86::MOVAPDrr, X86::MOVAPDmr, 0 },
    { X86::MOVAPSrr, X86::MOVAPSmr, 0 },
    { X86::MOVDQArr, X86::MOVDQAmr, 0 },
    { X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0 },
    { X86::MOVPQIto64rr, X86::MOVPQI2QImr, 0 },
    { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0 },
    { X86::MOVSDrr, X86::MOVSDmr, 0 },
    { X86::MOVSDto64rr, X86::MOVSDto64mr, 0 },
    { X86::MOVSS2DIrr, X86::MOVSS2DImr, 0 },
    { X86::MOVSSrr, X86::MOVSSmr, 0 },
    { X86::MOVUPDrr, X86::MOVUPDmr, 0 },
    { X86::MOVUPSrr, X86::MOVUPSmr, 0 },
    { X86::MUL16r, X86::MUL16m, 1 },
    { X86::MUL32r, X86::MUL32m, 1 },
    { X86::MUL64r, X86::MUL64m, 1 },
    { X86::MUL8r, X86::MUL8m, 1 },
    { X86::SETAEr, X86::SETAEm, 0 },
    { X86::SETAr, X86::SETAm, 0 },
    { X86::SETBEr, X86::SETBEm, 0 },
    { X86::SETBr, X86::SETBm, 0 },
    { X86::SETEr, X86::SETEm, 0 },
    { X86::SETGEr, X86::SETGEm, 0 },
    { X86::SETGr, X86::SETGm, 0 },
    { X86::SETLEr, X86::SETLEm, 0 },
    { X86::SETLr, X86::SETLm, 0 },
    { X86::SETNEr, X86::SETNEm, 0 },
    { X86::SETNOr, X86::SETNOm, 0 },
    { X86::SETNPr, X86::SETNPm, 0 },
    { X86::SETNSr, X86::SETNSm, 0 },
    { X86::SETOr, X86::SETOm, 0 },
    { X86::SETPr, X86::SETPm, 0 },
    { X86::SETSr, X86::SETSm, 0 },
    { X86::TAILJMPr, X86::TAILJMPm, 1 },
    { X86::TEST16ri, X86::TEST16mi, 1 },
    { X86::TEST32ri, X86::TEST32mi, 1 },
    { X86::TEST64ri32, X86::TEST64mi32, 1 },
    { X86::TEST8ri, X86::TEST8mi, 1 }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl0); i != e; ++i) {
    unsigned RegOp = OpTbl0[i][0];
    unsigned MemOp = OpTbl0[i][1];
    if (!RegOp2MemOpTable0.insert(std::make_pair((unsigned*)RegOp,
                                                 MemOp)).second)
      assert(false && "Duplicated entries?");
    unsigned FoldedLoad = OpTbl0[i][2];
    // Index 0, folded load or store.
    unsigned AuxInfo = 0 | (FoldedLoad << 4) | ((FoldedLoad^1) << 5);
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                       std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl1[][2] = {
    { X86::CMP16rr, X86::CMP16rm },
    { X86::CMP32rr, X86::CMP32rm },
    { X86::CMP64rr, X86::CMP64rm },
    { X86::CMP8rr, X86::CMP8rm },
    { X86::CVTSD2SSrr, X86::CVTSD2SSrm },
    { X86::CVTSI2SD64rr, X86::CVTSI2SD64rm },
    { X86::CVTSI2SDrr, X86::CVTSI2SDrm },
    { X86::CVTSI2SS64rr, X86::CVTSI2SS64rm },
    { X86::CVTSI2SSrr, X86::CVTSI2SSrm },
    { X86::CVTSS2SDrr, X86::CVTSS2SDrm },
    { X86::CVTTSD2SI64rr, X86::CVTTSD2SI64rm },
    { X86::CVTTSD2SIrr, X86::CVTTSD2SIrm },
    { X86::CVTTSS2SI64rr, X86::CVTTSS2SI64rm },
    { X86::CVTTSS2SIrr, X86::CVTTSS2SIrm },
    { X86::FsMOVAPDrr, X86::MOVSDrm },
    { X86::FsMOVAPSrr, X86::MOVSSrm },
    { X86::IMUL16rri, X86::IMUL16rmi },
    { X86::IMUL16rri8, X86::IMUL16rmi8 },
    { X86::IMUL32rri, X86::IMUL32rmi },
    { X86::IMUL32rri8, X86::IMUL32rmi8 },
    { X86::IMUL64rri32, X86::IMUL64rmi32 },
    { X86::IMUL64rri8, X86::IMUL64rmi8 },
    { X86::Int_CMPSDrr, X86::Int_CMPSDrm },
    { X86::Int_CMPSSrr, X86::Int_CMPSSrm },
    { X86::Int_COMISDrr, X86::Int_COMISDrm },
    { X86::Int_COMISSrr, X86::Int_COMISSrm },
    { X86::Int_CVTDQ2PDrr, X86::Int_CVTDQ2PDrm },
    { X86::Int_CVTDQ2PSrr, X86::Int_CVTDQ2PSrm },
    { X86::Int_CVTPD2DQrr, X86::Int_CVTPD2DQrm },
    { X86::Int_CVTPD2PSrr, X86::Int_CVTPD2PSrm },
    { X86::Int_CVTPS2DQrr, X86::Int_CVTPS2DQrm },
    { X86::Int_CVTPS2PDrr, X86::Int_CVTPS2PDrm },
    { X86::Int_CVTSD2SI64rr, X86::Int_CVTSD2SI64rm },
    { X86::Int_CVTSD2SIrr, X86::Int_CVTSD2SIrm },
    { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm },
    { X86::Int_CVTSI2SD64rr, X86::Int_CVTSI2SD64rm },
    { X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm },
    { X86::Int_CVTSI2SS64rr, X86::Int_CVTSI2SS64rm },
    { X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm },
    { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm },
    { X86::Int_CVTSS2SI64rr, X86::Int_CVTSS2SI64rm },
    { X86::Int_CVTSS2SIrr, X86::Int_CVTSS2SIrm },
    { X86::Int_CVTTPD2DQrr, X86::Int_CVTTPD2DQrm },
    { X86::Int_CVTTPS2DQrr, X86::Int_CVTTPS2DQrm },
    { X86::Int_CVTTSD2SI64rr, X86::Int_CVTTSD2SI64rm },
    { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm },
    { X86::Int_CVTTSS2SI64rr, X86::Int_CVTTSS2SI64rm },
    { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm },
    { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm },
    { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm },
    { X86::MOV16rr, X86::MOV16rm },
    { X86::MOV32rr, X86::MOV32rm },
    { X86::MOV64rr, X86::MOV64rm },
    { X86::MOV64toPQIrr, X86::MOVQI2PQIrm },
    { X86::MOV64toSDrr, X86::MOV64toSDrm },
    { X86::MOV8rr, X86::MOV8rm },
    { X86::MOVAPDrr, X86::MOVAPDrm },
    { X86::MOVAPSrr, X86::MOVAPSrm },
    { X86::MOVDDUPrr, X86::MOVDDUPrm },
    { X86::MOVDI2PDIrr, X86::MOVDI2PDIrm },
    { X86::MOVDI2SSrr, X86::MOVDI2SSrm },
    { X86::MOVDQArr, X86::MOVDQArm },
    { X86::MOVSD2PDrr, X86::MOVSD2PDrm },
    { X86::MOVSDrr, X86::MOVSDrm },
    { X86::MOVSHDUPrr, X86::MOVSHDUPrm },
    { X86::MOVSLDUPrr, X86::MOVSLDUPrm },
    { X86::MOVSS2PSrr, X86::MOVSS2PSrm },
    { X86::MOVSSrr, X86::MOVSSrm },
    { X86::MOVSX16rr8, X86::MOVSX16rm8 },
    { X86::MOVSX32rr16, X86::MOVSX32rm16 },
    { X86::MOVSX32rr8, X86::MOVSX32rm8 },
    { X86::MOVSX64rr16, X86::MOVSX64rm16 },
    { X86::MOVSX64rr32, X86::MOVSX64rm32 },
    { X86::MOVSX64rr8, X86::MOVSX64rm8 },
    { X86::MOVUPDrr, X86::MOVUPDrm },
    { X86::MOVUPSrr, X86::MOVUPSrm },
    { X86::MOVZDI2PDIrr, X86::MOVZDI2PDIrm },
    { X86::MOVZQI2PQIrr, X86::MOVZQI2PQIrm },
    { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm },
    { X86::MOVZX16rr8, X86::MOVZX16rm8 },
    { X86::MOVZX32rr16, X86::MOVZX32rm16 },
    { X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8 },
    { X86::MOVZX32rr8, X86::MOVZX32rm8 },
    { X86::MOVZX64rr16, X86::MOVZX64rm16 },
    { X86::MOVZX64rr32, X86::MOVZX64rm32 },
    { X86::MOVZX64rr8, X86::MOVZX64rm8 },
    { X86::PSHUFDri, X86::PSHUFDmi },
    { X86::PSHUFHWri, X86::PSHUFHWmi },
    { X86::PSHUFLWri, X86::PSHUFLWmi },
    { X86::RCPPSr, X86::RCPPSm },
    { X86::RCPPSr_Int, X86::RCPPSm_Int },
    { X86::RSQRTPSr, X86::RSQRTPSm },
    { X86::RSQRTPSr_Int, X86::RSQRTPSm_Int },
    { X86::RSQRTSSr, X86::RSQRTSSm },
    { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int },
    { X86::SQRTPDr, X86::SQRTPDm },
    { X86::SQRTPDr_Int, X86::SQRTPDm_Int },
    { X86::SQRTPSr, X86::SQRTPSm },
    { X86::SQRTPSr_Int, X86::SQRTPSm_Int },
    { X86::SQRTSDr, X86::SQRTSDm },
    { X86::SQRTSDr_Int, X86::SQRTSDm_Int },
    { X86::SQRTSSr, X86::SQRTSSm },
    { X86::SQRTSSr_Int, X86::SQRTSSm_Int },
    { X86::TEST16rr, X86::TEST16rm },
    { X86::TEST32rr, X86::TEST32rm },
    { X86::TEST64rr, X86::TEST64rm },
    { X86::TEST8rr, X86::TEST8rm },
    // FIXME: TEST*rr EAX,EAX ---> CMP [mem], 0
    { X86::UCOMISDrr, X86::UCOMISDrm },
    { X86::UCOMISSrr, X86::UCOMISSrm }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) {
    unsigned RegOp = OpTbl1[i][0];
    unsigned MemOp = OpTbl1[i][1];
    if (!RegOp2MemOpTable1.insert(std::make_pair((unsigned*)RegOp,
                                                 MemOp)).second)
      assert(false && "Duplicated entries?");
    unsigned AuxInfo = 1 | (1 << 4); // Index 1, folded load
    if (RegOp != X86::FsMOVAPDrr && RegOp != X86::FsMOVAPSrr)
      if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                       std::make_pair(RegOp, AuxInfo))).second)
        AmbEntries.push_back(MemOp);
  }

  static const unsigned OpTbl2[][2] = {
    { X86::ADC32rr, X86::ADC32rm },
    { X86::ADC64rr, X86::ADC64rm },
    { X86::ADD16rr, X86::ADD16rm },
    { X86::ADD32rr, X86::ADD32rm },
    { X86::ADD64rr, X86::ADD64rm },
    { X86::ADD8rr, X86::ADD8rm },
    { X86::ADDPDrr, X86::ADDPDrm },
    { X86::ADDPSrr, X86::ADDPSrm },
    { X86::ADDSDrr, X86::ADDSDrm },
    { X86::ADDSSrr, X86::ADDSSrm },
    { X86::ADDSUBPDrr, X86::ADDSUBPDrm },
    { X86::ADDSUBPSrr, X86::ADDSUBPSrm },
    { X86::AND16rr, X86::AND16rm },
    { X86::AND32rr, X86::AND32rm },
    { X86::AND64rr, X86::AND64rm },
    { X86::AND8rr, X86::AND8rm },
    { X86::ANDNPDrr, X86::ANDNPDrm },
    { X86::ANDNPSrr, X86::ANDNPSrm },
    { X86::ANDPDrr, X86::ANDPDrm },
    { X86::ANDPSrr, X86::ANDPSrm },
    { X86::CMOVA16rr, X86::CMOVA16rm },
    { X86::CMOVA32rr, X86::CMOVA32rm },
    { X86::CMOVA64rr, X86::CMOVA64rm },
    { X86::CMOVAE16rr, X86::CMOVAE16rm },
    { X86::CMOVAE32rr, X86::CMOVAE32rm },
    { X86::CMOVAE64rr, X86::CMOVAE64rm },
    { X86::CMOVB16rr, X86::CMOVB16rm },
    { X86::CMOVB32rr, X86::CMOVB32rm },
    { X86::CMOVB64rr, X86::CMOVB64rm },
    { X86::CMOVBE16rr, X86::CMOVBE16rm },
    { X86::CMOVBE32rr, X86::CMOVBE32rm },
    { X86::CMOVBE64rr, X86::CMOVBE64rm },
    { X86::CMOVE16rr, X86::CMOVE16rm },
    { X86::CMOVE32rr, X86::CMOVE32rm },
    { X86::CMOVE64rr, X86::CMOVE64rm },
    { X86::CMOVG16rr, X86::CMOVG16rm },
    { X86::CMOVG32rr, X86::CMOVG32rm },
    { X86::CMOVG64rr, X86::CMOVG64rm },
    { X86::CMOVGE16rr, X86::CMOVGE16rm },
    { X86::CMOVGE32rr, X86::CMOVGE32rm },
    { X86::CMOVGE64rr, X86::CMOVGE64rm },
    { X86::CMOVL16rr, X86::CMOVL16rm },
    { X86::CMOVL32rr, X86::CMOVL32rm },
    { X86::CMOVL64rr, X86::CMOVL64rm },
    { X86::CMOVLE16rr, X86::CMOVLE16rm },
    { X86::CMOVLE32rr, X86::CMOVLE32rm },
    { X86::CMOVLE64rr, X86::CMOVLE64rm },
    { X86::CMOVNE16rr, X86::CMOVNE16rm },
    { X86::CMOVNE32rr, X86::CMOVNE32rm },
    { X86::CMOVNE64rr, X86::CMOVNE64rm },
    { X86::CMOVNO16rr, X86::CMOVNO16rm },
    { X86::CMOVNO32rr, X86::CMOVNO32rm },
    { X86::CMOVNO64rr, X86::CMOVNO64rm },
    { X86::CMOVNP16rr, X86::CMOVNP16rm },
    { X86::CMOVNP32rr, X86::CMOVNP32rm },
    { X86::CMOVNP64rr, X86::CMOVNP64rm },
    { X86::CMOVNS16rr, X86::CMOVNS16rm },
    { X86::CMOVNS32rr, X86::CMOVNS32rm },
    { X86::CMOVNS64rr, X86::CMOVNS64rm },
    { X86::CMOVO16rr, X86::CMOVO16rm },
    { X86::CMOVO32rr, X86::CMOVO32rm },
    { X86::CMOVO64rr, X86::CMOVO64rm },
    { X86::CMOVP16rr, X86::CMOVP16rm },
    { X86::CMOVP32rr, X86::CMOVP32rm },
    { X86::CMOVP64rr, X86::CMOVP64rm },
    { X86::CMOVS16rr, X86::CMOVS16rm },
    { X86::CMOVS32rr, X86::CMOVS32rm },
    { X86::CMOVS64rr, X86::CMOVS64rm },
    { X86::CMPPDrri, X86::CMPPDrmi },
    { X86::CMPPSrri, X86::CMPPSrmi },
    { X86::CMPSDrr, X86::CMPSDrm },
    { X86::CMPSSrr, X86::CMPSSrm },
    { X86::DIVPDrr, X86::DIVPDrm },
    { X86::DIVPSrr, X86::DIVPSrm },
    { X86::DIVSDrr, X86::DIVSDrm },
    { X86::DIVSSrr, X86::DIVSSrm },
    { X86::FsANDNPDrr, X86::FsANDNPDrm },
    { X86::FsANDNPSrr, X86::FsANDNPSrm },
    { X86::FsANDPDrr, X86::FsANDPDrm },
    { X86::FsANDPSrr, X86::FsANDPSrm },
    { X86::FsORPDrr, X86::FsORPDrm },
    { X86::FsORPSrr, X86::FsORPSrm },
    { X86::FsXORPDrr, X86::FsXORPDrm },
    { X86::FsXORPSrr, X86::FsXORPSrm },
    { X86::HADDPDrr, X86::HADDPDrm },
    { X86::HADDPSrr, X86::HADDPSrm },
    { X86::HSUBPDrr, X86::HSUBPDrm },
    { X86::HSUBPSrr, X86::HSUBPSrm },
    { X86::IMUL16rr, X86::IMUL16rm },
    { X86::IMUL32rr, X86::IMUL32rm },
    { X86::IMUL64rr, X86::IMUL64rm },
    { X86::MAXPDrr, X86::MAXPDrm },
    { X86::MAXPDrr_Int, X86::MAXPDrm_Int },
    { X86::MAXPSrr, X86::MAXPSrm },
    { X86::MAXPSrr_Int, X86::MAXPSrm_Int },
    { X86::MAXSDrr, X86::MAXSDrm },
    { X86::MAXSDrr_Int, X86::MAXSDrm_Int },
    { X86::MAXSSrr, X86::MAXSSrm },
    { X86::MAXSSrr_Int, X86::MAXSSrm_Int },
    { X86::MINPDrr, X86::MINPDrm },
    { X86::MINPDrr_Int, X86::MINPDrm_Int },
    { X86::MINPSrr, X86::MINPSrm },
    { X86::MINPSrr_Int, X86::MINPSrm_Int },
    { X86::MINSDrr, X86::MINSDrm },
    { X86::MINSDrr_Int, X86::MINSDrm_Int },
    { X86::MINSSrr, X86::MINSSrm },
    { X86::MINSSrr_Int, X86::MINSSrm_Int },
    { X86::MULPDrr, X86::MULPDrm },
    { X86::MULPSrr, X86::MULPSrm },
    { X86::MULSDrr, X86::MULSDrm },
    { X86::MULSSrr, X86::MULSSrm },
    { X86::OR16rr, X86::OR16rm },
    { X86::OR32rr, X86::OR32rm },
    { X86::OR64rr, X86::OR64rm },
    { X86::OR8rr, X86::OR8rm },
    { X86::ORPDrr, X86::ORPDrm },
    { X86::ORPSrr, X86::ORPSrm },
    { X86::PACKSSDWrr, X86::PACKSSDWrm },
    { X86::PACKSSWBrr, X86::PACKSSWBrm },
    { X86::PACKUSWBrr, X86::PACKUSWBrm },
    { X86::PADDBrr, X86::PADDBrm },
    { X86::PADDDrr, X86::PADDDrm },
    { X86::PADDQrr, X86::PADDQrm },
    { X86::PADDSBrr, X86::PADDSBrm },
    { X86::PADDSWrr, X86::PADDSWrm },
    { X86::PADDWrr, X86::PADDWrm },
    { X86::PANDNrr, X86::PANDNrm },
    { X86::PANDrr, X86::PANDrm },
    { X86::PAVGBrr, X86::PAVGBrm },
    { X86::PAVGWrr, X86::PAVGWrm },
    { X86::PCMPEQBrr, X86::PCMPEQBrm },
    { X86::PCMPEQDrr, X86::PCMPEQDrm },
    { X86::PCMPEQWrr, X86::PCMPEQWrm },
    { X86::PCMPGTBrr, X86::PCMPGTBrm },
    { X86::PCMPGTDrr, X86::PCMPGTDrm },
    { X86::PCMPGTWrr, X86::PCMPGTWrm },
    { X86::PINSRWrri, X86::PINSRWrmi },
    { X86::PMADDWDrr, X86::PMADDWDrm },
    { X86::PMAXSWrr, X86::PMAXSWrm },
    { X86::PMAXUBrr, X86::PMAXUBrm },
    { X86::PMINSWrr, X86::PMINSWrm },
    { X86::PMINUBrr, X86::PMINUBrm },
    { X86::PMULDQrr, X86::PMULDQrm },
    { X86::PMULHUWrr, X86::PMULHUWrm },
    { X86::PMULHWrr, X86::PMULHWrm },
    { X86::PMULLDrr, X86::PMULLDrm },
    { X86::PMULLDrr_int, X86::PMULLDrm_int },
    { X86::PMULLWrr, X86::PMULLWrm },
    { X86::PMULUDQrr, X86::PMULUDQrm },
    { X86::PORrr, X86::PORrm },
    { X86::PSADBWrr, X86::PSADBWrm },
    { X86::PSLLDrr, X86::PSLLDrm },
    { X86::PSLLQrr, X86::PSLLQrm },
    { X86::PSLLWrr, X86::PSLLWrm },
    { X86::PSRADrr, X86::PSRADrm },
    { X86::PSRAWrr, X86::PSRAWrm },
    { X86::PSRLDrr, X86::PSRLDrm },
    { X86::PSRLQrr, X86::PSRLQrm },
    { X86::PSRLWrr, X86::PSRLWrm },
    { X86::PSUBBrr, X86::PSUBBrm },
    { X86::PSUBDrr, X86::PSUBDrm },
    { X86::PSUBSBrr, X86::PSUBSBrm },
    { X86::PSUBSWrr, X86::PSUBSWrm },
    { X86::PSUBWrr, X86::PSUBWrm },
    { X86::PUNPCKHBWrr, X86::PUNPCKHBWrm },
    { X86::PUNPCKHDQrr, X86::PUNPCKHDQrm },
    { X86::PUNPCKHQDQrr, X86::PUNPCKHQDQrm },
    { X86::PUNPCKHWDrr, X86::PUNPCKHWDrm },
    { X86::PUNPCKLBWrr, X86::PUNPCKLBWrm },
    { X86::PUNPCKLDQrr, X86::PUNPCKLDQrm },
    { X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm },
    { X86::PUNPCKLWDrr, X86::PUNPCKLWDrm },
    { X86::PXORrr, X86::PXORrm },
    { X86::SBB32rr, X86::SBB32rm },
    { X86::SBB64rr, X86::SBB64rm },
    { X86::SHUFPDrri, X86::SHUFPDrmi },
    { X86::SHUFPSrri, X86::SHUFPSrmi },
    { X86::SUB16rr, X86::SUB16rm },
    { X86::SUB32rr, X86::SUB32rm },
    { X86::SUB64rr, X86::SUB64rm },
    { X86::SUB8rr, X86::SUB8rm },
    { X86::SUBPDrr, X86::SUBPDrm },
    { X86::SUBPSrr, X86::SUBPSrm },
    { X86::SUBSDrr, X86::SUBSDrm },
    { X86::SUBSSrr, X86::SUBSSrm },
    // FIXME: TEST*rr -> swapped operand of TEST*mr.
    { X86::UNPCKHPDrr, X86::UNPCKHPDrm },
    { X86::UNPCKHPSrr, X86::UNPCKHPSrm },
    { X86::UNPCKLPDrr, X86::UNPCKLPDrm },
    { X86::UNPCKLPSrr, X86::UNPCKLPSrm },
    { X86::XOR16rr, X86::XOR16rm },
    { X86::XOR32rr, X86::XOR32rm },
    { X86::XOR64rr, X86::XOR64rm },
    { X86::XOR8rr, X86::XOR8rm },
    { X86::XORPDrr, X86::XORPDrm },
    { X86::XORPSrr, X86::XORPSrm }
  };

  for (unsigned i = 0, e = array_lengthof(OpTbl2); i != e; ++i) {
    unsigned RegOp = OpTbl2[i][0];
    unsigned MemOp = OpTbl2[i][1];
    if (!RegOp2MemOpTable2.insert(std::make_pair((unsigned*)RegOp,
                                                 MemOp)).second)
      assert(false && "Duplicated entries?");
    unsigned AuxInfo = 2 | (1 << 4); // Index 2, folded load
    if (!MemOp2RegOpTable.insert(std::make_pair((unsigned*)MemOp,
                                     std::make_pair(RegOp, AuxInfo))).second)
      AmbEntries.push_back(MemOp);
  }

  // Remove ambiguous entries.
  assert(AmbEntries.empty() && "Duplicated entries in unfolding maps?");
}

bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  default:
    return false;
  case X86::MOV8rr:
  case X86::MOV8rr_NOREX:
  case X86::MOV16rr:
  case X86::MOV32rr:
  case X86::MOV64rr:
  case X86::MOVSSrr:
  case X86::MOVSDrr:

  // FP Stack register class copies
  case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
  case X86::MOV_Fp3264: case X86::MOV_Fp3280:
  case X86::MOV_Fp6432: case X86::MOV_Fp8032:

  case X86::FsMOVAPSrr:
  case X86::FsMOVAPDrr:
  case X86::MOVAPSrr:
  case X86::MOVAPDrr:
  case X86::MOVDQArr:
  case X86::MOVSS2PSrr:
  case X86::MOVSD2PDrr:
  case X86::MOVPS2SSrr:
  case X86::MOVPD2SDrr:
  case X86::MMX_MOVQ64rr:
    assert(MI.getNumOperands() >= 2 &&
           MI.getOperand(0).isReg() &&
           MI.getOperand(1).isReg() &&
           "invalid register-register move instruction");
    SrcReg = MI.getOperand(1).getReg();
    DstReg = MI.getOperand(0).getReg();
    SrcSubIdx = MI.getOperand(1).getSubReg();
    DstSubIdx = MI.getOperand(0).getSubReg();
    return true;
  }
}

unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
                                           int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm:
    if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
        MI->getOperand(2).getImm() == 1 &&
        MI->getOperand(3).getReg() == 0 &&
        MI->getOperand(4).getImm() == 0) {
      FrameIndex = MI->getOperand(1).getIndex();
      return MI->getOperand(0).getReg();
    }
    break;
  }
  return 0;
}

unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
                                          int &FrameIndex) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8mr:
  case X86::MOV16mr:
  case X86::MOV32mr:
  case X86::MOV64mr:
  case X86::ST_FpP64m:
  case X86::MOVSSmr:
  case X86::MOVSDmr:
  case X86::MOVAPSmr:
  case X86::MOVAPDmr:
  case X86::MOVDQAmr:
  case X86::MMX_MOVD64mr:
  case X86::MMX_MOVQ64mr:
  case X86::MMX_MOVNTQmr:
    if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
        MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
        MI->getOperand(1).getImm() == 1 &&
        MI->getOperand(2).getReg() == 0 &&
        MI->getOperand(3).getImm() == 0) {
      FrameIndex = MI->getOperand(0).getIndex();
      return MI->getOperand(X86AddrNumOperands).getReg();
    }
    break;
  }
  return 0;
}

/// regIsPICBase - Return true if register is PIC base (i.e., defined by
/// X86::MOVPC32r).
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
  bool isPICBase = false;
  for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
         E = MRI.def_end(); I != E; ++I) {
    MachineInstr *DefMI = I.getOperand().getParent();
    if (DefMI->getOpcode() != X86::MOVPC32r)
      return false;
    assert(!isPICBase && "More than one PIC base?");
    isPICBase = true;
  }
  return isPICBase;
}

/// CanRematLoadWithDispOperand - Return true if a load with the specified
/// operand is a candidate for remat: for this to be true we need to know that
/// the load will always return the same value, even if moved.
static bool CanRematLoadWithDispOperand(const MachineOperand &MO,
                                        X86TargetMachine &TM) {
  // Loads from constant pool entries can be remat'd.
  if (MO.isCPI()) return true;

  // We can remat globals in some cases.
  if (MO.isGlobal()) {
    // If this is a load of a stub, not of the global, we can remat it. This
    // access will always return the address of the global.
    if (isGlobalStubReference(MO))
      return true;

    // If the global itself is constant, we can remat the load.
    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(MO.getGlobal()))
      if (GV->isConstant())
        return true;
  }
  return false;
}

bool
X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI) const {
  switch (MI->getOpcode()) {
  default: break;
  case X86::MOV8rm:
  case X86::MOV16rm:
  case X86::MOV32rm:
  case X86::MOV64rm:
  case X86::LD_Fp64m:
  case X86::MOVSSrm:
  case X86::MOVSDrm:
  case X86::MOVAPSrm:
  case X86::MOVAPDrm:
  case X86::MOVDQArm:
  case X86::MMX_MOVD64rm:
  case X86::MMX_MOVQ64rm: {
    // Loads from constant pools are trivially rematerializable.
    if (MI->getOperand(1).isReg() &&
        MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        CanRematLoadWithDispOperand(MI->getOperand(4), TM)) {
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0 || BaseReg == X86::RIP)
        return true;
      // Allow re-materialization of PIC load.
      if (!ReMatPICStubLoad && MI->getOperand(4).isGlobal())
        return false;
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      bool isPICBase = false;
      for (MachineRegisterInfo::def_iterator I = MRI.def_begin(BaseReg),
             E = MRI.def_end(); I != E; ++I) {
        MachineInstr *DefMI = I.getOperand().getParent();
        if (DefMI->getOpcode() != X86::MOVPC32r)
          return false;
        assert(!isPICBase && "More than one PIC base?");
        isPICBase = true;
      }
      return isPICBase;
    }
    return false;
  }

  case X86::LEA32r:
  case X86::LEA64r: {
    if (MI->getOperand(2).isImm() &&
        MI->getOperand(3).isReg() && MI->getOperand(3).getReg() == 0 &&
        !MI->getOperand(4).isReg()) {
      // lea fi#, lea GV, etc. are all rematerializable.
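      // For example, an LEA of a bare frame index or global symbol computes
      // an address from the operand alone, with no register inputs, so
      // re-executing it anywhere in the function produces the same value.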
      if (!MI->getOperand(1).isReg())
        return true;
      unsigned BaseReg = MI->getOperand(1).getReg();
      if (BaseReg == 0)
        return true;
      // Allow re-materialization of lea PICBase + x.
      const MachineFunction &MF = *MI->getParent()->getParent();
      const MachineRegisterInfo &MRI = MF.getRegInfo();
      return regIsPICBase(BaseReg, MRI);
    }
    return false;
  }
  }

  // All other instructions marked M_REMATERIALIZABLE are always trivially
  // rematerializable.
  return true;
}

/// isSafeToClobberEFLAGS - Return true if it's safe to insert an instruction
/// that would clobber the EFLAGS condition register. Note the result may be
/// conservative. If it cannot definitely determine the safety after visiting
/// two instructions it assumes it's not safe.
static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I) {
  // It's always safe to clobber EFLAGS at the end of a block.
  if (I == MBB.end())
    return true;

  // For compile time consideration, if we are not able to determine the
  // safety after visiting 2 instructions, we will assume it's not safe.
  for (unsigned i = 0; i < 2; ++i) {
    bool SeenDef = false;
    for (unsigned j = 0, e = I->getNumOperands(); j != e; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      if (MO.getReg() == X86::EFLAGS) {
        if (MO.isUse())
          return false;
        SeenDef = true;
      }
    }

    if (SeenDef)
      // This instruction defines EFLAGS, no need to look any further.
      return true;
    ++I;

    // If we make it to the end of the block, it's safe to clobber EFLAGS.
    if (I == MBB.end())
      return true;
  }

  // Conservative answer.
  return false;
}

void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator I,
                                 unsigned DestReg,
                                 const MachineInstr *Orig) const {
  DebugLoc DL = DebugLoc::getUnknownLoc();
  if (I != MBB.end()) DL = I->getDebugLoc();

  unsigned SubIdx = Orig->getOperand(0).isReg()
    ? Orig->getOperand(0).getSubReg() : 0;
  bool ChangeSubIdx = SubIdx != 0;
  if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
    DestReg = RI.getSubReg(DestReg, SubIdx);
    SubIdx = 0;
  }

  // MOV32r0 etc. are implemented with xor which clobbers condition code.
  // Re-materialize them as movri instructions to avoid side effects.
  bool Emitted = false;
  switch (Orig->getOpcode()) {
  default: break;
  case X86::MOV8r0:
  case X86::MOV16r0:
  case X86::MOV32r0:
  case X86::MOV64r0: {
    if (!isSafeToClobberEFLAGS(MBB, I)) {
      unsigned Opc = 0;
      switch (Orig->getOpcode()) {
      default: break;
      case X86::MOV8r0:  Opc = X86::MOV8ri;  break;
      case X86::MOV16r0: Opc = X86::MOV16ri; break;
      case X86::MOV32r0: Opc = X86::MOV32ri; break;
      case X86::MOV64r0: Opc = X86::MOV64ri32; break;
      }
      BuildMI(MBB, I, DL, get(Opc), DestReg).addImm(0);
      Emitted = true;
    }
    break;
  }
  }

  if (!Emitted) {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
    MI->getOperand(0).setReg(DestReg);
    MBB.insert(I, MI);
  }

  if (ChangeSubIdx) {
    MachineInstr *NewMI = prior(I);
    NewMI->getOperand(0).setSubReg(SubIdx);
  }
}

/// isInvariantLoad - Return true if the specified instruction (which is marked
/// mayLoad) is loading from a location whose value is invariant across the
/// function.
/// For example, loading a value from the constant pool or from the argument
/// area of a function if it does not change. This should only return true if
/// *all* loads the instruction does are invariant (if it does multiple loads).
bool X86InstrInfo::isInvariantLoad(const MachineInstr *MI) const {
  // This code cares about loads from three cases: constant pool entries,
  // invariant argument slots, and global stubs. In order to handle these cases
  // for all of the myriad X86 instructions, we just scan for a CP/FI/GV
  // operand and base our analysis on it. This is safe because the address of
  // none of these three cases is ever used as anything other than a load base
  // and X86 doesn't have any instructions that load from multiple places.

  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    // Loads from constant pools are trivially invariant.
    if (MO.isCPI())
      return true;

    if (MO.isGlobal())
      return isGlobalStubReference(MO);

    // If this is a load from an invariant stack slot, the load is a constant.
    if (MO.isFI()) {
      const MachineFrameInfo &MFI =
        *MI->getParent()->getParent()->getFrameInfo();
      int Idx = MO.getIndex();
      return MFI.isFixedObjectIndex(Idx) && MFI.isImmutableObjectIndex(Idx);
    }
  }

  // All other instances of these instructions are presumed to have other
  // issues.
  return false;
}

/// hasLiveCondCodeDef - True if MI has a condition code def, e.g. EFLAGS, that
/// is not marked dead.
static bool hasLiveCondCodeDef(MachineInstr *MI) {
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (MO.isReg() && MO.isDef() &&
        MO.getReg() == X86::EFLAGS && !MO.isDead()) {
      return true;
    }
  }
  return false;
}

/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
/// three-address instruction on demand. This allows the X86 target (for
/// example) to convert ADD and SHL instructions into LEA instructions if they
/// would require register copies due to two-addressness.
///
/// This method returns a null pointer if the transformation cannot be
/// performed, otherwise it returns the new instruction.
///
MachineInstr *
X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
                                    MachineBasicBlock::iterator &MBBI,
                                    LiveVariables *LV) const {
  MachineInstr *MI = MBBI;
  MachineFunction &MF = *MI->getParent()->getParent();
  // All instructions input are two-addr instructions. Get the known operands.
  unsigned Dest = MI->getOperand(0).getReg();
  unsigned Src = MI->getOperand(1).getReg();
  bool isDead = MI->getOperand(0).isDead();
  bool isKill = MI->getOperand(1).isKill();

  MachineInstr *NewMI = NULL;
  // FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
  // we have better subtarget support, enable the 16-bit LEA generation here.
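  // While this flag is set, 16-bit candidates are either rewritten through a
  // 32-bit LEA on a subregister (see the SHL16ri case below) or left alone
  // by returning null.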
  bool DisableLEA16 = true;

  unsigned MIOpc = MI->getOpcode();
  switch (MIOpc) {
  case X86::SHUFPSrri: {
    assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!");
    if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return 0;

    unsigned B = MI->getOperand(1).getReg();
    unsigned C = MI->getOperand(2).getReg();
    if (B != C) return 0;
    unsigned A = MI->getOperand(0).getReg();
    unsigned M = MI->getOperand(3).getImm();
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::PSHUFDri))
      .addReg(A, RegState::Define | getDeadRegState(isDead))
      .addReg(B, getKillRegState(isKill)).addImm(M);
    break;
  }
  case X86::SHL64ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill))
      .addImm(0);
    break;
  }
  case X86::SHL32ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
      X86::LEA64_32r : X86::LEA32r;
    NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
      .addReg(Dest, RegState::Define | getDeadRegState(isDead))
      .addReg(0).addImm(1 << ShAmt)
      .addReg(Src, getKillRegState(isKill)).addImm(0);
    break;
  }
  case X86::SHL16ri: {
    assert(MI->getNumOperands() >= 3 && "Unknown shift instruction!");
    // NOTE: LEA doesn't produce flags like shift does, but LLVM never uses
    // the flags produced by a shift yet, so this is safe.
    unsigned ShAmt = MI->getOperand(2).getImm();
    if (ShAmt == 0 || ShAmt >= 4) return 0;

    if (DisableLEA16) {
      // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
      MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
      unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
        ? X86::LEA64_32r : X86::LEA32r;
      unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
      unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);

      // Build and insert into an implicit UNDEF value. This is OK because
      // we'll be shifting and then extracting the lower 16-bits.
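      // Schematically, the sequence built below is:
      //   leaInReg  = IMPLICIT_DEF
      //   leaInReg  = INSERT_SUBREG leaInReg, Src, SUBREG_16BIT
      //   leaOutReg = LEA32r [leaInReg * (1 << ShAmt)]
      //   Dest      = EXTRACT_SUBREG leaOutReg, SUBREG_16BIT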
      BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
      MachineInstr *InsMI =
        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG), leaInReg)
        .addReg(leaInReg)
        .addReg(Src, getKillRegState(isKill))
        .addImm(X86::SUBREG_16BIT);

      NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
        .addReg(0).addImm(1 << ShAmt)
        .addReg(leaInReg, RegState::Kill)
        .addImm(0);

      MachineInstr *ExtMI =
        BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(leaOutReg, RegState::Kill)
        .addImm(X86::SUBREG_16BIT);

      if (LV) {
        // Update live variables
        LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
        LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
        if (isKill)
          LV->replaceKillInstruction(Src, MI, InsMI);
        if (isDead)
          LV->replaceKillInstruction(Dest, MI, ExtMI);
      }
      return ExtMI;
    } else {
      NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
        .addReg(Dest, RegState::Define | getDeadRegState(isDead))
        .addReg(0).addImm(1 << ShAmt)
        .addReg(Src, getKillRegState(isKill))
        .addImm(0);
    }
    break;
  }
  default: {
    // The following opcodes also set the condition code register(s). Only
    // convert them to equivalent lea if the condition code register defs
    // are dead!
    if (hasLiveCondCodeDef(MI))
      return 0;

    bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
    switch (MIOpc) {
    default: return 0;
    case X86::INC64r:
    case X86::INC32r:
    case X86::INC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      unsigned Opc = MIOpc == X86::INC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, 1);
      break;
    }
    case X86::INC16r:
    case X86::INC64_16r:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, 1);
      break;
    case X86::DEC64r:
    case X86::DEC32r:
    case X86::DEC64_32r: {
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      unsigned Opc = MIOpc == X86::DEC64r ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                              .addReg(Dest, RegState::Define |
                                      getDeadRegState(isDead)),
                              Src, isKill, -1);
      break;
    }
    case X86::DEC16r:
    case X86::DEC64_16r:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
      NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                           .addReg(Dest, RegState::Define |
                                   getDeadRegState(isDead)),
                           Src, isKill, -1);
      break;
    case X86::ADD64rr:
    case X86::ADD32rr: {
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Opc = MIOpc == X86::ADD64rr ? X86::LEA64r
        : (is64Bit ? X86::LEA64_32r : X86::LEA32r);
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD16rr: {
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      unsigned Src2 = MI->getOperand(2).getReg();
      bool isKill2 = MI->getOperand(2).isKill();
      NewMI = addRegReg(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                        .addReg(Dest, RegState::Define |
                                getDeadRegState(isDead)),
                        Src, isKill, Src2, isKill2);
      if (LV && isKill2)
        LV->replaceKillInstruction(Src2, MI, NewMI);
      break;
    }
    case X86::ADD64ri32:
    case X86::ADD64ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm())
        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
                                Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::ADD32ri:
    case X86::ADD32ri8:
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm()) {
        unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
        NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                                .addReg(Dest, RegState::Define |
                                        getDeadRegState(isDead)),
                                Src, isKill, MI->getOperand(2).getImm());
      }
      break;
    case X86::ADD16ri:
    case X86::ADD16ri8:
      if (DisableLEA16) return 0;
      assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
      if (MI->getOperand(2).isImm())
        NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
                             .addReg(Dest, RegState::Define |
                                     getDeadRegState(isDead)),
                             Src, isKill, MI->getOperand(2).getImm());
      break;
    case X86::SHL16ri:
      if (DisableLEA16) return 0;
    case X86::SHL32ri:
    case X86::SHL64ri: {
      assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
             "Unknown shl instruction!");
      unsigned ShAmt = MI->getOperand(2).getImm();
      if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
        X86AddressMode AM;
        AM.Scale = 1 << ShAmt;
        AM.IndexReg = Src;
        unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
          : (MIOpc == X86::SHL32ri
             ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
        NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
                               .addReg(Dest, RegState::Define |
                                       getDeadRegState(isDead)), AM);
        if (isKill)
          NewMI->getOperand(3).setIsKill(true);
      }
      break;
    }
    }
  }
  }

  if (!NewMI) return 0;

  if (LV) {  // Update live variables
    if (isKill)
      LV->replaceKillInstruction(Src, MI, NewMI);
    if (isDead)
      LV->replaceKillInstruction(Dest, MI, NewMI);
  }

  MFI->insert(MBBI, NewMI);   // Insert the new inst
  return NewMI;
}

/// commuteInstruction - We have a few instructions that must be hacked on to
/// commute them.
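/// For example, a 32-bit SHLD B, C, I computes (B << I) | (C >> (32-I)),
/// which is exactly what SHRD C, B, (32-I) computes, so the pair can be
/// commuted by swapping the opcode and adjusting the immediate before the
/// default implementation exchanges the two register operands.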
1289/// 1290MachineInstr * 1291X86InstrInfo::commuteInstruction(MachineInstr *MI, bool NewMI) const { 1292 switch (MI->getOpcode()) { 1293 case X86::SHRD16rri8: // A = SHRD16rri8 B, C, I -> A = SHLD16rri8 C, B, (16-I) 1294 case X86::SHLD16rri8: // A = SHLD16rri8 B, C, I -> A = SHRD16rri8 C, B, (16-I) 1295 case X86::SHRD32rri8: // A = SHRD32rri8 B, C, I -> A = SHLD32rri8 C, B, (32-I) 1296 case X86::SHLD32rri8: // A = SHLD32rri8 B, C, I -> A = SHRD32rri8 C, B, (32-I) 1297 case X86::SHRD64rri8: // A = SHRD64rri8 B, C, I -> A = SHLD64rri8 C, B, (64-I) 1298 case X86::SHLD64rri8:{// A = SHLD64rri8 B, C, I -> A = SHRD64rri8 C, B, (64-I) 1299 unsigned Opc; 1300 unsigned Size; 1301 switch (MI->getOpcode()) { 1302 default: assert(0 && "Unreachable!"); 1303 case X86::SHRD16rri8: Size = 16; Opc = X86::SHLD16rri8; break; 1304 case X86::SHLD16rri8: Size = 16; Opc = X86::SHRD16rri8; break; 1305 case X86::SHRD32rri8: Size = 32; Opc = X86::SHLD32rri8; break; 1306 case X86::SHLD32rri8: Size = 32; Opc = X86::SHRD32rri8; break; 1307 case X86::SHRD64rri8: Size = 64; Opc = X86::SHLD64rri8; break; 1308 case X86::SHLD64rri8: Size = 64; Opc = X86::SHRD64rri8; break; 1309 } 1310 unsigned Amt = MI->getOperand(3).getImm(); 1311 if (NewMI) { 1312 MachineFunction &MF = *MI->getParent()->getParent(); 1313 MI = MF.CloneMachineInstr(MI); 1314 NewMI = false; 1315 } 1316 MI->setDesc(get(Opc)); 1317 MI->getOperand(3).setImm(Size-Amt); 1318 return TargetInstrInfoImpl::commuteInstruction(MI, NewMI); 1319 } 1320 case X86::CMOVB16rr: 1321 case X86::CMOVB32rr: 1322 case X86::CMOVB64rr: 1323 case X86::CMOVAE16rr: 1324 case X86::CMOVAE32rr: 1325 case X86::CMOVAE64rr: 1326 case X86::CMOVE16rr: 1327 case X86::CMOVE32rr: 1328 case X86::CMOVE64rr: 1329 case X86::CMOVNE16rr: 1330 case X86::CMOVNE32rr: 1331 case X86::CMOVNE64rr: 1332 case X86::CMOVBE16rr: 1333 case X86::CMOVBE32rr: 1334 case X86::CMOVBE64rr: 1335 case X86::CMOVA16rr: 1336 case X86::CMOVA32rr: 1337 case X86::CMOVA64rr: 1338 case X86::CMOVL16rr: 1339 case X86::CMOVL32rr: 1340 case X86::CMOVL64rr: 1341 case X86::CMOVGE16rr: 1342 case X86::CMOVGE32rr: 1343 case X86::CMOVGE64rr: 1344 case X86::CMOVLE16rr: 1345 case X86::CMOVLE32rr: 1346 case X86::CMOVLE64rr: 1347 case X86::CMOVG16rr: 1348 case X86::CMOVG32rr: 1349 case X86::CMOVG64rr: 1350 case X86::CMOVS16rr: 1351 case X86::CMOVS32rr: 1352 case X86::CMOVS64rr: 1353 case X86::CMOVNS16rr: 1354 case X86::CMOVNS32rr: 1355 case X86::CMOVNS64rr: 1356 case X86::CMOVP16rr: 1357 case X86::CMOVP32rr: 1358 case X86::CMOVP64rr: 1359 case X86::CMOVNP16rr: 1360 case X86::CMOVNP32rr: 1361 case X86::CMOVNP64rr: 1362 case X86::CMOVO16rr: 1363 case X86::CMOVO32rr: 1364 case X86::CMOVO64rr: 1365 case X86::CMOVNO16rr: 1366 case X86::CMOVNO32rr: 1367 case X86::CMOVNO64rr: { 1368 unsigned Opc = 0; 1369 switch (MI->getOpcode()) { 1370 default: break; 1371 case X86::CMOVB16rr: Opc = X86::CMOVAE16rr; break; 1372 case X86::CMOVB32rr: Opc = X86::CMOVAE32rr; break; 1373 case X86::CMOVB64rr: Opc = X86::CMOVAE64rr; break; 1374 case X86::CMOVAE16rr: Opc = X86::CMOVB16rr; break; 1375 case X86::CMOVAE32rr: Opc = X86::CMOVB32rr; break; 1376 case X86::CMOVAE64rr: Opc = X86::CMOVB64rr; break; 1377 case X86::CMOVE16rr: Opc = X86::CMOVNE16rr; break; 1378 case X86::CMOVE32rr: Opc = X86::CMOVNE32rr; break; 1379 case X86::CMOVE64rr: Opc = X86::CMOVNE64rr; break; 1380 case X86::CMOVNE16rr: Opc = X86::CMOVE16rr; break; 1381 case X86::CMOVNE32rr: Opc = X86::CMOVE32rr; break; 1382 case X86::CMOVNE64rr: Opc = X86::CMOVE64rr; break; 1383 case X86::CMOVBE16rr: Opc = 
    case X86::CMOVBE32rr: Opc = X86::CMOVA32rr; break;
    case X86::CMOVBE64rr: Opc = X86::CMOVA64rr; break;
    case X86::CMOVA16rr:  Opc = X86::CMOVBE16rr; break;
    case X86::CMOVA32rr:  Opc = X86::CMOVBE32rr; break;
    case X86::CMOVA64rr:  Opc = X86::CMOVBE64rr; break;
    case X86::CMOVL16rr:  Opc = X86::CMOVGE16rr; break;
    case X86::CMOVL32rr:  Opc = X86::CMOVGE32rr; break;
    case X86::CMOVL64rr:  Opc = X86::CMOVGE64rr; break;
    case X86::CMOVGE16rr: Opc = X86::CMOVL16rr; break;
    case X86::CMOVGE32rr: Opc = X86::CMOVL32rr; break;
    case X86::CMOVGE64rr: Opc = X86::CMOVL64rr; break;
    case X86::CMOVLE16rr: Opc = X86::CMOVG16rr; break;
    case X86::CMOVLE32rr: Opc = X86::CMOVG32rr; break;
    case X86::CMOVLE64rr: Opc = X86::CMOVG64rr; break;
    case X86::CMOVG16rr:  Opc = X86::CMOVLE16rr; break;
    case X86::CMOVG32rr:  Opc = X86::CMOVLE32rr; break;
    case X86::CMOVG64rr:  Opc = X86::CMOVLE64rr; break;
    case X86::CMOVS16rr:  Opc = X86::CMOVNS16rr; break;
    case X86::CMOVS32rr:  Opc = X86::CMOVNS32rr; break;
    case X86::CMOVS64rr:  Opc = X86::CMOVNS64rr; break;
    case X86::CMOVNS16rr: Opc = X86::CMOVS16rr; break;
    case X86::CMOVNS32rr: Opc = X86::CMOVS32rr; break;
    case X86::CMOVNS64rr: Opc = X86::CMOVS64rr; break;
    case X86::CMOVP16rr:  Opc = X86::CMOVNP16rr; break;
    case X86::CMOVP32rr:  Opc = X86::CMOVNP32rr; break;
    case X86::CMOVP64rr:  Opc = X86::CMOVNP64rr; break;
    case X86::CMOVNP16rr: Opc = X86::CMOVP16rr; break;
    case X86::CMOVNP32rr: Opc = X86::CMOVP32rr; break;
    case X86::CMOVNP64rr: Opc = X86::CMOVP64rr; break;
    case X86::CMOVO16rr:  Opc = X86::CMOVNO16rr; break;
    case X86::CMOVO32rr:  Opc = X86::CMOVNO32rr; break;
    case X86::CMOVO64rr:  Opc = X86::CMOVNO64rr; break;
    case X86::CMOVNO16rr: Opc = X86::CMOVO16rr; break;
    case X86::CMOVNO32rr: Opc = X86::CMOVO32rr; break;
    case X86::CMOVNO64rr: Opc = X86::CMOVO64rr; break;
    }
    if (NewMI) {
      MachineFunction &MF = *MI->getParent()->getParent();
      MI = MF.CloneMachineInstr(MI);
      NewMI = false;
    }
    MI->setDesc(get(Opc));
    // Fallthrough intended.
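    // A CMOV selects between its two register operands based on the condition
    // code, so inverting the condition here and then letting the default case
    // below swap the operands preserves the result.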
  }
  default:
    return TargetInstrInfoImpl::commuteInstruction(MI, NewMI);
  }
}

static X86::CondCode GetCondFromBranchOpc(unsigned BrOpc) {
  switch (BrOpc) {
  default: return X86::COND_INVALID;
  case X86::JE:  return X86::COND_E;
  case X86::JNE: return X86::COND_NE;
  case X86::JL:  return X86::COND_L;
  case X86::JLE: return X86::COND_LE;
  case X86::JG:  return X86::COND_G;
  case X86::JGE: return X86::COND_GE;
  case X86::JB:  return X86::COND_B;
  case X86::JBE: return X86::COND_BE;
  case X86::JA:  return X86::COND_A;
  case X86::JAE: return X86::COND_AE;
  case X86::JS:  return X86::COND_S;
  case X86::JNS: return X86::COND_NS;
  case X86::JP:  return X86::COND_P;
  case X86::JNP: return X86::COND_NP;
  case X86::JO:  return X86::COND_O;
  case X86::JNO: return X86::COND_NO;
  }
}

unsigned X86::GetCondBranchFromCond(X86::CondCode CC) {
  switch (CC) {
  default: assert(0 && "Illegal condition code!");
  case X86::COND_E:  return X86::JE;
  case X86::COND_NE: return X86::JNE;
  case X86::COND_L:  return X86::JL;
  case X86::COND_LE: return X86::JLE;
  case X86::COND_G:  return X86::JG;
  case X86::COND_GE: return X86::JGE;
  case X86::COND_B:  return X86::JB;
  case X86::COND_BE: return X86::JBE;
  case X86::COND_A:  return X86::JA;
  case X86::COND_AE: return X86::JAE;
  case X86::COND_S:  return X86::JS;
  case X86::COND_NS: return X86::JNS;
  case X86::COND_P:  return X86::JP;
  case X86::COND_NP: return X86::JNP;
  case X86::COND_O:  return X86::JO;
  case X86::COND_NO: return X86::JNO;
  }
}

/// GetOppositeBranchCondition - Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
  switch (CC) {
  default: assert(0 && "Illegal condition code!");
  case X86::COND_E:  return X86::COND_NE;
  case X86::COND_NE: return X86::COND_E;
  case X86::COND_L:  return X86::COND_GE;
  case X86::COND_LE: return X86::COND_G;
  case X86::COND_G:  return X86::COND_LE;
  case X86::COND_GE: return X86::COND_L;
  case X86::COND_B:  return X86::COND_AE;
  case X86::COND_BE: return X86::COND_A;
  case X86::COND_A:  return X86::COND_BE;
  case X86::COND_AE: return X86::COND_B;
  case X86::COND_S:  return X86::COND_NS;
  case X86::COND_NS: return X86::COND_S;
  case X86::COND_P:  return X86::COND_NP;
  case X86::COND_NP: return X86::COND_P;
  case X86::COND_O:  return X86::COND_NO;
  case X86::COND_NO: return X86::COND_O;
  }
}

bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.isTerminator()) return false;

  // Conditional branch is a special case.
  if (TID.isBranch() && !TID.isBarrier())
    return true;
  if (!TID.isPredicable())
    return true;
  return !isPredicated(MI);
}

// For purposes of branch analysis do not count FP_REG_KILL as a terminator.
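// (FP_REG_KILL is a pseudo-instruction that marks the x87 stack registers
// dead at the end of a block; it transfers no control, so branch analysis
// can safely look past it.)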
static bool isBrAnalysisUnpredicatedTerminator(const MachineInstr *MI,
                                               const X86InstrInfo &TII) {
  if (MI->getOpcode() == X86::FP_REG_KILL)
    return false;
  return TII.isUnpredicatedTerminator(MI);
}

bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
                                 MachineBasicBlock *&TBB,
                                 MachineBasicBlock *&FBB,
                                 SmallVectorImpl<MachineOperand> &Cond,
                                 bool AllowModify) const {
  // Start from the bottom of the block and work up, examining the
  // terminator instructions.
  MachineBasicBlock::iterator I = MBB.end();
  while (I != MBB.begin()) {
    --I;
    // Working from the bottom, when we see a non-terminator
    // instruction, we're done.
    if (!isBrAnalysisUnpredicatedTerminator(I, *this))
      break;
    // A terminator that isn't a branch can't easily be handled
    // by this analysis.
    if (!I->getDesc().isBranch())
      return true;
    // Handle unconditional branches.
    if (I->getOpcode() == X86::JMP) {
      if (!AllowModify) {
        TBB = I->getOperand(0).getMBB();
        continue;
      }

      // If the block has any instructions after a JMP, delete them.
      while (next(I) != MBB.end())
        next(I)->eraseFromParent();
      Cond.clear();
      FBB = 0;
      // Delete the JMP if it's equivalent to a fall-through.
      if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
        TBB = 0;
        I->eraseFromParent();
        I = MBB.end();
        continue;
      }
      // TBB is used to indicate the unconditional destination.
      TBB = I->getOperand(0).getMBB();
      continue;
    }
    // Handle conditional branches.
    X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
    if (BranchCode == X86::COND_INVALID)
      return true;  // Can't handle indirect branch.
    // Working from the bottom, handle the first conditional branch.
    if (Cond.empty()) {
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(MachineOperand::CreateImm(BranchCode));
      continue;
    }
    // Handle subsequent conditional branches. Only handle the case
    // where all conditional branches branch to the same destination
    // and their condition opcodes fit one of the special
    // multi-branch idioms.
    assert(Cond.size() == 1);
    assert(TBB);
    // Only handle the case where all conditional branches branch to
    // the same destination.
    if (TBB != I->getOperand(0).getMBB())
      return true;
    X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
    // If the conditions are the same, we can leave them alone.
    if (OldBranchCode == BranchCode)
      continue;
    // If they differ, see if they fit one of the known patterns.
    // Theoretically we could handle more patterns here, but
    // we shouldn't expect to see them if instruction selection
    // has done a reasonable job.
    if ((OldBranchCode == X86::COND_NP &&
         BranchCode == X86::COND_E) ||
        (OldBranchCode == X86::COND_E &&
         BranchCode == X86::COND_NP))
      BranchCode = X86::COND_NP_OR_E;
    else if ((OldBranchCode == X86::COND_P &&
              BranchCode == X86::COND_NE) ||
             (OldBranchCode == X86::COND_NE &&
              BranchCode == X86::COND_P))
      BranchCode = X86::COND_NE_OR_P;
    else
      return true;
    // Update the MachineOperand.
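    // These compound conditions typically come from floating-point compares:
    // after a ucomiss, an "unordered or not equal" test is emitted as two
    // branches to one target ("jne; jp"), which this analysis folds into the
    // single pseudo-condition COND_NE_OR_P.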
1604 Cond[0].setImm(BranchCode); 1605 } 1606 1607 return false; 1608} 1609 1610 unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { 1611 MachineBasicBlock::iterator I = MBB.end(); 1612 unsigned Count = 0; 1613 1614 while (I != MBB.begin()) { 1615 --I; 1616 if (I->getOpcode() != X86::JMP && 1617 GetCondFromBranchOpc(I->getOpcode()) == X86::COND_INVALID) 1618 break; 1619 // Remove the branch. 1620 I->eraseFromParent(); 1621 I = MBB.end(); 1622 ++Count; 1623 } 1624 1625 return Count; 1626} 1627 1628 unsigned 1629X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, 1630 MachineBasicBlock *FBB, 1631 const SmallVectorImpl<MachineOperand> &Cond) const { 1632 // FIXME this should probably have a DebugLoc operand 1633 DebugLoc dl = DebugLoc::getUnknownLoc(); 1634 // Shouldn't be a fall through. 1635 assert(TBB && "InsertBranch must not be told to insert a fallthrough"); 1636 assert((Cond.size() == 1 || Cond.size() == 0) && 1637 "X86 branch conditions have one component!"); 1638 1639 if (Cond.empty()) { 1640 // Unconditional branch? 1641 assert(!FBB && "Unconditional branch with multiple successors!"); 1642 BuildMI(&MBB, dl, get(X86::JMP)).addMBB(TBB); 1643 return 1; 1644 } 1645 1646 // Conditional branch. 1647 unsigned Count = 0; 1648 X86::CondCode CC = (X86::CondCode)Cond[0].getImm(); 1649 switch (CC) { 1650 case X86::COND_NP_OR_E: 1651 // Synthesize NP_OR_E with two branches. 1652 BuildMI(&MBB, dl, get(X86::JNP)).addMBB(TBB); 1653 ++Count; 1654 BuildMI(&MBB, dl, get(X86::JE)).addMBB(TBB); 1655 ++Count; 1656 break; 1657 case X86::COND_NE_OR_P: 1658 // Synthesize NE_OR_P with two branches. 1659 BuildMI(&MBB, dl, get(X86::JNE)).addMBB(TBB); 1660 ++Count; 1661 BuildMI(&MBB, dl, get(X86::JP)).addMBB(TBB); 1662 ++Count; 1663 break; 1664 default: { 1665 unsigned Opc = GetCondBranchFromCond(CC); 1666 BuildMI(&MBB, dl, get(Opc)).addMBB(TBB); 1667 ++Count; 1668 } 1669 } 1670 if (FBB) { 1671 // Two-way conditional branch. Insert the second branch. 1672 BuildMI(&MBB, dl, get(X86::JMP)).addMBB(FBB); 1673 ++Count; 1674 } 1675 return Count; 1676} 1677 1678 /// isHReg - Test if the given register is a physical h register. 1679 static bool isHReg(unsigned Reg) { 1680 return X86::GR8_ABCD_HRegClass.contains(Reg); 1681} 1682 1683 bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB, 1684 MachineBasicBlock::iterator MI, 1685 unsigned DestReg, unsigned SrcReg, 1686 const TargetRegisterClass *DestRC, 1687 const TargetRegisterClass *SrcRC) const { 1688 DebugLoc DL = DebugLoc::getUnknownLoc(); 1689 if (MI != MBB.end()) DL = MI->getDebugLoc(); 1690 1691 // Determine if DestRC and SrcRC have a common superclass. 1692 const TargetRegisterClass *CommonRC = DestRC; 1693 if (DestRC == SrcRC) 1694 /* Source and destination have the same register class. */; 1695 else if (CommonRC->hasSuperClass(SrcRC)) 1696 CommonRC = SrcRC; 1697 else if (!DestRC->hasSubClass(SrcRC)) 1698 CommonRC = 0; 1699 1700 if (CommonRC) { 1701 unsigned Opc; 1702 if (CommonRC == &X86::GR64RegClass) { 1703 Opc = X86::MOV64rr; 1704 } else if (CommonRC == &X86::GR32RegClass) { 1705 Opc = X86::MOV32rr; 1706 } else if (CommonRC == &X86::GR16RegClass) { 1707 Opc = X86::MOV16rr; 1708 } else if (CommonRC == &X86::GR8RegClass) { 1709 // Copying to or from a physical H register on x86-64 requires a NOREX 1710 // move. Otherwise use a normal move.
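// (An instruction carrying any REX prefix cannot address AH, BH, CH or
// DH, so the copy must be constrained to an encoding that is guaranteed
// not to need REX.)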
1711 if ((isHReg(DestReg) || isHReg(SrcReg)) && 1712 TM.getSubtarget<X86Subtarget>().is64Bit()) 1713 Opc = X86::MOV8rr_NOREX; 1714 else 1715 Opc = X86::MOV8rr; 1716 } else if (CommonRC == &X86::GR64_ABCDRegClass) { 1717 Opc = X86::MOV64rr; 1718 } else if (CommonRC == &X86::GR32_ABCDRegClass) { 1719 Opc = X86::MOV32rr; 1720 } else if (CommonRC == &X86::GR16_ABCDRegClass) { 1721 Opc = X86::MOV16rr; 1722 } else if (CommonRC == &X86::GR8_ABCD_LRegClass) { 1723 Opc = X86::MOV8rr; 1724 } else if (CommonRC == &X86::GR8_ABCD_HRegClass) { 1725 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1726 Opc = X86::MOV8rr_NOREX; 1727 else 1728 Opc = X86::MOV8rr; 1729 } else if (CommonRC == &X86::GR64_NOREXRegClass) { 1730 Opc = X86::MOV64rr; 1731 } else if (CommonRC == &X86::GR32_NOREXRegClass) { 1732 Opc = X86::MOV32rr; 1733 } else if (CommonRC == &X86::GR16_NOREXRegClass) { 1734 Opc = X86::MOV16rr; 1735 } else if (CommonRC == &X86::GR8_NOREXRegClass) { 1736 Opc = X86::MOV8rr; 1737 } else if (CommonRC == &X86::RFP32RegClass) { 1738 Opc = X86::MOV_Fp3232; 1739 } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) { 1740 Opc = X86::MOV_Fp6464; 1741 } else if (CommonRC == &X86::RFP80RegClass) { 1742 Opc = X86::MOV_Fp8080; 1743 } else if (CommonRC == &X86::FR32RegClass) { 1744 Opc = X86::FsMOVAPSrr; 1745 } else if (CommonRC == &X86::FR64RegClass) { 1746 Opc = X86::FsMOVAPDrr; 1747 } else if (CommonRC == &X86::VR128RegClass) { 1748 Opc = X86::MOVAPSrr; 1749 } else if (CommonRC == &X86::VR64RegClass) { 1750 Opc = X86::MMX_MOVQ64rr; 1751 } else { 1752 return false; 1753 } 1754 BuildMI(MBB, MI, DL, get(Opc), DestReg).addReg(SrcReg); 1755 return true; 1756 } 1757 1758 // Moving EFLAGS to / from another register requires a push and a pop. 1759 if (SrcRC == &X86::CCRRegClass) { 1760 if (SrcReg != X86::EFLAGS) 1761 return false; 1762 if (DestRC == &X86::GR64RegClass) { 1763 BuildMI(MBB, MI, DL, get(X86::PUSHFQ)); 1764 BuildMI(MBB, MI, DL, get(X86::POP64r), DestReg); 1765 return true; 1766 } else if (DestRC == &X86::GR32RegClass) { 1767 BuildMI(MBB, MI, DL, get(X86::PUSHFD)); 1768 BuildMI(MBB, MI, DL, get(X86::POP32r), DestReg); 1769 return true; 1770 } 1771 } else if (DestRC == &X86::CCRRegClass) { 1772 if (DestReg != X86::EFLAGS) 1773 return false; 1774 if (SrcRC == &X86::GR64RegClass) { 1775 BuildMI(MBB, MI, DL, get(X86::PUSH64r)).addReg(SrcReg); 1776 BuildMI(MBB, MI, DL, get(X86::POPFQ)); 1777 return true; 1778 } else if (SrcRC == &X86::GR32RegClass) { 1779 BuildMI(MBB, MI, DL, get(X86::PUSH32r)).addReg(SrcReg); 1780 BuildMI(MBB, MI, DL, get(X86::POPFD)); 1781 return true; 1782 } 1783 } 1784 1785 // Moving from ST(0) turns into FpGET_ST0_32 etc. 1786 if (SrcRC == &X86::RSTRegClass) { 1787 // Copying from ST(0)/ST(1). 1788 if (SrcReg != X86::ST0 && SrcReg != X86::ST1) 1789 // Can only copy from ST(0)/ST(1) right now 1790 return false; 1791 bool isST0 = SrcReg == X86::ST0; 1792 unsigned Opc; 1793 if (DestRC == &X86::RFP32RegClass) 1794 Opc = isST0 ? X86::FpGET_ST0_32 : X86::FpGET_ST1_32; 1795 else if (DestRC == &X86::RFP64RegClass) 1796 Opc = isST0 ? X86::FpGET_ST0_64 : X86::FpGET_ST1_64; 1797 else { 1798 if (DestRC != &X86::RFP80RegClass) 1799 return false; 1800 Opc = isST0 ? X86::FpGET_ST0_80 : X86::FpGET_ST1_80; 1801 } 1802 BuildMI(MBB, MI, DL, get(Opc), DestReg); 1803 return true; 1804 } 1805 1806 // Moving to ST(0) turns into FpSET_ST0_32 etc. 1807 if (DestRC == &X86::RSTRegClass) { 1808 // Copying to ST(0) / ST(1). 
1809 if (DestReg != X86::ST0 && DestReg != X86::ST1) 1810 // Can only copy to TOS right now 1811 return false; 1812 bool isST0 = DestReg == X86::ST0; 1813 unsigned Opc; 1814 if (SrcRC == &X86::RFP32RegClass) 1815 Opc = isST0 ? X86::FpSET_ST0_32 : X86::FpSET_ST1_32; 1816 else if (SrcRC == &X86::RFP64RegClass) 1817 Opc = isST0 ? X86::FpSET_ST0_64 : X86::FpSET_ST1_64; 1818 else { 1819 if (SrcRC != &X86::RFP80RegClass) 1820 return false; 1821 Opc = isST0 ? X86::FpSET_ST0_80 : X86::FpSET_ST1_80; 1822 } 1823 BuildMI(MBB, MI, DL, get(Opc)).addReg(SrcReg); 1824 return true; 1825 } 1826 1827 // Not yet supported! 1828 return false; 1829} 1830 1831static unsigned getStoreRegOpcode(unsigned SrcReg, 1832 const TargetRegisterClass *RC, 1833 bool isStackAligned, 1834 TargetMachine &TM) { 1835 unsigned Opc = 0; 1836 if (RC == &X86::GR64RegClass) { 1837 Opc = X86::MOV64mr; 1838 } else if (RC == &X86::GR32RegClass) { 1839 Opc = X86::MOV32mr; 1840 } else if (RC == &X86::GR16RegClass) { 1841 Opc = X86::MOV16mr; 1842 } else if (RC == &X86::GR8RegClass) { 1843 // Copying to or from a physical H register on x86-64 requires a NOREX 1844 // move. Otherwise use a normal move. 1845 if (isHReg(SrcReg) && 1846 TM.getSubtarget<X86Subtarget>().is64Bit()) 1847 Opc = X86::MOV8mr_NOREX; 1848 else 1849 Opc = X86::MOV8mr; 1850 } else if (RC == &X86::GR64_ABCDRegClass) { 1851 Opc = X86::MOV64mr; 1852 } else if (RC == &X86::GR32_ABCDRegClass) { 1853 Opc = X86::MOV32mr; 1854 } else if (RC == &X86::GR16_ABCDRegClass) { 1855 Opc = X86::MOV16mr; 1856 } else if (RC == &X86::GR8_ABCD_LRegClass) { 1857 Opc = X86::MOV8mr; 1858 } else if (RC == &X86::GR8_ABCD_HRegClass) { 1859 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1860 Opc = X86::MOV8mr_NOREX; 1861 else 1862 Opc = X86::MOV8mr; 1863 } else if (RC == &X86::GR64_NOREXRegClass) { 1864 Opc = X86::MOV64mr; 1865 } else if (RC == &X86::GR32_NOREXRegClass) { 1866 Opc = X86::MOV32mr; 1867 } else if (RC == &X86::GR16_NOREXRegClass) { 1868 Opc = X86::MOV16mr; 1869 } else if (RC == &X86::GR8_NOREXRegClass) { 1870 Opc = X86::MOV8mr; 1871 } else if (RC == &X86::RFP80RegClass) { 1872 Opc = X86::ST_FpP80m; // pops 1873 } else if (RC == &X86::RFP64RegClass) { 1874 Opc = X86::ST_Fp64m; 1875 } else if (RC == &X86::RFP32RegClass) { 1876 Opc = X86::ST_Fp32m; 1877 } else if (RC == &X86::FR32RegClass) { 1878 Opc = X86::MOVSSmr; 1879 } else if (RC == &X86::FR64RegClass) { 1880 Opc = X86::MOVSDmr; 1881 } else if (RC == &X86::VR128RegClass) { 1882 // If stack is realigned we can use aligned stores. 1883 Opc = isStackAligned ? 
X86::MOVAPSmr : X86::MOVUPSmr; 1884 } else if (RC == &X86::VR64RegClass) { 1885 Opc = X86::MMX_MOVQ64mr; 1886 } else { 1887 LLVM_UNREACHABLE("Unknown regclass"); 1888 } 1889 1890 return Opc; 1891} 1892 1893void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, 1894 MachineBasicBlock::iterator MI, 1895 unsigned SrcReg, bool isKill, int FrameIdx, 1896 const TargetRegisterClass *RC) const { 1897 const MachineFunction &MF = *MBB.getParent(); 1898 bool isAligned = (RI.getStackAlignment() >= 16) || 1899 RI.needsStackRealignment(MF); 1900 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 1901 DebugLoc DL = DebugLoc::getUnknownLoc(); 1902 if (MI != MBB.end()) DL = MI->getDebugLoc(); 1903 addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) 1904 .addReg(SrcReg, getKillRegState(isKill)); 1905} 1906 1907void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, 1908 bool isKill, 1909 SmallVectorImpl<MachineOperand> &Addr, 1910 const TargetRegisterClass *RC, 1911 SmallVectorImpl<MachineInstr*> &NewMIs) const { 1912 bool isAligned = (RI.getStackAlignment() >= 16) || 1913 RI.needsStackRealignment(MF); 1914 unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); 1915 DebugLoc DL = DebugLoc::getUnknownLoc(); 1916 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); 1917 for (unsigned i = 0, e = Addr.size(); i != e; ++i) 1918 MIB.addOperand(Addr[i]); 1919 MIB.addReg(SrcReg, getKillRegState(isKill)); 1920 NewMIs.push_back(MIB); 1921} 1922 1923static unsigned getLoadRegOpcode(unsigned DestReg, 1924 const TargetRegisterClass *RC, 1925 bool isStackAligned, 1926 const TargetMachine &TM) { 1927 unsigned Opc = 0; 1928 if (RC == &X86::GR64RegClass) { 1929 Opc = X86::MOV64rm; 1930 } else if (RC == &X86::GR32RegClass) { 1931 Opc = X86::MOV32rm; 1932 } else if (RC == &X86::GR16RegClass) { 1933 Opc = X86::MOV16rm; 1934 } else if (RC == &X86::GR8RegClass) { 1935 // Copying to or from a physical H register on x86-64 requires a NOREX 1936 // move. Otherwise use a normal move. 1937 if (isHReg(DestReg) && 1938 TM.getSubtarget<X86Subtarget>().is64Bit()) 1939 Opc = X86::MOV8rm_NOREX; 1940 else 1941 Opc = X86::MOV8rm; 1942 } else if (RC == &X86::GR64_ABCDRegClass) { 1943 Opc = X86::MOV64rm; 1944 } else if (RC == &X86::GR32_ABCDRegClass) { 1945 Opc = X86::MOV32rm; 1946 } else if (RC == &X86::GR16_ABCDRegClass) { 1947 Opc = X86::MOV16rm; 1948 } else if (RC == &X86::GR8_ABCD_LRegClass) { 1949 Opc = X86::MOV8rm; 1950 } else if (RC == &X86::GR8_ABCD_HRegClass) { 1951 if (TM.getSubtarget<X86Subtarget>().is64Bit()) 1952 Opc = X86::MOV8rm_NOREX; 1953 else 1954 Opc = X86::MOV8rm; 1955 } else if (RC == &X86::GR64_NOREXRegClass) { 1956 Opc = X86::MOV64rm; 1957 } else if (RC == &X86::GR32_NOREXRegClass) { 1958 Opc = X86::MOV32rm; 1959 } else if (RC == &X86::GR16_NOREXRegClass) { 1960 Opc = X86::MOV16rm; 1961 } else if (RC == &X86::GR8_NOREXRegClass) { 1962 Opc = X86::MOV8rm; 1963 } else if (RC == &X86::RFP80RegClass) { 1964 Opc = X86::LD_Fp80m; 1965 } else if (RC == &X86::RFP64RegClass) { 1966 Opc = X86::LD_Fp64m; 1967 } else if (RC == &X86::RFP32RegClass) { 1968 Opc = X86::LD_Fp32m; 1969 } else if (RC == &X86::FR32RegClass) { 1970 Opc = X86::MOVSSrm; 1971 } else if (RC == &X86::FR64RegClass) { 1972 Opc = X86::MOVSDrm; 1973 } else if (RC == &X86::VR128RegClass) { 1974 // If stack is realigned we can use aligned loads. 1975 Opc = isStackAligned ? 
X86::MOVAPSrm : X86::MOVUPSrm; 1976 } else if (RC == &X86::VR64RegClass) { 1977 Opc = X86::MMX_MOVQ64rm; 1978 } else { 1979 LLVM_UNREACHABLE("Unknown regclass"); 1980 } 1981 1982 return Opc; 1983} 1984 1985void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, 1986 MachineBasicBlock::iterator MI, 1987 unsigned DestReg, int FrameIdx, 1988 const TargetRegisterClass *RC) const{ 1989 const MachineFunction &MF = *MBB.getParent(); 1990 bool isAligned = (RI.getStackAlignment() >= 16) || 1991 RI.needsStackRealignment(MF); 1992 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 1993 DebugLoc DL = DebugLoc::getUnknownLoc(); 1994 if (MI != MBB.end()) DL = MI->getDebugLoc(); 1995 addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); 1996} 1997 1998void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, 1999 SmallVectorImpl<MachineOperand> &Addr, 2000 const TargetRegisterClass *RC, 2001 SmallVectorImpl<MachineInstr*> &NewMIs) const { 2002 bool isAligned = (RI.getStackAlignment() >= 16) || 2003 RI.needsStackRealignment(MF); 2004 unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); 2005 DebugLoc DL = DebugLoc::getUnknownLoc(); 2006 MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); 2007 for (unsigned i = 0, e = Addr.size(); i != e; ++i) 2008 MIB.addOperand(Addr[i]); 2009 NewMIs.push_back(MIB); 2010} 2011 2012bool X86InstrInfo::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2013 MachineBasicBlock::iterator MI, 2014 const std::vector<CalleeSavedInfo> &CSI) const { 2015 if (CSI.empty()) 2016 return false; 2017 2018 DebugLoc DL = DebugLoc::getUnknownLoc(); 2019 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2020 2021 bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2022 unsigned SlotSize = is64Bit ? 8 : 4; 2023 2024 MachineFunction &MF = *MBB.getParent(); 2025 unsigned FPReg = RI.getFrameRegister(MF); 2026 X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); 2027 unsigned CalleeFrameSize = 0; 2028 2029 unsigned Opc = is64Bit ? X86::PUSH64r : X86::PUSH32r; 2030 for (unsigned i = CSI.size(); i != 0; --i) { 2031 unsigned Reg = CSI[i-1].getReg(); 2032 const TargetRegisterClass *RegClass = CSI[i-1].getRegClass(); 2033 // Add the callee-saved register as live-in. It's killed at the spill. 2034 MBB.addLiveIn(Reg); 2035 if (Reg == FPReg) 2036 // X86RegisterInfo::emitPrologue will handle spilling of frame register. 2037 continue; 2038 if (RegClass != &X86::VR128RegClass) { 2039 CalleeFrameSize += SlotSize; 2040 BuildMI(MBB, MI, DL, get(Opc)).addReg(Reg, RegState::Kill); 2041 } else { 2042 storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), RegClass); 2043 } 2044 } 2045 2046 X86FI->setCalleeSavedFrameSize(CalleeFrameSize); 2047 return true; 2048} 2049 2050bool X86InstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2051 MachineBasicBlock::iterator MI, 2052 const std::vector<CalleeSavedInfo> &CSI) const { 2053 if (CSI.empty()) 2054 return false; 2055 2056 DebugLoc DL = DebugLoc::getUnknownLoc(); 2057 if (MI != MBB.end()) DL = MI->getDebugLoc(); 2058 2059 MachineFunction &MF = *MBB.getParent(); 2060 unsigned FPReg = RI.getFrameRegister(MF); 2061 bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); 2062 unsigned Opc = is64Bit ? X86::POP64r : X86::POP32r; 2063 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2064 unsigned Reg = CSI[i].getReg(); 2065 if (Reg == FPReg) 2066 // X86RegisterInfo::emitEpilogue will handle restoring of frame register. 
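// (The frame register is pushed by emitPrologue and popped by
// emitEpilogue, so popping it again here would unbalance the stack.)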
2067 continue; 2068 const TargetRegisterClass *RegClass = CSI[i].getRegClass(); 2069 if (RegClass != &X86::VR128RegClass) { 2070 BuildMI(MBB, MI, DL, get(Opc), Reg); 2071 } else { 2072 loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RegClass); 2073 } 2074 } 2075 return true; 2076} 2077 2078 static MachineInstr *FuseTwoAddrInst(MachineFunction &MF, unsigned Opcode, 2079 const SmallVectorImpl<MachineOperand> &MOs, 2080 MachineInstr *MI, 2081 const TargetInstrInfo &TII) { 2082 // Create the base instruction with the memory operand as the first part. 2083 MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2084 MI->getDebugLoc(), true); 2085 MachineInstrBuilder MIB(NewMI); 2086 unsigned NumAddrOps = MOs.size(); 2087 for (unsigned i = 0; i != NumAddrOps; ++i) 2088 MIB.addOperand(MOs[i]); 2089 if (NumAddrOps < 4) // FrameIndex only 2090 addOffset(MIB, 0); 2091 2092 // Loop over the rest of the ri operands, converting them over. 2093 unsigned NumOps = MI->getDesc().getNumOperands()-2; 2094 for (unsigned i = 0; i != NumOps; ++i) { 2095 MachineOperand &MO = MI->getOperand(i+2); 2096 MIB.addOperand(MO); 2097 } 2098 for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { 2099 MachineOperand &MO = MI->getOperand(i); 2100 MIB.addOperand(MO); 2101 } 2102 return MIB; 2103} 2104 2105 static MachineInstr *FuseInst(MachineFunction &MF, 2106 unsigned Opcode, unsigned OpNo, 2107 const SmallVectorImpl<MachineOperand> &MOs, 2108 MachineInstr *MI, const TargetInstrInfo &TII) { 2109 MachineInstr *NewMI = MF.CreateMachineInstr(TII.get(Opcode), 2110 MI->getDebugLoc(), true); 2111 MachineInstrBuilder MIB(NewMI); 2112 2113 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2114 MachineOperand &MO = MI->getOperand(i); 2115 if (i == OpNo) { 2116 assert(MO.isReg() && "Expected to fold into reg operand!"); 2117 unsigned NumAddrOps = MOs.size(); 2118 for (unsigned i = 0; i != NumAddrOps; ++i) 2119 MIB.addOperand(MOs[i]); 2120 if (NumAddrOps < 4) // FrameIndex only 2121 addOffset(MIB, 0); 2122 } else { 2123 MIB.addOperand(MO); 2124 } 2125 } 2126 return MIB; 2127} 2128 2129 static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, 2130 const SmallVectorImpl<MachineOperand> &MOs, 2131 MachineInstr *MI) { 2132 MachineFunction &MF = *MI->getParent()->getParent(); 2133 MachineInstrBuilder MIB = BuildMI(MF, MI->getDebugLoc(), TII.get(Opcode)); 2134 2135 unsigned NumAddrOps = MOs.size(); 2136 for (unsigned i = 0; i != NumAddrOps; ++i) 2137 MIB.addOperand(MOs[i]); 2138 if (NumAddrOps < 4) // FrameIndex only 2139 addOffset(MIB, 0); 2140 return MIB.addImm(0); 2141} 2142 2143 MachineInstr* 2144X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2145 MachineInstr *MI, unsigned i, 2146 const SmallVectorImpl<MachineOperand> &MOs) const{ 2147 const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL; 2148 bool isTwoAddrFold = false; 2149 unsigned NumOps = MI->getDesc().getNumOperands(); 2150 bool isTwoAddr = NumOps > 1 && 2151 MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2152 2153 MachineInstr *NewMI = NULL; 2154 // Folding a memory location into the two-address part of a two-address 2155 // instruction is different from folding it elsewhere. It requires 2156 // replacing the *two* registers with the memory location.
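// For example, OpTbl2Addr maps ADD32rr to ADD32mr, so folding a spill
// slot into the tied operands of "%reg = ADD32rr %reg, %reg2" yields a
// single read-modify-write "ADD32mr <slot>, %reg2".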
2157 if (isTwoAddr && NumOps >= 2 && i < 2 && 2158 MI->getOperand(0).isReg() && 2159 MI->getOperand(1).isReg() && 2160 MI->getOperand(0).getReg() == MI->getOperand(1).getReg()) { 2161 OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2162 isTwoAddrFold = true; 2163 } else if (i == 0) { // If operand 0 2164 if (MI->getOpcode() == X86::MOV16r0) 2165 NewMI = MakeM0Inst(*this, X86::MOV16mi, MOs, MI); 2166 else if (MI->getOpcode() == X86::MOV32r0) 2167 NewMI = MakeM0Inst(*this, X86::MOV32mi, MOs, MI); 2168 else if (MI->getOpcode() == X86::MOV64r0) 2169 NewMI = MakeM0Inst(*this, X86::MOV64mi32, MOs, MI); 2170 else if (MI->getOpcode() == X86::MOV8r0) 2171 NewMI = MakeM0Inst(*this, X86::MOV8mi, MOs, MI); 2172 if (NewMI) 2173 return NewMI; 2174 2175 OpcodeTablePtr = &RegOp2MemOpTable0; 2176 } else if (i == 1) { 2177 OpcodeTablePtr = &RegOp2MemOpTable1; 2178 } else if (i == 2) { 2179 OpcodeTablePtr = &RegOp2MemOpTable2; 2180 } 2181 2182 // If table selected... 2183 if (OpcodeTablePtr) { 2184 // Find the Opcode to fuse 2185 DenseMap<unsigned*, unsigned>::iterator I = 2186 OpcodeTablePtr->find((unsigned*)MI->getOpcode()); 2187 if (I != OpcodeTablePtr->end()) { 2188 if (isTwoAddrFold) 2189 NewMI = FuseTwoAddrInst(MF, I->second, MOs, MI, *this); 2190 else 2191 NewMI = FuseInst(MF, I->second, i, MOs, MI, *this); 2192 return NewMI; 2193 } 2194 } 2195 2196 // No fusion 2197 if (PrintFailedFusing) 2198 cerr << "We failed to fuse operand " << i << " in " << *MI; 2199 return NULL; 2200} 2201 2202 2203 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2204 MachineInstr *MI, 2205 const SmallVectorImpl<unsigned> &Ops, 2206 int FrameIndex) const { 2207 // Check switch flag 2208 if (NoFusing) return NULL; 2209 2210 const MachineFrameInfo *MFI = MF.getFrameInfo(); 2211 unsigned Alignment = MFI->getObjectAlignment(FrameIndex); 2212 // FIXME: Move alignment requirement into tables? 2213 if (Alignment < 16) { 2214 switch (MI->getOpcode()) { 2215 default: break; 2216 // Not always safe to fold movsd into these instructions since their load 2217 // folding variants expect the address to be 16-byte aligned. 2218 case X86::FsANDNPDrr: 2219 case X86::FsANDNPSrr: 2220 case X86::FsANDPDrr: 2221 case X86::FsANDPSrr: 2222 case X86::FsORPDrr: 2223 case X86::FsORPSrr: 2224 case X86::FsXORPDrr: 2225 case X86::FsXORPSrr: 2226 return NULL; 2227 } 2228 } 2229 2230 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2231 unsigned NewOpc = 0; 2232 switch (MI->getOpcode()) { 2233 default: return NULL; 2234 case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2235 case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2236 case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2237 case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2238 } 2239 // Change to CMPXXri r, 0 first. 2240 MI->setDesc(get(NewOpc)); 2241 MI->getOperand(1).ChangeToImmediate(0); 2242 } else if (Ops.size() != 1) 2243 return NULL; 2244 2245 SmallVector<MachineOperand,4> MOs; 2246 MOs.push_back(MachineOperand::CreateFI(FrameIndex)); 2247 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); 2248} 2249 2250 MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, 2251 MachineInstr *MI, 2252 const SmallVectorImpl<unsigned> &Ops, 2253 MachineInstr *LoadMI) const { 2254 // Check switch flag 2255 if (NoFusing) return NULL; 2256 2257 // Determine the alignment of the load. 2258 unsigned Alignment = 0; 2259 if (LoadMI->hasOneMemOperand()) 2260 Alignment = LoadMI->memoperands_begin()->getAlignment(); 2261 2262 // FIXME: Move alignment requirement into tables?
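// (The Fs* pseudos below implement scalar logic ops with full 128-bit
// packed instructions, whose memory forms fault on under-aligned
// operands; hence the 16-byte requirement.)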
2263 if (Alignment < 16) { 2264 switch (MI->getOpcode()) { 2265 default: break; 2266 // Not always safe to fold movsd into these instructions since their load 2267 // folding variants expect the address to be 16-byte aligned. 2268 case X86::FsANDNPDrr: 2269 case X86::FsANDNPSrr: 2270 case X86::FsANDPDrr: 2271 case X86::FsANDPSrr: 2272 case X86::FsORPDrr: 2273 case X86::FsORPSrr: 2274 case X86::FsXORPDrr: 2275 case X86::FsXORPSrr: 2276 return NULL; 2277 } 2278 } 2279 2280 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2281 unsigned NewOpc = 0; 2282 switch (MI->getOpcode()) { 2283 default: return NULL; 2284 case X86::TEST8rr: NewOpc = X86::CMP8ri; break; 2285 case X86::TEST16rr: NewOpc = X86::CMP16ri; break; 2286 case X86::TEST32rr: NewOpc = X86::CMP32ri; break; 2287 case X86::TEST64rr: NewOpc = X86::CMP64ri32; break; 2288 } 2289 // Change to CMPXXri r, 0 first. 2290 MI->setDesc(get(NewOpc)); 2291 MI->getOperand(1).ChangeToImmediate(0); 2292 } else if (Ops.size() != 1) 2293 return NULL; 2294 2295 SmallVector<MachineOperand,X86AddrNumOperands> MOs; 2296 if (LoadMI->getOpcode() == X86::V_SET0 || 2297 LoadMI->getOpcode() == X86::V_SETALLONES) { 2298 // Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure. 2299 // Create a constant-pool entry and operands to load from it. 2300 2301 // x86-32 PIC requires a PIC base register for constant pools. 2302 unsigned PICBase = 0; 2303 if (TM.getRelocationModel() == Reloc::PIC_ && 2304 !TM.getSubtarget<X86Subtarget>().is64Bit()) 2305 // FIXME: PICBase = TM.getInstrInfo()->getGlobalBaseReg(&MF); 2306 // This doesn't work for several reasons. 2307 // 1. GlobalBaseReg may have been spilled. 2308 // 2. It may not be live at MI. 2309 return NULL; 2310 2311 // Create a v4i32 constant-pool entry. 2312 MachineConstantPool &MCP = *MF.getConstantPool(); 2313 const VectorType *Ty = VectorType::get(Type::Int32Ty, 4); 2314 Constant *C = LoadMI->getOpcode() == X86::V_SET0 ? 2315 ConstantVector::getNullValue(Ty) : 2316 ConstantVector::getAllOnesValue(Ty); 2317 unsigned CPI = MCP.getConstantPoolIndex(C, 16); 2318 2319 // Create operands to load from the constant pool entry. 2320 MOs.push_back(MachineOperand::CreateReg(PICBase, false)); 2321 MOs.push_back(MachineOperand::CreateImm(1)); 2322 MOs.push_back(MachineOperand::CreateReg(0, false)); 2323 MOs.push_back(MachineOperand::CreateCPI(CPI, 0)); 2324 MOs.push_back(MachineOperand::CreateReg(0, false)); 2325 } else { 2326 // Folding a normal load. Just copy the load's address operands.
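// (An x86 memory reference is X86AddrNumOperands operands, in order:
// base register, scale immediate, index register, displacement, and
// segment register; the constant-pool path above builds that sequence.)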
2327 unsigned NumOps = LoadMI->getDesc().getNumOperands(); 2328 for (unsigned i = NumOps - X86AddrNumOperands; i != NumOps; ++i) 2329 MOs.push_back(LoadMI->getOperand(i)); 2330 } 2331 return foldMemoryOperandImpl(MF, MI, Ops[0], MOs); 2332} 2333 2334 2335 bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI, 2336 const SmallVectorImpl<unsigned> &Ops) const { 2337 // Check switch flag 2338 if (NoFusing) return false; 2339 2340 if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { 2341 switch (MI->getOpcode()) { 2342 default: return false; 2343 case X86::TEST8rr: 2344 case X86::TEST16rr: 2345 case X86::TEST32rr: 2346 case X86::TEST64rr: 2347 return true; 2348 } 2349 } 2350 2351 if (Ops.size() != 1) 2352 return false; 2353 2354 unsigned OpNum = Ops[0]; 2355 unsigned Opc = MI->getOpcode(); 2356 unsigned NumOps = MI->getDesc().getNumOperands(); 2357 bool isTwoAddr = NumOps > 1 && 2358 MI->getDesc().getOperandConstraint(1, TOI::TIED_TO) != -1; 2359 2360 // Folding a memory location into the two-address part of a two-address 2361 // instruction is different from folding it elsewhere. It requires 2362 // replacing the *two* registers with the memory location. 2363 const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL; 2364 if (isTwoAddr && NumOps >= 2 && OpNum < 2) { 2365 OpcodeTablePtr = &RegOp2MemOpTable2Addr; 2366 } else if (OpNum == 0) { // If operand 0 2367 switch (Opc) { 2368 case X86::MOV16r0: 2369 case X86::MOV32r0: 2370 case X86::MOV64r0: 2371 case X86::MOV8r0: 2372 return true; 2373 default: break; 2374 } 2375 OpcodeTablePtr = &RegOp2MemOpTable0; 2376 } else if (OpNum == 1) { 2377 OpcodeTablePtr = &RegOp2MemOpTable1; 2378 } else if (OpNum == 2) { 2379 OpcodeTablePtr = &RegOp2MemOpTable2; 2380 } 2381 2382 if (OpcodeTablePtr) { 2383 // Find the Opcode to fuse 2384 DenseMap<unsigned*, unsigned>::iterator I = 2385 OpcodeTablePtr->find((unsigned*)Opc); 2386 if (I != OpcodeTablePtr->end()) 2387 return true; 2388 } 2389 return false; 2390} 2391 2392 bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, 2393 unsigned Reg, bool UnfoldLoad, bool UnfoldStore, 2394 SmallVectorImpl<MachineInstr*> &NewMIs) const { 2395 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = 2396 MemOp2RegOpTable.find((unsigned*)MI->getOpcode()); 2397 if (I == MemOp2RegOpTable.end()) 2398 return false; 2399 DebugLoc dl = MI->getDebugLoc(); 2400 unsigned Opc = I->second.first; 2401 unsigned Index = I->second.second & 0xf; 2402 bool FoldedLoad = I->second.second & (1 << 4); 2403 bool FoldedStore = I->second.second & (1 << 5); 2404 if (UnfoldLoad && !FoldedLoad) 2405 return false; 2406 UnfoldLoad &= FoldedLoad; 2407 if (UnfoldStore && !FoldedStore) 2408 return false; 2409 UnfoldStore &= FoldedStore; 2410 2411 const TargetInstrDesc &TID = get(Opc); 2412 const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2413 const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() 2414 ?
RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); 2415 SmallVector<MachineOperand, X86AddrNumOperands> AddrOps; 2416 SmallVector<MachineOperand,2> BeforeOps; 2417 SmallVector<MachineOperand,2> AfterOps; 2418 SmallVector<MachineOperand,4> ImpOps; 2419 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2420 MachineOperand &Op = MI->getOperand(i); 2421 if (i >= Index && i < Index + X86AddrNumOperands) 2422 AddrOps.push_back(Op); 2423 else if (Op.isReg() && Op.isImplicit()) 2424 ImpOps.push_back(Op); 2425 else if (i < Index) 2426 BeforeOps.push_back(Op); 2427 else if (i > Index) 2428 AfterOps.push_back(Op); 2429 } 2430 2431 // Emit the load instruction. 2432 if (UnfoldLoad) { 2433 loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); 2434 if (UnfoldStore) { 2435 // Address operands cannot be marked isKill. 2436 for (unsigned i = 1; i != 1 + X86AddrNumOperands; ++i) { 2437 MachineOperand &MO = NewMIs[0]->getOperand(i); 2438 if (MO.isReg()) 2439 MO.setIsKill(false); 2440 } 2441 } 2442 } 2443 2444 // Emit the data processing instruction. 2445 MachineInstr *DataMI = MF.CreateMachineInstr(TID, MI->getDebugLoc(), true); 2446 MachineInstrBuilder MIB(DataMI); 2447 2448 if (FoldedStore) 2449 MIB.addReg(Reg, RegState::Define); 2450 for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) 2451 MIB.addOperand(BeforeOps[i]); 2452 if (FoldedLoad) 2453 MIB.addReg(Reg); 2454 for (unsigned i = 0, e = AfterOps.size(); i != e; ++i) 2455 MIB.addOperand(AfterOps[i]); 2456 for (unsigned i = 0, e = ImpOps.size(); i != e; ++i) { 2457 MachineOperand &MO = ImpOps[i]; 2458 MIB.addReg(MO.getReg(), 2459 getDefRegState(MO.isDef()) | 2460 RegState::Implicit | 2461 getKillRegState(MO.isKill()) | 2462 getDeadRegState(MO.isDead()) | 2463 getUndefRegState(MO.isUndef())); 2464 } 2465 // Change CMP32ri r, 0 back to TEST32rr r, r, etc. 2466 unsigned NewOpc = 0; 2467 switch (DataMI->getOpcode()) { 2468 default: break; 2469 case X86::CMP64ri32: 2470 case X86::CMP32ri: 2471 case X86::CMP16ri: 2472 case X86::CMP8ri: { 2473 MachineOperand &MO0 = DataMI->getOperand(0); 2474 MachineOperand &MO1 = DataMI->getOperand(1); 2475 if (MO1.getImm() == 0) { 2476 switch (DataMI->getOpcode()) { 2477 default: break; 2478 case X86::CMP64ri32: NewOpc = X86::TEST64rr; break; 2479 case X86::CMP32ri: NewOpc = X86::TEST32rr; break; 2480 case X86::CMP16ri: NewOpc = X86::TEST16rr; break; 2481 case X86::CMP8ri: NewOpc = X86::TEST8rr; break; 2482 } 2483 DataMI->setDesc(get(NewOpc)); 2484 MO1.ChangeToRegister(MO0.getReg(), false); 2485 } 2486 } 2487 } 2488 NewMIs.push_back(DataMI); 2489 2490 // Emit the store instruction. 2491 if (UnfoldStore) { 2492 const TargetOperandInfo &DstTOI = TID.OpInfo[0]; 2493 const TargetRegisterClass *DstRC = DstTOI.isLookupPtrRegClass() 2494 ? 
RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); 2495 storeRegToAddr(MF, Reg, true, AddrOps, DstRC, NewMIs); 2496 } 2497 2498 return true; 2499} 2500 2501bool 2502X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, 2503 SmallVectorImpl<SDNode*> &NewNodes) const { 2504 if (!N->isMachineOpcode()) 2505 return false; 2506 2507 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = 2508 MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode()); 2509 if (I == MemOp2RegOpTable.end()) 2510 return false; 2511 unsigned Opc = I->second.first; 2512 unsigned Index = I->second.second & 0xf; 2513 bool FoldedLoad = I->second.second & (1 << 4); 2514 bool FoldedStore = I->second.second & (1 << 5); 2515 const TargetInstrDesc &TID = get(Opc); 2516 const TargetOperandInfo &TOI = TID.OpInfo[Index]; 2517 const TargetRegisterClass *RC = TOI.isLookupPtrRegClass() 2518 ? RI.getPointerRegClass() : RI.getRegClass(TOI.RegClass); 2519 unsigned NumDefs = TID.NumDefs; 2520 std::vector<SDValue> AddrOps; 2521 std::vector<SDValue> BeforeOps; 2522 std::vector<SDValue> AfterOps; 2523 DebugLoc dl = N->getDebugLoc(); 2524 unsigned NumOps = N->getNumOperands(); 2525 for (unsigned i = 0; i != NumOps-1; ++i) { 2526 SDValue Op = N->getOperand(i); 2527 if (i >= Index-NumDefs && i < Index-NumDefs + X86AddrNumOperands) 2528 AddrOps.push_back(Op); 2529 else if (i < Index-NumDefs) 2530 BeforeOps.push_back(Op); 2531 else if (i > Index-NumDefs) 2532 AfterOps.push_back(Op); 2533 } 2534 SDValue Chain = N->getOperand(NumOps-1); 2535 AddrOps.push_back(Chain); 2536 2537 // Emit the load instruction. 2538 SDNode *Load = 0; 2539 const MachineFunction &MF = DAG.getMachineFunction(); 2540 if (FoldedLoad) { 2541 MVT VT = *RC->vt_begin(); 2542 bool isAligned = (RI.getStackAlignment() >= 16) || 2543 RI.needsStackRealignment(MF); 2544 Load = DAG.getTargetNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, 2545 VT, MVT::Other, &AddrOps[0], AddrOps.size()); 2546 NewNodes.push_back(Load); 2547 } 2548 2549 // Emit the data processing instruction. 2550 std::vector<MVT> VTs; 2551 const TargetRegisterClass *DstRC = 0; 2552 if (TID.getNumDefs() > 0) { 2553 const TargetOperandInfo &DstTOI = TID.OpInfo[0]; 2554 DstRC = DstTOI.isLookupPtrRegClass() 2555 ? RI.getPointerRegClass() : RI.getRegClass(DstTOI.RegClass); 2556 VTs.push_back(*DstRC->vt_begin()); 2557 } 2558 for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { 2559 MVT VT = N->getValueType(i); 2560 if (VT != MVT::Other && i >= (unsigned)TID.getNumDefs()) 2561 VTs.push_back(VT); 2562 } 2563 if (Load) 2564 BeforeOps.push_back(SDValue(Load, 0)); 2565 std::copy(AfterOps.begin(), AfterOps.end(), std::back_inserter(BeforeOps)); 2566 SDNode *NewNode= DAG.getTargetNode(Opc, dl, VTs, &BeforeOps[0], 2567 BeforeOps.size()); 2568 NewNodes.push_back(NewNode); 2569 2570 // Emit the store instruction. 
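// (The store reuses the original address operands, with NewNode's result
// substituted as the value operand and the chain re-appended.)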
2571 if (FoldedStore) { 2572 AddrOps.pop_back(); 2573 AddrOps.push_back(SDValue(NewNode, 0)); 2574 AddrOps.push_back(Chain); 2575 bool isAligned = (RI.getStackAlignment() >= 16) || 2576 RI.needsStackRealignment(MF); 2577 SDNode *Store = DAG.getTargetNode(getStoreRegOpcode(0, DstRC, 2578 isAligned, TM), 2579 dl, MVT::Other, 2580 &AddrOps[0], AddrOps.size()); 2581 NewNodes.push_back(Store); 2582 } 2583 2584 return true; 2585} 2586 2587 unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, 2588 bool UnfoldLoad, bool UnfoldStore) const { 2589 DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I = 2590 MemOp2RegOpTable.find((unsigned*)Opc); 2591 if (I == MemOp2RegOpTable.end()) 2592 return 0; 2593 bool FoldedLoad = I->second.second & (1 << 4); 2594 bool FoldedStore = I->second.second & (1 << 5); 2595 if (UnfoldLoad && !FoldedLoad) 2596 return 0; 2597 if (UnfoldStore && !FoldedStore) 2598 return 0; 2599 return I->second.first; 2600} 2601 2602 bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const { 2603 if (MBB.empty()) return false; 2604 2605 switch (MBB.back().getOpcode()) { 2606 case X86::TCRETURNri: 2607 case X86::TCRETURNdi: 2608 case X86::RET: // Return. 2609 case X86::RETI: 2610 case X86::TAILJMPd: 2611 case X86::TAILJMPr: 2612 case X86::TAILJMPm: 2613 case X86::JMP: // Uncond branch. 2614 case X86::JMP32r: // Indirect branch. 2615 case X86::JMP64r: // Indirect branch (64-bit). 2616 case X86::JMP32m: // Indirect branch through mem. 2617 case X86::JMP64m: // Indirect branch through mem (64-bit). 2618 return true; 2619 default: return false; 2620 } 2621} 2622 2623 bool X86InstrInfo:: 2624ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const { 2625 assert(Cond.size() == 1 && "Invalid X86 branch condition!"); 2626 X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm()); 2627 if (CC == X86::COND_NE_OR_P || CC == X86::COND_NP_OR_E) 2628 return true; 2629 Cond[0].setImm(GetOppositeBranchCondition(CC)); 2630 return false; 2631} 2632 2633 bool X86InstrInfo:: 2634isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { 2635 // FIXME: Return false for x87 stack register classes for now. We can't 2636 // allow any loads of these registers before FpGET_ST0_80. 2637 return !(RC == &X86::CCRRegClass || RC == &X86::RFP32RegClass || 2638 RC == &X86::RFP64RegClass || RC == &X86::RFP80RegClass); 2639} 2640 2641 unsigned X86InstrInfo::sizeOfImm(const TargetInstrDesc *Desc) { 2642 switch (Desc->TSFlags & X86II::ImmMask) { 2643 case X86II::Imm8: return 1; 2644 case X86II::Imm16: return 2; 2645 case X86II::Imm32: return 4; 2646 case X86II::Imm64: return 8; 2647 default: assert(0 && "Immediate size not set!"); 2648 return 0; 2649 } 2650} 2651 2652 /// isX86_64ExtendedReg - Is the MachineOperand an x86-64 extended register? 2653 /// e.g. r8, xmm8, etc.
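/// Encoding these registers requires a REX prefix to supply the extra
/// register-number bit (REX.B, REX.X or REX.R); determineREX below
/// computes those bits.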
2654 bool X86InstrInfo::isX86_64ExtendedReg(const MachineOperand &MO) { 2655 if (!MO.isReg()) return false; 2656 switch (MO.getReg()) { 2657 default: break; 2658 case X86::R8: case X86::R9: case X86::R10: case X86::R11: 2659 case X86::R12: case X86::R13: case X86::R14: case X86::R15: 2660 case X86::R8D: case X86::R9D: case X86::R10D: case X86::R11D: 2661 case X86::R12D: case X86::R13D: case X86::R14D: case X86::R15D: 2662 case X86::R8W: case X86::R9W: case X86::R10W: case X86::R11W: 2663 case X86::R12W: case X86::R13W: case X86::R14W: case X86::R15W: 2664 case X86::R8B: case X86::R9B: case X86::R10B: case X86::R11B: 2665 case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B: 2666 case X86::XMM8: case X86::XMM9: case X86::XMM10: case X86::XMM11: 2667 case X86::XMM12: case X86::XMM13: case X86::XMM14: case X86::XMM15: 2668 return true; 2669 } 2670 return false; 2671} 2672 2673 2674 /// determineREX - Determine if the MachineInstr has to be encoded with an X86-64 2675 /// REX prefix which specifies 1) 64-bit instructions, 2) non-default operand 2676 /// size, and 3) use of X86-64 extended registers. 2677 unsigned X86InstrInfo::determineREX(const MachineInstr &MI) { 2678 unsigned REX = 0; 2679 const TargetInstrDesc &Desc = MI.getDesc(); 2680 2681 // Pseudo instructions do not need REX prefix byte. 2682 if ((Desc.TSFlags & X86II::FormMask) == X86II::Pseudo) 2683 return 0; 2684 if (Desc.TSFlags & X86II::REX_W) 2685 REX |= 1 << 3; 2686 2687 unsigned NumOps = Desc.getNumOperands(); 2688 if (NumOps) { 2689 bool isTwoAddr = NumOps > 1 && 2690 Desc.getOperandConstraint(1, TOI::TIED_TO) != -1; 2691 2692 // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. 2693 unsigned i = isTwoAddr ? 1 : 0; 2694 for (unsigned e = NumOps; i != e; ++i) { 2695 const MachineOperand& MO = MI.getOperand(i); 2696 if (MO.isReg()) { 2697 unsigned Reg = MO.getReg(); 2698 if (isX86_64NonExtLowByteReg(Reg)) 2699 REX |= 0x40; 2700 } 2701 } 2702 2703 switch (Desc.TSFlags & X86II::FormMask) { 2704 case X86II::MRMInitReg: 2705 if (isX86_64ExtendedReg(MI.getOperand(0))) 2706 REX |= (1 << 0) | (1 << 2); 2707 break; 2708 case X86II::MRMSrcReg: { 2709 if (isX86_64ExtendedReg(MI.getOperand(0))) 2710 REX |= 1 << 2; 2711 i = isTwoAddr ? 2 : 1; 2712 for (unsigned e = NumOps; i != e; ++i) { 2713 const MachineOperand& MO = MI.getOperand(i); 2714 if (isX86_64ExtendedReg(MO)) 2715 REX |= 1 << 0; 2716 } 2717 break; 2718 } 2719 case X86II::MRMSrcMem: { 2720 if (isX86_64ExtendedReg(MI.getOperand(0))) 2721 REX |= 1 << 2; 2722 unsigned Bit = 0; 2723 i = isTwoAddr ? 2 : 1; 2724 for (; i != NumOps; ++i) { 2725 const MachineOperand& MO = MI.getOperand(i); 2726 if (MO.isReg()) { 2727 if (isX86_64ExtendedReg(MO)) 2728 REX |= 1 << Bit; 2729 Bit++; 2730 } 2731 } 2732 break; 2733 } 2734 case X86II::MRM0m: case X86II::MRM1m: 2735 case X86II::MRM2m: case X86II::MRM3m: 2736 case X86II::MRM4m: case X86II::MRM5m: 2737 case X86II::MRM6m: case X86II::MRM7m: 2738 case X86II::MRMDestMem: { 2739 unsigned e = (isTwoAddr ? X86AddrNumOperands+1 : X86AddrNumOperands); 2740 i = isTwoAddr ? 1 : 0; 2741 if (NumOps > e && isX86_64ExtendedReg(MI.getOperand(e))) 2742 REX |= 1 << 2; 2743 unsigned Bit = 0; 2744 for (; i != e; ++i) { 2745 const MachineOperand& MO = MI.getOperand(i); 2746 if (MO.isReg()) { 2747 if (isX86_64ExtendedReg(MO)) 2748 REX |= 1 << Bit; 2749 Bit++; 2750 } 2751 } 2752 break; 2753 } 2754 default: { 2755 if (isX86_64ExtendedReg(MI.getOperand(0))) 2756 REX |= 1 << 0; 2757 i = isTwoAddr ?
2 : 1; 2758 for (unsigned e = NumOps; i != e; ++i) { 2759 const MachineOperand& MO = MI.getOperand(i); 2760 if (isX86_64ExtendedReg(MO)) 2761 REX |= 1 << 2; 2762 } 2763 break; 2764 } 2765 } 2766 } 2767 return REX; 2768} 2769 2770/// sizePCRelativeBlockAddress - This method returns the size of a PC 2771/// relative block address instruction 2772/// 2773static unsigned sizePCRelativeBlockAddress() { 2774 return 4; 2775} 2776 2777/// sizeGlobalAddress - Give the size of the emission of this global address 2778/// 2779static unsigned sizeGlobalAddress(bool dword) { 2780 return dword ? 8 : 4; 2781} 2782 2783/// sizeConstPoolAddress - Give the size of the emission of this constant 2784/// pool address 2785/// 2786static unsigned sizeConstPoolAddress(bool dword) { 2787 return dword ? 8 : 4; 2788} 2789 2790/// sizeExternalSymbolAddress - Give the size of the emission of this external 2791/// symbol 2792/// 2793static unsigned sizeExternalSymbolAddress(bool dword) { 2794 return dword ? 8 : 4; 2795} 2796 2797/// sizeJumpTableAddress - Give the size of the emission of this jump 2798/// table address 2799/// 2800static unsigned sizeJumpTableAddress(bool dword) { 2801 return dword ? 8 : 4; 2802} 2803 2804static unsigned sizeConstant(unsigned Size) { 2805 return Size; 2806} 2807 2808static unsigned sizeRegModRMByte(){ 2809 return 1; 2810} 2811 2812static unsigned sizeSIBByte(){ 2813 return 1; 2814} 2815 2816static unsigned getDisplacementFieldSize(const MachineOperand *RelocOp) { 2817 unsigned FinalSize = 0; 2818 // If this is a simple integer displacement that doesn't require a relocation. 2819 if (!RelocOp) { 2820 FinalSize += sizeConstant(4); 2821 return FinalSize; 2822 } 2823 2824 // Otherwise, this is something that requires a relocation. 2825 if (RelocOp->isGlobal()) { 2826 FinalSize += sizeGlobalAddress(false); 2827 } else if (RelocOp->isCPI()) { 2828 FinalSize += sizeConstPoolAddress(false); 2829 } else if (RelocOp->isJTI()) { 2830 FinalSize += sizeJumpTableAddress(false); 2831 } else { 2832 assert(0 && "Unknown value to relocate!"); 2833 } 2834 return FinalSize; 2835} 2836 2837static unsigned getMemModRMByteSize(const MachineInstr &MI, unsigned Op, 2838 bool IsPIC, bool Is64BitMode) { 2839 const MachineOperand &Op3 = MI.getOperand(Op+3); 2840 int DispVal = 0; 2841 const MachineOperand *DispForReloc = 0; 2842 unsigned FinalSize = 0; 2843 2844 // Figure out what sort of displacement we have to handle here. 2845 if (Op3.isGlobal()) { 2846 DispForReloc = &Op3; 2847 } else if (Op3.isCPI()) { 2848 if (Is64BitMode || IsPIC) { 2849 DispForReloc = &Op3; 2850 } else { 2851 DispVal = 1; 2852 } 2853 } else if (Op3.isJTI()) { 2854 if (Is64BitMode || IsPIC) { 2855 DispForReloc = &Op3; 2856 } else { 2857 DispVal = 1; 2858 } 2859 } else { 2860 DispVal = 1; 2861 } 2862 2863 const MachineOperand &Base = MI.getOperand(Op); 2864 const MachineOperand &IndexReg = MI.getOperand(Op+2); 2865 2866 unsigned BaseReg = Base.getReg(); 2867 2868 // Is a SIB byte needed? 2869 if ((!Is64BitMode || DispForReloc || BaseReg != 0) && 2870 IndexReg.getReg() == 0 && 2871 (BaseReg == 0 || X86RegisterInfo::getX86RegNum(BaseReg) != N86::ESP)) { 2872 if (BaseReg == 0) { // Just a displacement? 2873 // Emit special case [disp32] encoding 2874 ++FinalSize; 2875 FinalSize += getDisplacementFieldSize(DispForReloc); 2876 } else { 2877 unsigned BaseRegNo = X86RegisterInfo::getX86RegNum(BaseReg); 2878 if (!DispForReloc && DispVal == 0 && BaseRegNo != N86::EBP) { 2879 // Emit simple indirect register encoding... [EAX] f.e. 
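// (ModRM mod=00 needs no displacement byte; EBP is excluded because
// mod=00 with r/m=101 instead encodes a bare disp32.)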
2880 ++FinalSize; 2881 // Be pessimistic and assume it's a disp32, not a disp8 2882 } else { 2883 // Emit the most general non-SIB encoding: [REG+disp32] 2884 ++FinalSize; 2885 FinalSize += getDisplacementFieldSize(DispForReloc); 2886 } 2887 } 2888 2889 } else { // We need a SIB byte, so start by outputting the ModR/M byte first 2890 assert(IndexReg.getReg() != X86::ESP && 2891 IndexReg.getReg() != X86::RSP && "Cannot use ESP as index reg!"); 2892 2893 bool ForceDisp32 = false; 2894 if (BaseReg == 0 || DispForReloc) { 2895 // Emit the normal disp32 encoding. 2896 ++FinalSize; 2897 ForceDisp32 = true; 2898 } else { 2899 ++FinalSize; 2900 } 2901 2902 FinalSize += sizeSIBByte(); 2903 2904 // Do we need to output a displacement? 2905 if (DispVal != 0 || ForceDisp32) { 2906 FinalSize += getDisplacementFieldSize(DispForReloc); 2907 } 2908 } 2909 return FinalSize; 2910} 2911 2912 2913static unsigned GetInstSizeWithDesc(const MachineInstr &MI, 2914 const TargetInstrDesc *Desc, 2915 bool IsPIC, bool Is64BitMode) { 2916 2917 unsigned Opcode = Desc->Opcode; 2918 unsigned FinalSize = 0; 2919 2920 // Emit the lock opcode prefix as needed. 2921 if (Desc->TSFlags & X86II::LOCK) ++FinalSize; 2922 2923 // Emit segment override opcode prefix as needed. 2924 switch (Desc->TSFlags & X86II::SegOvrMask) { 2925 case X86II::FS: 2926 case X86II::GS: 2927 ++FinalSize; 2928 break; 2929 default: assert(0 && "Invalid segment!"); 2930 case 0: break; // No segment override! 2931 } 2932 2933 // Emit the repeat opcode prefix as needed. 2934 if ((Desc->TSFlags & X86II::Op0Mask) == X86II::REP) ++FinalSize; 2935 2936 // Emit the operand size opcode prefix as needed. 2937 if (Desc->TSFlags & X86II::OpSize) ++FinalSize; 2938 2939 // Emit the address size opcode prefix as needed. 2940 if (Desc->TSFlags & X86II::AdSize) ++FinalSize; 2941 2942 bool Need0FPrefix = false; 2943 switch (Desc->TSFlags & X86II::Op0Mask) { 2944 case X86II::TB: // Two-byte opcode prefix 2945 case X86II::T8: // 0F 38 2946 case X86II::TA: // 0F 3A 2947 Need0FPrefix = true; 2948 break; 2949 case X86II::REP: break; // already handled. 2950 case X86II::XS: // F3 0F 2951 ++FinalSize; 2952 Need0FPrefix = true; 2953 break; 2954 case X86II::XD: // F2 0F 2955 ++FinalSize; 2956 Need0FPrefix = true; 2957 break; 2958 case X86II::D8: case X86II::D9: case X86II::DA: case X86II::DB: 2959 case X86II::DC: case X86II::DD: case X86II::DE: case X86II::DF: 2960 ++FinalSize; 2961 break; // Two-byte opcode prefix 2962 default: assert(0 && "Invalid prefix!"); 2963 case 0: break; // No prefix! 2964 } 2965 2966 if (Is64BitMode) { 2967 // REX prefix 2968 unsigned REX = X86InstrInfo::determineREX(MI); 2969 if (REX) 2970 ++FinalSize; 2971 } 2972 2973 // 0x0F escape code must be emitted just before the opcode. 2974 if (Need0FPrefix) 2975 ++FinalSize; 2976 2977 switch (Desc->TSFlags & X86II::Op0Mask) { 2978 case X86II::T8: // 0F 38 2979 ++FinalSize; 2980 break; 2981 case X86II::TA: // 0F 3A 2982 ++FinalSize; 2983 break; 2984 } 2985 2986 // If this is a two-address instruction, skip one of the register operands. 2987 unsigned NumOps = Desc->getNumOperands(); 2988 unsigned CurOp = 0; 2989 if (NumOps > 1 && Desc->getOperandConstraint(1, TOI::TIED_TO) != -1) 2990 CurOp++; 2991 else if (NumOps > 2 && Desc->getOperandConstraint(NumOps-1, TOI::TIED_TO)== 0) 2992 // Skip the last source operand that is tied_to the dest reg. e.g. 
LXADD32 2993 --NumOps; 2994 2995 switch (Desc->TSFlags & X86II::FormMask) { 2996 default: assert(0 && "Unknown FormMask value in X86 MachineCodeEmitter!"); 2997 case X86II::Pseudo: 2998 // Remember the current PC offset, this is the PIC relocation 2999 // base address. 3000 switch (Opcode) { 3001 default: 3002 break; 3003 case TargetInstrInfo::INLINEASM: { 3004 const MachineFunction *MF = MI.getParent()->getParent(); 3005 const char *AsmStr = MI.getOperand(0).getSymbolName(); 3006 const TargetAsmInfo* AI = MF->getTarget().getTargetAsmInfo(); 3007 FinalSize += AI->getInlineAsmLength(AsmStr); 3008 break; 3009 } 3010 case TargetInstrInfo::DBG_LABEL: 3011 case TargetInstrInfo::EH_LABEL: 3012 break; 3013 case TargetInstrInfo::IMPLICIT_DEF: 3014 case TargetInstrInfo::DECLARE: 3015 case X86::DWARF_LOC: 3016 case X86::FP_REG_KILL: 3017 break; 3018 case X86::MOVPC32r: { 3019 // This emits the "call" portion of this pseudo instruction. 3020 ++FinalSize; 3021 FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3022 break; 3023 } 3024 } 3025 CurOp = NumOps; 3026 break; 3027 case X86II::RawFrm: 3028 ++FinalSize; 3029 3030 if (CurOp != NumOps) { 3031 const MachineOperand &MO = MI.getOperand(CurOp++); 3032 if (MO.isMBB()) { 3033 FinalSize += sizePCRelativeBlockAddress(); 3034 } else if (MO.isGlobal()) { 3035 FinalSize += sizeGlobalAddress(false); 3036 } else if (MO.isSymbol()) { 3037 FinalSize += sizeExternalSymbolAddress(false); 3038 } else if (MO.isImm()) { 3039 FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3040 } else { 3041 assert(0 && "Unknown RawFrm operand!"); 3042 } 3043 } 3044 break; 3045 3046 case X86II::AddRegFrm: 3047 ++FinalSize; 3048 ++CurOp; 3049 3050 if (CurOp != NumOps) { 3051 const MachineOperand &MO1 = MI.getOperand(CurOp++); 3052 unsigned Size = X86InstrInfo::sizeOfImm(Desc); 3053 if (MO1.isImm()) 3054 FinalSize += sizeConstant(Size); 3055 else { 3056 bool dword = false; 3057 if (Opcode == X86::MOV64ri) 3058 dword = true; 3059 if (MO1.isGlobal()) { 3060 FinalSize += sizeGlobalAddress(dword); 3061 } else if (MO1.isSymbol()) 3062 FinalSize += sizeExternalSymbolAddress(dword); 3063 else if (MO1.isCPI()) 3064 FinalSize += sizeConstPoolAddress(dword); 3065 else if (MO1.isJTI()) 3066 FinalSize += sizeJumpTableAddress(dword); 3067 } 3068 } 3069 break; 3070 3071 case X86II::MRMDestReg: { 3072 ++FinalSize; 3073 FinalSize += sizeRegModRMByte(); 3074 CurOp += 2; 3075 if (CurOp != NumOps) { 3076 ++CurOp; 3077 FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3078 } 3079 break; 3080 } 3081 case X86II::MRMDestMem: { 3082 ++FinalSize; 3083 FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); 3084 CurOp += X86AddrNumOperands + 1; 3085 if (CurOp != NumOps) { 3086 ++CurOp; 3087 FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3088 } 3089 break; 3090 } 3091 3092 case X86II::MRMSrcReg: 3093 ++FinalSize; 3094 FinalSize += sizeRegModRMByte(); 3095 CurOp += 2; 3096 if (CurOp != NumOps) { 3097 ++CurOp; 3098 FinalSize += sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3099 } 3100 break; 3101 3102 case X86II::MRMSrcMem: { 3103 int AddrOperands; 3104 if (Opcode == X86::LEA64r || Opcode == X86::LEA64_32r || 3105 Opcode == X86::LEA16r || Opcode == X86::LEA32r) 3106 AddrOperands = X86AddrNumOperands - 1; // No segment register 3107 else 3108 AddrOperands = X86AddrNumOperands; 3109 3110 ++FinalSize; 3111 FinalSize += getMemModRMByteSize(MI, CurOp+1, IsPIC, Is64BitMode); 3112 CurOp += AddrOperands + 1; 3113 if (CurOp != NumOps) { 3114 ++CurOp; 3115 FinalSize += 
sizeConstant(X86InstrInfo::sizeOfImm(Desc)); 3116 } 3117 break; 3118 } 3119 3120 case X86II::MRM0r: case X86II::MRM1r: 3121 case X86II::MRM2r: case X86II::MRM3r: 3122 case X86II::MRM4r: case X86II::MRM5r: 3123 case X86II::MRM6r: case X86II::MRM7r: 3124 ++FinalSize; 3125 if (Desc->getOpcode() == X86::LFENCE || 3126 Desc->getOpcode() == X86::MFENCE) { 3127 // Special handling of lfence and mfence. 3128 FinalSize += sizeRegModRMByte(); 3129 } else if (Desc->getOpcode() == X86::MONITOR || 3130 Desc->getOpcode() == X86::MWAIT) { 3131 // Special handling of monitor and mwait. 3132 FinalSize += sizeRegModRMByte() + 1; // +1 for the opcode. 3133 } else { 3134 ++CurOp; 3135 FinalSize += sizeRegModRMByte(); 3136 } 3137 3138 if (CurOp != NumOps) { 3139 const MachineOperand &MO1 = MI.getOperand(CurOp++); 3140 unsigned Size = X86InstrInfo::sizeOfImm(Desc); 3141 if (MO1.isImm()) 3142 FinalSize += sizeConstant(Size); 3143 else { 3144 bool dword = false; 3145 if (Opcode == X86::MOV64ri32) 3146 dword = true; 3147 if (MO1.isGlobal()) { 3148 FinalSize += sizeGlobalAddress(dword); 3149 } else if (MO1.isSymbol()) 3150 FinalSize += sizeExternalSymbolAddress(dword); 3151 else if (MO1.isCPI()) 3152 FinalSize += sizeConstPoolAddress(dword); 3153 else if (MO1.isJTI()) 3154 FinalSize += sizeJumpTableAddress(dword); 3155 } 3156 } 3157 break; 3158 3159 case X86II::MRM0m: case X86II::MRM1m: 3160 case X86II::MRM2m: case X86II::MRM3m: 3161 case X86II::MRM4m: case X86II::MRM5m: 3162 case X86II::MRM6m: case X86II::MRM7m: { 3163 3164 ++FinalSize; 3165 FinalSize += getMemModRMByteSize(MI, CurOp, IsPIC, Is64BitMode); 3166 CurOp += X86AddrNumOperands; 3167 3168 if (CurOp != NumOps) { 3169 const MachineOperand &MO = MI.getOperand(CurOp++); 3170 unsigned Size = X86InstrInfo::sizeOfImm(Desc); 3171 if (MO.isImm()) 3172 FinalSize += sizeConstant(Size); 3173 else { 3174 bool dword = false; 3175 if (Opcode == X86::MOV64mi32) 3176 dword = true; 3177 if (MO.isGlobal()) { 3178 FinalSize += sizeGlobalAddress(dword); 3179 } else if (MO.isSymbol()) 3180 FinalSize += sizeExternalSymbolAddress(dword); 3181 else if (MO.isCPI()) 3182 FinalSize += sizeConstPoolAddress(dword); 3183 else if (MO.isJTI()) 3184 FinalSize += sizeJumpTableAddress(dword); 3185 } 3186 } 3187 break; 3188 } 3189 3190 case X86II::MRMInitReg: 3191 ++FinalSize; 3192 // Duplicate register, used by things like MOV8r0 (aka xor reg,reg). 3193 FinalSize += sizeRegModRMByte(); 3194 ++CurOp; 3195 break; 3196 } 3197 3198 if (!Desc->isVariadic() && CurOp != NumOps) { 3199 std::string msg; 3200 raw_string_ostream Msg(msg); 3201 Msg << "Cannot determine size: " << MI; 3202 llvm_report_error(Msg.str()); 3203 } 3204 3205 3206 return FinalSize; 3207} 3208 3209 3210 unsigned X86InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { 3211 const TargetInstrDesc &Desc = MI->getDesc(); 3212 bool IsPIC = (TM.getRelocationModel() == Reloc::PIC_); 3213 bool Is64BitMode = TM.getSubtargetImpl()->is64Bit(); 3214 unsigned Size = GetInstSizeWithDesc(*MI, &Desc, IsPIC, Is64BitMode); 3215 if (Desc.getOpcode() == X86::MOVPC32r) 3216 Size += GetInstSizeWithDesc(*MI, &get(X86::POP32r), IsPIC, Is64BitMode); 3217 return Size; 3218} 3219 3220 /// getGlobalBaseReg - Return a virtual register initialized with the 3221 /// global base register value. Output instructions required to 3222 /// initialize the register in the function entry block, if necessary.
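/// In 32-bit PIC code this is the PIC base: the address of the picbase
/// label itself or, in GOT-style PIC, the address of
/// _GLOBAL_OFFSET_TABLE_ (see the isPICStyleGOT() path below).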
3223/// 3224unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { 3225 assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && 3226 "X86-64 PIC uses RIP relative addressing"); 3227 3228 X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); 3229 unsigned GlobalBaseReg = X86FI->getGlobalBaseReg(); 3230 if (GlobalBaseReg != 0) 3231 return GlobalBaseReg; 3232 3233 // Insert the set of GlobalBaseReg into the first MBB of the function 3234 MachineBasicBlock &FirstMBB = MF->front(); 3235 MachineBasicBlock::iterator MBBI = FirstMBB.begin(); 3236 DebugLoc DL = DebugLoc::getUnknownLoc(); 3237 if (MBBI != FirstMBB.end()) DL = MBBI->getDebugLoc(); 3238 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3239 unsigned PC = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3240 3241 const TargetInstrInfo *TII = TM.getInstrInfo(); 3242 // Operand of MovePCtoStack is completely ignored by asm printer. It's 3243 // only used in JIT code emission as displacement to pc. 3244 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::MOVPC32r), PC).addImm(0); 3245 3246 // If we're using vanilla 'GOT' PIC style, we should use relative addressing 3247 // not to pc, but to _GLOBAL_OFFSET_TABLE_ external. 3248 if (TM.getSubtarget<X86Subtarget>().isPICStyleGOT()) { 3249 GlobalBaseReg = RegInfo.createVirtualRegister(X86::GR32RegisterClass); 3250 // Generate addl $__GLOBAL_OFFSET_TABLE_ + [.-piclabel], %some_register 3251 BuildMI(FirstMBB, MBBI, DL, TII->get(X86::ADD32ri), GlobalBaseReg) 3252 .addReg(PC).addExternalSymbol("_GLOBAL_OFFSET_TABLE_", 0, 3253 X86II::MO_GOT_ABSOLUTE_ADDRESS); 3254 } else { 3255 GlobalBaseReg = PC; 3256 } 3257 3258 X86FI->setGlobalBaseReg(GlobalBaseReg); 3259 return GlobalBaseReg; 3260} 3261