X86FastISel.cpp revision db4971259ce94cea26e555e9ade82672a3581f5c
1//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the X86-specific support for the FastISel class. Much 11// of the target-specific code is generated by tablegen in the file 12// X86GenFastISel.inc, which is #included here. 13// 14//===----------------------------------------------------------------------===// 15 16#include "X86.h" 17#include "X86InstrBuilder.h" 18#include "X86RegisterInfo.h" 19#include "X86Subtarget.h" 20#include "X86TargetMachine.h" 21#include "llvm/CallingConv.h" 22#include "llvm/DerivedTypes.h" 23#include "llvm/GlobalVariable.h" 24#include "llvm/Instructions.h" 25#include "llvm/IntrinsicInst.h" 26#include "llvm/CodeGen/FastISel.h" 27#include "llvm/CodeGen/MachineConstantPool.h" 28#include "llvm/CodeGen/MachineFrameInfo.h" 29#include "llvm/CodeGen/MachineRegisterInfo.h" 30#include "llvm/Support/CallSite.h" 31#include "llvm/Support/ErrorHandling.h" 32#include "llvm/Support/GetElementPtrTypeIterator.h" 33#include "llvm/Target/TargetOptions.h" 34using namespace llvm; 35 36namespace { 37 38class X86FastISel : public FastISel { 39 /// Subtarget - Keep a pointer to the X86Subtarget around so that we can 40 /// make the right decision when generating code for different targets. 41 const X86Subtarget *Subtarget; 42 43 /// StackPtr - Register used as the stack pointer. 44 /// 45 unsigned StackPtr; 46 47 /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87 48 /// floating point ops. 49 /// When SSE is available, use it for f32 operations. 50 /// When SSE2 is available, use it for f64 operations. 51 bool X86ScalarSSEf64; 52 bool X86ScalarSSEf32; 53 54public: 55 explicit X86FastISel(MachineFunction &mf, 56 DenseMap<const Value *, unsigned> &vm, 57 DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, 58 DenseMap<const AllocaInst *, int> &am, 59 std::vector<std::pair<MachineInstr*, unsigned> > &pn 60#ifndef NDEBUG 61 , SmallSet<const Instruction *, 8> &cil 62#endif 63 ) 64 : FastISel(mf, vm, bm, am, pn 65#ifndef NDEBUG 66 , cil 67#endif 68 ) { 69 Subtarget = &TM.getSubtarget<X86Subtarget>(); 70 StackPtr = Subtarget->is64Bit() ? 
X86::RSP : X86::ESP; 71 X86ScalarSSEf64 = Subtarget->hasSSE2(); 72 X86ScalarSSEf32 = Subtarget->hasSSE1(); 73 } 74 75 virtual bool TargetSelectInstruction(const Instruction *I); 76 77#include "X86GenFastISel.inc" 78 79private: 80 bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); 81 82 bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); 83 84 bool X86FastEmitStore(EVT VT, const Value *Val, 85 const X86AddressMode &AM); 86 bool X86FastEmitStore(EVT VT, unsigned Val, 87 const X86AddressMode &AM); 88 89 bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, 90 unsigned &ResultReg); 91 92 bool X86SelectAddress(const Value *V, X86AddressMode &AM); 93 bool X86SelectCallAddress(const Value *V, X86AddressMode &AM); 94 95 bool X86SelectLoad(const Instruction *I); 96 97 bool X86SelectStore(const Instruction *I); 98 99 bool X86SelectCmp(const Instruction *I); 100 101 bool X86SelectZExt(const Instruction *I); 102 103 bool X86SelectBranch(const Instruction *I); 104 105 bool X86SelectShift(const Instruction *I); 106 107 bool X86SelectSelect(const Instruction *I); 108 109 bool X86SelectTrunc(const Instruction *I); 110 111 bool X86SelectFPExt(const Instruction *I); 112 bool X86SelectFPTrunc(const Instruction *I); 113 114 bool X86SelectExtractValue(const Instruction *I); 115 116 bool X86VisitIntrinsicCall(const IntrinsicInst &I); 117 bool X86SelectCall(const Instruction *I); 118 119 CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false); 120 121 const X86InstrInfo *getInstrInfo() const { 122 return getTargetMachine()->getInstrInfo(); 123 } 124 const X86TargetMachine *getTargetMachine() const { 125 return static_cast<const X86TargetMachine *>(&TM); 126 } 127 128 unsigned TargetMaterializeConstant(const Constant *C); 129 130 unsigned TargetMaterializeAlloca(const AllocaInst *C); 131 132 /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is 133 /// computed in an SSE register, not on the X87 floating point stack. 134 bool isScalarFPTypeInSSEReg(EVT VT) const { 135 return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2 136 (VT == MVT::f32 && X86ScalarSSEf32); // f32 is when SSE1 137 } 138 139 bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false); 140}; 141 142} // end anonymous namespace. 143 144bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) { 145 VT = TLI.getValueType(Ty, /*HandleUnknown=*/true); 146 if (VT == MVT::Other || !VT.isSimple()) 147 // Unhandled type. Halt "fast" selection and bail. 148 return false; 149 150 // For now, require SSE/SSE2 for performing floating-point operations, 151 // since x87 requires additional work. 152 if (VT == MVT::f64 && !X86ScalarSSEf64) 153 return false; 154 if (VT == MVT::f32 && !X86ScalarSSEf32) 155 return false; 156 // Similarly, no f80 support yet. 157 if (VT == MVT::f80) 158 return false; 159 // We only handle legal types. For example, on x86-32 the instruction 160 // selector contains all of the 64-bit instructions from x86-64, 161 // under the assumption that i64 won't be used if the target doesn't 162 // support it. 163 return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT); 164} 165 166#include "X86GenCallingConv.inc" 167 168/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling 169/// convention. 
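/// For example, a CallingConv::C call selects CC_X86_64_C on a typical
/// 64-bit target (CC_X86_Win64_C on Win64) and CC_X86_32_C on 32-bit targets;
/// the CC_* assignment functions themselves are tablegen-generated from
/// X86CallingConv.td into the X86GenCallingConv.inc included above.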
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::GHC)
      return CC_X86_64_GHC;
    else if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::X86_ThisCall)
    return CC_X86_32_ThisCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else if (CC == CallingConv::GHC)
    return CC_X86_32_GHC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr,
/// Ptr, and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(MBB, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ?
X86::MOVSDmr : X86::ST_Fp64m; 278 break; 279 } 280 281 addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val); 282 return true; 283} 284 285bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, 286 const X86AddressMode &AM) { 287 // Handle 'null' like i32/i64 0. 288 if (isa<ConstantPointerNull>(Val)) 289 Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext())); 290 291 // If this is a store of a simple constant, fold the constant into the store. 292 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) { 293 unsigned Opc = 0; 294 bool Signed = true; 295 switch (VT.getSimpleVT().SimpleTy) { 296 default: break; 297 case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8. 298 case MVT::i8: Opc = X86::MOV8mi; break; 299 case MVT::i16: Opc = X86::MOV16mi; break; 300 case MVT::i32: Opc = X86::MOV32mi; break; 301 case MVT::i64: 302 // Must be a 32-bit sign extended value. 303 if ((int)CI->getSExtValue() == CI->getSExtValue()) 304 Opc = X86::MOV64mi32; 305 break; 306 } 307 308 if (Opc) { 309 addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM) 310 .addImm(Signed ? (uint64_t) CI->getSExtValue() : 311 CI->getZExtValue()); 312 return true; 313 } 314 } 315 316 unsigned ValReg = getRegForValue(Val); 317 if (ValReg == 0) 318 return false; 319 320 return X86FastEmitStore(VT, ValReg, AM); 321} 322 323/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of 324/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g. 325/// ISD::SIGN_EXTEND). 326bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, 327 unsigned Src, EVT SrcVT, 328 unsigned &ResultReg) { 329 unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, 330 Src, /*TODO: Kill=*/false); 331 332 if (RR != 0) { 333 ResultReg = RR; 334 return true; 335 } else 336 return false; 337} 338 339/// X86SelectAddress - Attempt to fill in an address from the given value. 340/// 341bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) { 342 const User *U = NULL; 343 unsigned Opcode = Instruction::UserOp1; 344 if (const Instruction *I = dyn_cast<Instruction>(V)) { 345 // Don't walk into other basic blocks; it's possible we haven't 346 // visited them yet, so the instructions may not yet be assigned 347 // virtual registers. 348 if (MBBMap[I->getParent()] != MBB) 349 return false; 350 351 Opcode = I->getOpcode(); 352 U = I; 353 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { 354 Opcode = C->getOpcode(); 355 U = C; 356 } 357 358 if (const PointerType *Ty = dyn_cast<PointerType>(V->getType())) 359 if (Ty->getAddressSpace() > 255) 360 // Fast instruction selection doesn't support the special 361 // address spaces. 362 return false; 363 364 switch (Opcode) { 365 default: break; 366 case Instruction::BitCast: 367 // Look past bitcasts. 368 return X86SelectAddress(U->getOperand(0), AM); 369 370 case Instruction::IntToPtr: 371 // Look past no-op inttoptrs. 372 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 373 return X86SelectAddress(U->getOperand(0), AM); 374 break; 375 376 case Instruction::PtrToInt: 377 // Look past no-op ptrtoints. 378 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 379 return X86SelectAddress(U->getOperand(0), AM); 380 break; 381 382 case Instruction::Alloca: { 383 // Do static allocas. 
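    // Only allocas that were given a frame index in StaticAllocaMap are
    // matched here; dynamic allocas are not handled (see the note in
    // TargetMaterializeAlloca at the bottom of this file).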
384 const AllocaInst *A = cast<AllocaInst>(V); 385 DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A); 386 if (SI != StaticAllocaMap.end()) { 387 AM.BaseType = X86AddressMode::FrameIndexBase; 388 AM.Base.FrameIndex = SI->second; 389 return true; 390 } 391 break; 392 } 393 394 case Instruction::Add: { 395 // Adds of constants are common and easy enough. 396 if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) { 397 uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue(); 398 // They have to fit in the 32-bit signed displacement field though. 399 if (isInt<32>(Disp)) { 400 AM.Disp = (uint32_t)Disp; 401 return X86SelectAddress(U->getOperand(0), AM); 402 } 403 } 404 break; 405 } 406 407 case Instruction::GetElementPtr: { 408 X86AddressMode SavedAM = AM; 409 410 // Pattern-match simple GEPs. 411 uint64_t Disp = (int32_t)AM.Disp; 412 unsigned IndexReg = AM.IndexReg; 413 unsigned Scale = AM.Scale; 414 gep_type_iterator GTI = gep_type_begin(U); 415 // Iterate through the indices, folding what we can. Constants can be 416 // folded, and one dynamic index can be handled, if the scale is supported. 417 for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); 418 i != e; ++i, ++GTI) { 419 const Value *Op = *i; 420 if (const StructType *STy = dyn_cast<StructType>(*GTI)) { 421 const StructLayout *SL = TD.getStructLayout(STy); 422 unsigned Idx = cast<ConstantInt>(Op)->getZExtValue(); 423 Disp += SL->getElementOffset(Idx); 424 } else { 425 uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType()); 426 if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) { 427 // Constant-offset addressing. 428 Disp += CI->getSExtValue() * S; 429 } else if (IndexReg == 0 && 430 (!AM.GV || !Subtarget->isPICStyleRIPRel()) && 431 (S == 1 || S == 2 || S == 4 || S == 8)) { 432 // Scaled-index addressing. 433 Scale = S; 434 IndexReg = getRegForGEPIndex(Op).first; 435 if (IndexReg == 0) 436 return false; 437 } else 438 // Unsupported. 439 goto unsupported_gep; 440 } 441 } 442 // Check for displacement overflow. 443 if (!isInt<32>(Disp)) 444 break; 445 // Ok, the GEP indices were covered by constant-offset and scaled-index 446 // addressing. Update the address state and move on to examining the base. 447 AM.IndexReg = IndexReg; 448 AM.Scale = Scale; 449 AM.Disp = (uint32_t)Disp; 450 if (X86SelectAddress(U->getOperand(0), AM)) 451 return true; 452 453 // If we couldn't merge the sub value into this addr mode, revert back to 454 // our address and just match the value instead of completely failing. 455 AM = SavedAM; 456 break; 457 unsupported_gep: 458 // Ok, the GEP indices weren't all covered. 459 break; 460 } 461 } 462 463 // Handle constant address. 464 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 465 // Can't handle alternate code models yet. 466 if (TM.getCodeModel() != CodeModel::Small) 467 return false; 468 469 // RIP-relative addresses can't have additional register operands. 470 if (Subtarget->isPICStyleRIPRel() && 471 (AM.Base.Reg != 0 || AM.IndexReg != 0)) 472 return false; 473 474 // Can't handle TLS yet. 475 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) 476 if (GVar->isThreadLocal()) 477 return false; 478 479 // Okay, we've committed to selecting this global. Set up the basic address. 480 AM.GV = GV; 481 482 // Allow the subtarget to classify the global. 483 unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM); 484 485 // If this reference is relative to the pic base, set it now. 
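    // (GVFlags was produced by ClassifyGlobalReference above; pic-base
    // relative flags such as MO_PIC_BASE_OFFSET or MO_GOTOFF mean the
    // address has to be formed relative to the function's global base reg.)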
486 if (isGlobalRelativeToPICBase(GVFlags)) { 487 // FIXME: How do we know Base.Reg is free?? 488 AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF); 489 } 490 491 // Unless the ABI requires an extra load, return a direct reference to 492 // the global. 493 if (!isGlobalStubReference(GVFlags)) { 494 if (Subtarget->isPICStyleRIPRel()) { 495 // Use rip-relative addressing if we can. Above we verified that the 496 // base and index registers are unused. 497 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 498 AM.Base.Reg = X86::RIP; 499 } 500 AM.GVOpFlags = GVFlags; 501 return true; 502 } 503 504 // Ok, we need to do a load from a stub. If we've already loaded from this 505 // stub, reuse the loaded pointer, otherwise emit the load now. 506 DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V); 507 unsigned LoadReg; 508 if (I != LocalValueMap.end() && I->second != 0) { 509 LoadReg = I->second; 510 } else { 511 // Issue load from stub. 512 unsigned Opc = 0; 513 const TargetRegisterClass *RC = NULL; 514 X86AddressMode StubAM; 515 StubAM.Base.Reg = AM.Base.Reg; 516 StubAM.GV = GV; 517 StubAM.GVOpFlags = GVFlags; 518 519 if (TLI.getPointerTy() == MVT::i64) { 520 Opc = X86::MOV64rm; 521 RC = X86::GR64RegisterClass; 522 523 if (Subtarget->isPICStyleRIPRel()) 524 StubAM.Base.Reg = X86::RIP; 525 } else { 526 Opc = X86::MOV32rm; 527 RC = X86::GR32RegisterClass; 528 } 529 530 LoadReg = createResultReg(RC); 531 addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM); 532 533 // Prevent loading GV stub multiple times in same MBB. 534 LocalValueMap[V] = LoadReg; 535 } 536 537 // Now construct the final address. Note that the Disp, Scale, 538 // and Index values may already be set here. 539 AM.Base.Reg = LoadReg; 540 AM.GV = 0; 541 return true; 542 } 543 544 // If all else fails, try to materialize the value in a register. 545 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { 546 if (AM.Base.Reg == 0) { 547 AM.Base.Reg = getRegForValue(V); 548 return AM.Base.Reg != 0; 549 } 550 if (AM.IndexReg == 0) { 551 assert(AM.Scale == 1 && "Scale with no index!"); 552 AM.IndexReg = getRegForValue(V); 553 return AM.IndexReg != 0; 554 } 555 } 556 557 return false; 558} 559 560/// X86SelectCallAddress - Attempt to fill in an address from the given value. 561/// 562bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { 563 const User *U = NULL; 564 unsigned Opcode = Instruction::UserOp1; 565 if (const Instruction *I = dyn_cast<Instruction>(V)) { 566 Opcode = I->getOpcode(); 567 U = I; 568 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) { 569 Opcode = C->getOpcode(); 570 U = C; 571 } 572 573 switch (Opcode) { 574 default: break; 575 case Instruction::BitCast: 576 // Look past bitcasts. 577 return X86SelectCallAddress(U->getOperand(0), AM); 578 579 case Instruction::IntToPtr: 580 // Look past no-op inttoptrs. 581 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 582 return X86SelectCallAddress(U->getOperand(0), AM); 583 break; 584 585 case Instruction::PtrToInt: 586 // Look past no-op ptrtoints. 587 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 588 return X86SelectCallAddress(U->getOperand(0), AM); 589 break; 590 } 591 592 // Handle constant address. 593 if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) { 594 // Can't handle alternate code models yet. 595 if (TM.getCodeModel() != CodeModel::Small) 596 return false; 597 598 // RIP-relative addresses can't have additional register operands. 
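    // (An x86-64 RIP-relative operand only encodes [RIP + disp32]; there is
    // no way to also fold a base or index register into it.)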
599 if (Subtarget->isPICStyleRIPRel() && 600 (AM.Base.Reg != 0 || AM.IndexReg != 0)) 601 return false; 602 603 // Can't handle TLS or DLLImport. 604 if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) 605 if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage()) 606 return false; 607 608 // Okay, we've committed to selecting this global. Set up the basic address. 609 AM.GV = GV; 610 611 // No ABI requires an extra load for anything other than DLLImport, which 612 // we rejected above. Return a direct reference to the global. 613 if (Subtarget->isPICStyleRIPRel()) { 614 // Use rip-relative addressing if we can. Above we verified that the 615 // base and index registers are unused. 616 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 617 AM.Base.Reg = X86::RIP; 618 } else if (Subtarget->isPICStyleStubPIC()) { 619 AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET; 620 } else if (Subtarget->isPICStyleGOT()) { 621 AM.GVOpFlags = X86II::MO_GOTOFF; 622 } 623 624 return true; 625 } 626 627 // If all else fails, try to materialize the value in a register. 628 if (!AM.GV || !Subtarget->isPICStyleRIPRel()) { 629 if (AM.Base.Reg == 0) { 630 AM.Base.Reg = getRegForValue(V); 631 return AM.Base.Reg != 0; 632 } 633 if (AM.IndexReg == 0) { 634 assert(AM.Scale == 1 && "Scale with no index!"); 635 AM.IndexReg = getRegForValue(V); 636 return AM.IndexReg != 0; 637 } 638 } 639 640 return false; 641} 642 643 644/// X86SelectStore - Select and emit code to implement store instructions. 645bool X86FastISel::X86SelectStore(const Instruction *I) { 646 EVT VT; 647 if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) 648 return false; 649 650 X86AddressMode AM; 651 if (!X86SelectAddress(I->getOperand(1), AM)) 652 return false; 653 654 return X86FastEmitStore(VT, I->getOperand(0), AM); 655} 656 657/// X86SelectLoad - Select and emit code to implement load instructions. 658/// 659bool X86FastISel::X86SelectLoad(const Instruction *I) { 660 EVT VT; 661 if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) 662 return false; 663 664 X86AddressMode AM; 665 if (!X86SelectAddress(I->getOperand(0), AM)) 666 return false; 667 668 unsigned ResultReg = 0; 669 if (X86FastEmitLoad(VT, AM, ResultReg)) { 670 UpdateValueMap(I, ResultReg); 671 return true; 672 } 673 return false; 674} 675 676static unsigned X86ChooseCmpOpcode(EVT VT) { 677 switch (VT.getSimpleVT().SimpleTy) { 678 default: return 0; 679 case MVT::i8: return X86::CMP8rr; 680 case MVT::i16: return X86::CMP16rr; 681 case MVT::i32: return X86::CMP32rr; 682 case MVT::i64: return X86::CMP64rr; 683 case MVT::f32: return X86::UCOMISSrr; 684 case MVT::f64: return X86::UCOMISDrr; 685 } 686} 687 688/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHS as the RHS 689/// of the comparison, return an opcode that works for the compare (e.g. 690/// CMP32ri) otherwise return 0. 691static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) { 692 switch (VT.getSimpleVT().SimpleTy) { 693 // Otherwise, we can't fold the immediate into this comparison. 694 default: return 0; 695 case MVT::i8: return X86::CMP8ri; 696 case MVT::i16: return X86::CMP16ri; 697 case MVT::i32: return X86::CMP32ri; 698 case MVT::i64: 699 // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext 700 // field. 
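    // For example, CMP64ri32 can encode 'cmp rax, -1' (the imm32 is
    // sign-extended), but a constant like 0x80000000 would change value when
    // sign-extended, so it has to be compared via a register instead.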
701 if ((int)RHSC->getSExtValue() == RHSC->getSExtValue()) 702 return X86::CMP64ri32; 703 return 0; 704 } 705} 706 707bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1, 708 EVT VT) { 709 unsigned Op0Reg = getRegForValue(Op0); 710 if (Op0Reg == 0) return false; 711 712 // Handle 'null' like i32/i64 0. 713 if (isa<ConstantPointerNull>(Op1)) 714 Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext())); 715 716 // We have two options: compare with register or immediate. If the RHS of 717 // the compare is an immediate that we can fold into this compare, use 718 // CMPri, otherwise use CMPrr. 719 if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) { 720 if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) { 721 BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg) 722 .addImm(Op1C->getSExtValue()); 723 return true; 724 } 725 } 726 727 unsigned CompareOpc = X86ChooseCmpOpcode(VT); 728 if (CompareOpc == 0) return false; 729 730 unsigned Op1Reg = getRegForValue(Op1); 731 if (Op1Reg == 0) return false; 732 BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg); 733 734 return true; 735} 736 737bool X86FastISel::X86SelectCmp(const Instruction *I) { 738 const CmpInst *CI = cast<CmpInst>(I); 739 740 EVT VT; 741 if (!isTypeLegal(I->getOperand(0)->getType(), VT)) 742 return false; 743 744 unsigned ResultReg = createResultReg(&X86::GR8RegClass); 745 unsigned SetCCOpc; 746 bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. 747 switch (CI->getPredicate()) { 748 case CmpInst::FCMP_OEQ: { 749 if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) 750 return false; 751 752 unsigned EReg = createResultReg(&X86::GR8RegClass); 753 unsigned NPReg = createResultReg(&X86::GR8RegClass); 754 BuildMI(MBB, DL, TII.get(X86::SETEr), EReg); 755 BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg); 756 BuildMI(MBB, DL, 757 TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); 758 UpdateValueMap(I, ResultReg); 759 return true; 760 } 761 case CmpInst::FCMP_UNE: { 762 if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) 763 return false; 764 765 unsigned NEReg = createResultReg(&X86::GR8RegClass); 766 unsigned PReg = createResultReg(&X86::GR8RegClass); 767 BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg); 768 BuildMI(MBB, DL, TII.get(X86::SETPr), PReg); 769 BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg); 770 UpdateValueMap(I, ResultReg); 771 return true; 772 } 773 case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; 774 case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; 775 case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break; 776 case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break; 777 case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; 778 case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break; 779 case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break; 780 case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; 781 case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break; 782 case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; 783 case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; 784 case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; 785 786 case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; 787 case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = 
X86::SETNEr; break; 788 case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; 789 case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; 790 case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; 791 case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; 792 case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break; 793 case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break; 794 case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break; 795 case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break; 796 default: 797 return false; 798 } 799 800 const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); 801 if (SwapArgs) 802 std::swap(Op0, Op1); 803 804 // Emit a compare of Op0/Op1. 805 if (!X86FastEmitCompare(Op0, Op1, VT)) 806 return false; 807 808 BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg); 809 UpdateValueMap(I, ResultReg); 810 return true; 811} 812 813bool X86FastISel::X86SelectZExt(const Instruction *I) { 814 // Handle zero-extension from i1 to i8, which is common. 815 if (I->getType()->isIntegerTy(8) && 816 I->getOperand(0)->getType()->isIntegerTy(1)) { 817 unsigned ResultReg = getRegForValue(I->getOperand(0)); 818 if (ResultReg == 0) return false; 819 // Set the high bits to zero. 820 ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false); 821 if (ResultReg == 0) return false; 822 UpdateValueMap(I, ResultReg); 823 return true; 824 } 825 826 return false; 827} 828 829 830bool X86FastISel::X86SelectBranch(const Instruction *I) { 831 // Unconditional branches are selected by tablegen-generated code. 832 // Handle a conditional branch. 833 const BranchInst *BI = cast<BranchInst>(I); 834 MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)]; 835 MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)]; 836 837 // Fold the common case of a conditional branch with a comparison. 838 if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { 839 if (CI->hasOneUse()) { 840 EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); 841 842 // Try to take advantage of fallthrough opportunities. 843 CmpInst::Predicate Predicate = CI->getPredicate(); 844 if (MBB->isLayoutSuccessor(TrueMBB)) { 845 std::swap(TrueMBB, FalseMBB); 846 Predicate = CmpInst::getInversePredicate(Predicate); 847 } 848 849 bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. 850 unsigned BranchOpc; // Opcode to jump on, e.g. 
"X86::JA" 851 852 switch (Predicate) { 853 case CmpInst::FCMP_OEQ: 854 std::swap(TrueMBB, FalseMBB); 855 Predicate = CmpInst::FCMP_UNE; 856 // FALL THROUGH 857 case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 858 case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; 859 case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; 860 case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; 861 case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; 862 case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 863 case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; 864 case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; 865 case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; 866 case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; 867 case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; 868 case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; 869 case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; 870 871 case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; 872 case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; 873 case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; 874 case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; 875 case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; 876 case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; 877 case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; 878 case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; 879 case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; 880 case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; 881 default: 882 return false; 883 } 884 885 const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); 886 if (SwapArgs) 887 std::swap(Op0, Op1); 888 889 // Emit a compare of the LHS and RHS, setting the flags. 890 if (!X86FastEmitCompare(Op0, Op1, VT)) 891 return false; 892 893 BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB); 894 895 if (Predicate == CmpInst::FCMP_UNE) { 896 // X86 requires a second branch to handle UNE (and OEQ, 897 // which is mapped to UNE above). 898 BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB); 899 } 900 901 FastEmitBranch(FalseMBB, DL); 902 MBB->addSuccessor(TrueMBB); 903 return true; 904 } 905 } else if (ExtractValueInst *EI = 906 dyn_cast<ExtractValueInst>(BI->getCondition())) { 907 // Check to see if the branch instruction is from an "arithmetic with 908 // overflow" intrinsic. The main way these intrinsics are used is: 909 // 910 // %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2) 911 // %sum = extractvalue { i32, i1 } %t, 0 912 // %obit = extractvalue { i32, i1 } %t, 1 913 // br i1 %obit, label %overflow, label %normal 914 // 915 // The %sum and %obit are converted in an ADD and a SETO/SETB before 916 // reaching the branch. Therefore, we search backwards through the MBB 917 // looking for the SETO/SETB instruction. If an instruction modifies the 918 // EFLAGS register before we reach the SETO/SETB instruction, then we can't 919 // convert the branch into a JO/JB instruction. 
920 if (const IntrinsicInst *CI = 921 dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){ 922 if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow || 923 CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) { 924 const MachineInstr *SetMI = 0; 925 unsigned Reg = lookUpRegForValue(EI); 926 927 for (MachineBasicBlock::const_reverse_iterator 928 RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) { 929 const MachineInstr &MI = *RI; 930 931 if (MI.definesRegister(Reg)) { 932 unsigned Src, Dst, SrcSR, DstSR; 933 934 if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) { 935 Reg = Src; 936 continue; 937 } 938 939 SetMI = &MI; 940 break; 941 } 942 943 const TargetInstrDesc &TID = MI.getDesc(); 944 if (TID.hasUnmodeledSideEffects() || 945 TID.hasImplicitDefOfPhysReg(X86::EFLAGS)) 946 break; 947 } 948 949 if (SetMI) { 950 unsigned OpCode = SetMI->getOpcode(); 951 952 if (OpCode == X86::SETOr || OpCode == X86::SETBr) { 953 BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? 954 X86::JO_4 : X86::JB_4)) 955 .addMBB(TrueMBB); 956 FastEmitBranch(FalseMBB, DL); 957 MBB->addSuccessor(TrueMBB); 958 return true; 959 } 960 } 961 } 962 } 963 } 964 965 // Otherwise do a clumsy setcc and re-test it. 966 unsigned OpReg = getRegForValue(BI->getCondition()); 967 if (OpReg == 0) return false; 968 969 BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg); 970 BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB); 971 FastEmitBranch(FalseMBB, DL); 972 MBB->addSuccessor(TrueMBB); 973 return true; 974} 975 976bool X86FastISel::X86SelectShift(const Instruction *I) { 977 unsigned CReg = 0, OpReg = 0, OpImm = 0; 978 const TargetRegisterClass *RC = NULL; 979 if (I->getType()->isIntegerTy(8)) { 980 CReg = X86::CL; 981 RC = &X86::GR8RegClass; 982 switch (I->getOpcode()) { 983 case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break; 984 case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break; 985 case Instruction::Shl: OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break; 986 default: return false; 987 } 988 } else if (I->getType()->isIntegerTy(16)) { 989 CReg = X86::CX; 990 RC = &X86::GR16RegClass; 991 switch (I->getOpcode()) { 992 case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break; 993 case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break; 994 case Instruction::Shl: OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break; 995 default: return false; 996 } 997 } else if (I->getType()->isIntegerTy(32)) { 998 CReg = X86::ECX; 999 RC = &X86::GR32RegClass; 1000 switch (I->getOpcode()) { 1001 case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break; 1002 case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break; 1003 case Instruction::Shl: OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break; 1004 default: return false; 1005 } 1006 } else if (I->getType()->isIntegerTy(64)) { 1007 CReg = X86::RCX; 1008 RC = &X86::GR64RegClass; 1009 switch (I->getOpcode()) { 1010 case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break; 1011 case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break; 1012 case Instruction::Shl: OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break; 1013 default: return false; 1014 } 1015 } else { 1016 return false; 1017 } 1018 1019 EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); 1020 if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) 1021 return false; 1022 1023 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1024 if (Op0Reg == 0) 
return false; 1025 1026 // Fold immediate in shl(x,3). 1027 if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) { 1028 unsigned ResultReg = createResultReg(RC); 1029 BuildMI(MBB, DL, TII.get(OpImm), 1030 ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff); 1031 UpdateValueMap(I, ResultReg); 1032 return true; 1033 } 1034 1035 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1036 if (Op1Reg == 0) return false; 1037 TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL); 1038 1039 // The shift instruction uses X86::CL. If we defined a super-register 1040 // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what 1041 // we're doing here. 1042 if (CReg != X86::CL) 1043 BuildMI(MBB, DL, TII.get(TargetOpcode::EXTRACT_SUBREG), X86::CL) 1044 .addReg(CReg).addImm(X86::sub_8bit); 1045 1046 unsigned ResultReg = createResultReg(RC); 1047 BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg); 1048 UpdateValueMap(I, ResultReg); 1049 return true; 1050} 1051 1052bool X86FastISel::X86SelectSelect(const Instruction *I) { 1053 EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); 1054 if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) 1055 return false; 1056 1057 unsigned Opc = 0; 1058 const TargetRegisterClass *RC = NULL; 1059 if (VT.getSimpleVT() == MVT::i16) { 1060 Opc = X86::CMOVE16rr; 1061 RC = &X86::GR16RegClass; 1062 } else if (VT.getSimpleVT() == MVT::i32) { 1063 Opc = X86::CMOVE32rr; 1064 RC = &X86::GR32RegClass; 1065 } else if (VT.getSimpleVT() == MVT::i64) { 1066 Opc = X86::CMOVE64rr; 1067 RC = &X86::GR64RegClass; 1068 } else { 1069 return false; 1070 } 1071 1072 unsigned Op0Reg = getRegForValue(I->getOperand(0)); 1073 if (Op0Reg == 0) return false; 1074 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1075 if (Op1Reg == 0) return false; 1076 unsigned Op2Reg = getRegForValue(I->getOperand(2)); 1077 if (Op2Reg == 0) return false; 1078 1079 BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg); 1080 unsigned ResultReg = createResultReg(RC); 1081 BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg); 1082 UpdateValueMap(I, ResultReg); 1083 return true; 1084} 1085 1086bool X86FastISel::X86SelectFPExt(const Instruction *I) { 1087 // fpext from float to double. 1088 if (Subtarget->hasSSE2() && 1089 I->getType()->isDoubleTy()) { 1090 const Value *V = I->getOperand(0); 1091 if (V->getType()->isFloatTy()) { 1092 unsigned OpReg = getRegForValue(V); 1093 if (OpReg == 0) return false; 1094 unsigned ResultReg = createResultReg(X86::FR64RegisterClass); 1095 BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg); 1096 UpdateValueMap(I, ResultReg); 1097 return true; 1098 } 1099 } 1100 1101 return false; 1102} 1103 1104bool X86FastISel::X86SelectFPTrunc(const Instruction *I) { 1105 if (Subtarget->hasSSE2()) { 1106 if (I->getType()->isFloatTy()) { 1107 const Value *V = I->getOperand(0); 1108 if (V->getType()->isDoubleTy()) { 1109 unsigned OpReg = getRegForValue(V); 1110 if (OpReg == 0) return false; 1111 unsigned ResultReg = createResultReg(X86::FR32RegisterClass); 1112 BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg); 1113 UpdateValueMap(I, ResultReg); 1114 return true; 1115 } 1116 } 1117 } 1118 1119 return false; 1120} 1121 1122bool X86FastISel::X86SelectTrunc(const Instruction *I) { 1123 if (Subtarget->is64Bit()) 1124 // All other cases should be handled by the tblgen generated code. 
1125 return false; 1126 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 1127 EVT DstVT = TLI.getValueType(I->getType()); 1128 1129 // This code only handles truncation to byte right now. 1130 if (DstVT != MVT::i8 && DstVT != MVT::i1) 1131 // All other cases should be handled by the tblgen generated code. 1132 return false; 1133 if (SrcVT != MVT::i16 && SrcVT != MVT::i32) 1134 // All other cases should be handled by the tblgen generated code. 1135 return false; 1136 1137 unsigned InputReg = getRegForValue(I->getOperand(0)); 1138 if (!InputReg) 1139 // Unhandled operand. Halt "fast" selection and bail. 1140 return false; 1141 1142 // First issue a copy to GR16_ABCD or GR32_ABCD. 1143 unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr; 1144 const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16) 1145 ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass; 1146 unsigned CopyReg = createResultReg(CopyRC); 1147 BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg); 1148 1149 // Then issue an extract_subreg. 1150 unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8, 1151 CopyReg, /*Kill=*/true, 1152 X86::sub_8bit); 1153 if (!ResultReg) 1154 return false; 1155 1156 UpdateValueMap(I, ResultReg); 1157 return true; 1158} 1159 1160bool X86FastISel::X86SelectExtractValue(const Instruction *I) { 1161 const ExtractValueInst *EI = cast<ExtractValueInst>(I); 1162 const Value *Agg = EI->getAggregateOperand(); 1163 1164 if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) { 1165 switch (CI->getIntrinsicID()) { 1166 default: break; 1167 case Intrinsic::sadd_with_overflow: 1168 case Intrinsic::uadd_with_overflow: 1169 // Cheat a little. We know that the registers for "add" and "seto" are 1170 // allocated sequentially. However, we only keep track of the register 1171 // for "add" in the value map. Use extractvalue's index to get the 1172 // correct register for "seto". 1173 UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin()); 1174 return true; 1175 } 1176 } 1177 1178 return false; 1179} 1180 1181bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { 1182 // FIXME: Handle more intrinsics. 1183 switch (I.getIntrinsicID()) { 1184 default: return false; 1185 case Intrinsic::stackprotector: { 1186 // Emit code inline code to store the stack guard onto the stack. 1187 EVT PtrTy = TLI.getPointerTy(); 1188 1189 const Value *Op1 = I.getOperand(1); // The guard's value. 1190 const AllocaInst *Slot = cast<AllocaInst>(I.getOperand(2)); 1191 1192 // Grab the frame index. 1193 X86AddressMode AM; 1194 if (!X86SelectAddress(Slot, AM)) return false; 1195 1196 if (!X86FastEmitStore(PtrTy, Op1, AM)) return false; 1197 1198 return true; 1199 } 1200 case Intrinsic::objectsize: { 1201 ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(2)); 1202 const Type *Ty = I.getCalledFunction()->getReturnType(); 1203 1204 assert(CI && "Non-constant type in Intrinsic::objectsize?"); 1205 1206 EVT VT; 1207 if (!isTypeLegal(Ty, VT)) 1208 return false; 1209 1210 unsigned OpC = 0; 1211 if (VT == MVT::i32) 1212 OpC = X86::MOV32ri; 1213 else if (VT == MVT::i64) 1214 OpC = X86::MOV64ri; 1215 else 1216 return false; 1217 1218 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1219 BuildMI(MBB, DL, TII.get(OpC), ResultReg). 1220 addImm(CI->isZero() ? 
-1ULL : 0); 1221 UpdateValueMap(&I, ResultReg); 1222 return true; 1223 } 1224 case Intrinsic::dbg_declare: { 1225 const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); 1226 X86AddressMode AM; 1227 assert(DI->getAddress() && "Null address should be checked earlier!"); 1228 if (!X86SelectAddress(DI->getAddress(), AM)) 1229 return false; 1230 const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 1231 // FIXME may need to add RegState::Debug to any registers produced, 1232 // although ESP/EBP should be the only ones at the moment. 1233 addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0). 1234 addMetadata(DI->getVariable()); 1235 return true; 1236 } 1237 case Intrinsic::trap: { 1238 BuildMI(MBB, DL, TII.get(X86::TRAP)); 1239 return true; 1240 } 1241 case Intrinsic::sadd_with_overflow: 1242 case Intrinsic::uadd_with_overflow: { 1243 // Replace "add with overflow" intrinsics with an "add" instruction followed 1244 // by a seto/setc instruction. Later on, when the "extractvalue" 1245 // instructions are encountered, we use the fact that two registers were 1246 // created sequentially to get the correct registers for the "sum" and the 1247 // "overflow bit". 1248 const Function *Callee = I.getCalledFunction(); 1249 const Type *RetTy = 1250 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); 1251 1252 EVT VT; 1253 if (!isTypeLegal(RetTy, VT)) 1254 return false; 1255 1256 const Value *Op1 = I.getOperand(1); 1257 const Value *Op2 = I.getOperand(2); 1258 unsigned Reg1 = getRegForValue(Op1); 1259 unsigned Reg2 = getRegForValue(Op2); 1260 1261 if (Reg1 == 0 || Reg2 == 0) 1262 // FIXME: Handle values *not* in registers. 1263 return false; 1264 1265 unsigned OpC = 0; 1266 if (VT == MVT::i32) 1267 OpC = X86::ADD32rr; 1268 else if (VT == MVT::i64) 1269 OpC = X86::ADD64rr; 1270 else 1271 return false; 1272 1273 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1274 BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2); 1275 unsigned DestReg1 = UpdateValueMap(&I, ResultReg); 1276 1277 // If the add with overflow is an intra-block value then we just want to 1278 // create temporaries for it like normal. If it is a cross-block value then 1279 // UpdateValueMap will return the cross-block register used. Since we 1280 // *really* want the value to be live in the register pair known by 1281 // UpdateValueMap, we have to use DestReg1+1 as the destination register in 1282 // the cross block case. In the non-cross-block case, we should just make 1283 // another register for the value. 1284 if (DestReg1 != ResultReg) 1285 ResultReg = DestReg1+1; 1286 else 1287 ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); 1288 1289 unsigned Opc = X86::SETBr; 1290 if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) 1291 Opc = X86::SETOr; 1292 BuildMI(MBB, DL, TII.get(Opc), ResultReg); 1293 return true; 1294 } 1295 } 1296} 1297 1298bool X86FastISel::X86SelectCall(const Instruction *I) { 1299 const CallInst *CI = cast<CallInst>(I); 1300 const Value *Callee = I->getOperand(0); 1301 1302 // Can't handle inline asm yet. 1303 if (isa<InlineAsm>(Callee)) 1304 return false; 1305 1306 // Handle intrinsic calls. 1307 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) 1308 return X86VisitIntrinsicCall(*II); 1309 1310 // Handle only C and fastcc calling conventions for now. 
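  // (The checks below accept C, fastcc and x86 fastcall; vararg callees,
  // guaranteed tail calls and callee-pop conventions are all rejected so
  // that SelectionDAG handles them instead.)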
1311 ImmutableCallSite CS(CI); 1312 CallingConv::ID CC = CS.getCallingConv(); 1313 if (CC != CallingConv::C && 1314 CC != CallingConv::Fast && 1315 CC != CallingConv::X86_FastCall) 1316 return false; 1317 1318 // fastcc with -tailcallopt is intended to provide a guaranteed 1319 // tail call optimization. Fastisel doesn't know how to do that. 1320 if (CC == CallingConv::Fast && GuaranteedTailCallOpt) 1321 return false; 1322 1323 // Let SDISel handle vararg functions. 1324 const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 1325 const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 1326 if (FTy->isVarArg()) 1327 return false; 1328 1329 // Fast-isel doesn't know about callee-pop yet. 1330 if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) 1331 return false; 1332 1333 // Handle *simple* calls for now. 1334 const Type *RetTy = CS.getType(); 1335 EVT RetVT; 1336 if (RetTy->isVoidTy()) 1337 RetVT = MVT::isVoid; 1338 else if (!isTypeLegal(RetTy, RetVT, true)) 1339 return false; 1340 1341 // Materialize callee address in a register. FIXME: GV address can be 1342 // handled with a CALLpcrel32 instead. 1343 X86AddressMode CalleeAM; 1344 if (!X86SelectCallAddress(Callee, CalleeAM)) 1345 return false; 1346 unsigned CalleeOp = 0; 1347 const GlobalValue *GV = 0; 1348 if (CalleeAM.GV != 0) { 1349 GV = CalleeAM.GV; 1350 } else if (CalleeAM.Base.Reg != 0) { 1351 CalleeOp = CalleeAM.Base.Reg; 1352 } else 1353 return false; 1354 1355 // Allow calls which produce i1 results. 1356 bool AndToI1 = false; 1357 if (RetVT == MVT::i1) { 1358 RetVT = MVT::i8; 1359 AndToI1 = true; 1360 } 1361 1362 // Deal with call operands first. 1363 SmallVector<const Value *, 8> ArgVals; 1364 SmallVector<unsigned, 8> Args; 1365 SmallVector<EVT, 8> ArgVTs; 1366 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1367 Args.reserve(CS.arg_size()); 1368 ArgVals.reserve(CS.arg_size()); 1369 ArgVTs.reserve(CS.arg_size()); 1370 ArgFlags.reserve(CS.arg_size()); 1371 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 1372 i != e; ++i) { 1373 unsigned Arg = getRegForValue(*i); 1374 if (Arg == 0) 1375 return false; 1376 ISD::ArgFlagsTy Flags; 1377 unsigned AttrInd = i - CS.arg_begin() + 1; 1378 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 1379 Flags.setSExt(); 1380 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 1381 Flags.setZExt(); 1382 1383 // FIXME: Only handle *easy* calls for now. 1384 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 1385 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 1386 CS.paramHasAttr(AttrInd, Attribute::Nest) || 1387 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 1388 return false; 1389 1390 const Type *ArgTy = (*i)->getType(); 1391 EVT ArgVT; 1392 if (!isTypeLegal(ArgTy, ArgVT)) 1393 return false; 1394 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1395 Flags.setOrigAlign(OriginalAlignment); 1396 1397 Args.push_back(Arg); 1398 ArgVals.push_back(*i); 1399 ArgVTs.push_back(ArgVT); 1400 ArgFlags.push_back(Flags); 1401 } 1402 1403 // Analyze operands of the call, assigning locations to each operand. 1404 SmallVector<CCValAssign, 16> ArgLocs; 1405 CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); 1406 1407 // Allocate shadow area for Win64 1408 if (Subtarget->isTargetWin64()) { 1409 CCInfo.AllocateStack(32, 8); 1410 } 1411 1412 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); 1413 1414 // Get a count of how many bytes are to be pushed on the stack. 
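  // (NumBytes is fed to the call-frame setup/destroy pseudo instructions
  // issued below via getCallFrameSetupOpcode()/getCallFrameDestroyOpcode();
  // later frame lowering turns those into the actual stack adjustments.)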
1415 unsigned NumBytes = CCInfo.getNextStackOffset(); 1416 1417 // Issue CALLSEQ_START 1418 unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); 1419 BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes); 1420 1421 // Process argument: walk the register/memloc assignments, inserting 1422 // copies / loads. 1423 SmallVector<unsigned, 4> RegArgs; 1424 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1425 CCValAssign &VA = ArgLocs[i]; 1426 unsigned Arg = Args[VA.getValNo()]; 1427 EVT ArgVT = ArgVTs[VA.getValNo()]; 1428 1429 // Promote the value if needed. 1430 switch (VA.getLocInfo()) { 1431 default: llvm_unreachable("Unknown loc info!"); 1432 case CCValAssign::Full: break; 1433 case CCValAssign::SExt: { 1434 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1435 Arg, ArgVT, Arg); 1436 assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted; 1437 Emitted = true; 1438 ArgVT = VA.getLocVT(); 1439 break; 1440 } 1441 case CCValAssign::ZExt: { 1442 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1443 Arg, ArgVT, Arg); 1444 assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted; 1445 Emitted = true; 1446 ArgVT = VA.getLocVT(); 1447 break; 1448 } 1449 case CCValAssign::AExt: { 1450 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), 1451 Arg, ArgVT, Arg); 1452 if (!Emitted) 1453 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1454 Arg, ArgVT, Arg); 1455 if (!Emitted) 1456 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1457 Arg, ArgVT, Arg); 1458 1459 assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted; 1460 ArgVT = VA.getLocVT(); 1461 break; 1462 } 1463 case CCValAssign::BCvt: { 1464 unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), 1465 ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); 1466 assert(BC != 0 && "Failed to emit a bitcast!"); 1467 Arg = BC; 1468 ArgVT = VA.getLocVT(); 1469 break; 1470 } 1471 } 1472 1473 if (VA.isRegLoc()) { 1474 TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT); 1475 bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(), 1476 Arg, RC, RC, DL); 1477 assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; 1478 Emitted = true; 1479 RegArgs.push_back(VA.getLocReg()); 1480 } else { 1481 unsigned LocMemOffset = VA.getLocMemOffset(); 1482 X86AddressMode AM; 1483 AM.Base.Reg = StackPtr; 1484 AM.Disp = LocMemOffset; 1485 const Value *ArgVal = ArgVals[VA.getValNo()]; 1486 1487 // If this is a really simple value, emit this with the Value* version of 1488 // X86FastEmitStore. If it isn't simple, we don't want to do this, as it 1489 // can cause us to reevaluate the argument. 1490 if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) 1491 X86FastEmitStore(ArgVT, ArgVal, AM); 1492 else 1493 X86FastEmitStore(ArgVT, Arg, AM); 1494 } 1495 } 1496 1497 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1498 // GOT pointer. 1499 if (Subtarget->isPICStyleGOT()) { 1500 TargetRegisterClass *RC = X86::GR32RegisterClass; 1501 unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF); 1502 bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC, 1503 DL); 1504 assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted; 1505 Emitted = true; 1506 } 1507 1508 // Issue the call. 1509 MachineInstrBuilder MIB; 1510 if (CalleeOp) { 1511 // Register-indirect call. 1512 unsigned CallOpc = Subtarget->is64Bit() ? 
      X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);

  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode. If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStubAny() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }


    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
  }

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  SmallVector<unsigned, 4> UsedRegs;
  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    EVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted=Emitted;
    Emitted = true;
    UsedRegs.push_back(RVLocs[0].getLocReg());

    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      EVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ?
X86::ST_Fp80m32 : X86::ST_Fp80m64; 1594 unsigned MemSize = ResVT.getSizeInBits()/8; 1595 int FI = MFI.CreateStackObject(MemSize, MemSize, false); 1596 addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg); 1597 DstRC = ResVT == MVT::f32 1598 ? X86::FR32RegisterClass : X86::FR64RegisterClass; 1599 Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm; 1600 ResultReg = createResultReg(DstRC); 1601 addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI); 1602 } 1603 1604 if (AndToI1) { 1605 // Mask out all but lowest bit for some call which produces an i1. 1606 unsigned AndResult = createResultReg(X86::GR8RegisterClass); 1607 BuildMI(MBB, DL, 1608 TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1); 1609 ResultReg = AndResult; 1610 } 1611 1612 UpdateValueMap(I, ResultReg); 1613 } 1614 1615 // Set all unused physreg defs as dead. 1616 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 1617 1618 return true; 1619} 1620 1621 1622bool 1623X86FastISel::TargetSelectInstruction(const Instruction *I) { 1624 switch (I->getOpcode()) { 1625 default: break; 1626 case Instruction::Load: 1627 return X86SelectLoad(I); 1628 case Instruction::Store: 1629 return X86SelectStore(I); 1630 case Instruction::ICmp: 1631 case Instruction::FCmp: 1632 return X86SelectCmp(I); 1633 case Instruction::ZExt: 1634 return X86SelectZExt(I); 1635 case Instruction::Br: 1636 return X86SelectBranch(I); 1637 case Instruction::Call: 1638 return X86SelectCall(I); 1639 case Instruction::LShr: 1640 case Instruction::AShr: 1641 case Instruction::Shl: 1642 return X86SelectShift(I); 1643 case Instruction::Select: 1644 return X86SelectSelect(I); 1645 case Instruction::Trunc: 1646 return X86SelectTrunc(I); 1647 case Instruction::FPExt: 1648 return X86SelectFPExt(I); 1649 case Instruction::FPTrunc: 1650 return X86SelectFPTrunc(I); 1651 case Instruction::ExtractValue: 1652 return X86SelectExtractValue(I); 1653 case Instruction::IntToPtr: // Deliberate fall-through. 1654 case Instruction::PtrToInt: { 1655 EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); 1656 EVT DstVT = TLI.getValueType(I->getType()); 1657 if (DstVT.bitsGT(SrcVT)) 1658 return X86SelectZExt(I); 1659 if (DstVT.bitsLT(SrcVT)) 1660 return X86SelectTrunc(I); 1661 unsigned Reg = getRegForValue(I->getOperand(0)); 1662 if (Reg == 0) return false; 1663 UpdateValueMap(I, Reg); 1664 return true; 1665 } 1666 } 1667 1668 return false; 1669} 1670 1671unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { 1672 EVT VT; 1673 if (!isTypeLegal(C->getType(), VT)) 1674 return false; 1675 1676 // Get opcode and regclass of the output for the given load instruction. 1677 unsigned Opc = 0; 1678 const TargetRegisterClass *RC = NULL; 1679 switch (VT.getSimpleVT().SimpleTy) { 1680 default: return false; 1681 case MVT::i8: 1682 Opc = X86::MOV8rm; 1683 RC = X86::GR8RegisterClass; 1684 break; 1685 case MVT::i16: 1686 Opc = X86::MOV16rm; 1687 RC = X86::GR16RegisterClass; 1688 break; 1689 case MVT::i32: 1690 Opc = X86::MOV32rm; 1691 RC = X86::GR32RegisterClass; 1692 break; 1693 case MVT::i64: 1694 // Must be in x86-64 mode. 
1695 Opc = X86::MOV64rm; 1696 RC = X86::GR64RegisterClass; 1697 break; 1698 case MVT::f32: 1699 if (Subtarget->hasSSE1()) { 1700 Opc = X86::MOVSSrm; 1701 RC = X86::FR32RegisterClass; 1702 } else { 1703 Opc = X86::LD_Fp32m; 1704 RC = X86::RFP32RegisterClass; 1705 } 1706 break; 1707 case MVT::f64: 1708 if (Subtarget->hasSSE2()) { 1709 Opc = X86::MOVSDrm; 1710 RC = X86::FR64RegisterClass; 1711 } else { 1712 Opc = X86::LD_Fp64m; 1713 RC = X86::RFP64RegisterClass; 1714 } 1715 break; 1716 case MVT::f80: 1717 // No f80 support yet. 1718 return false; 1719 } 1720 1721 // Materialize addresses with LEA instructions. 1722 if (isa<GlobalValue>(C)) { 1723 X86AddressMode AM; 1724 if (X86SelectAddress(C, AM)) { 1725 if (TLI.getPointerTy() == MVT::i32) 1726 Opc = X86::LEA32r; 1727 else 1728 Opc = X86::LEA64r; 1729 unsigned ResultReg = createResultReg(RC); 1730 addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); 1731 return ResultReg; 1732 } 1733 return 0; 1734 } 1735 1736 // MachineConstantPool wants an explicit alignment. 1737 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 1738 if (Align == 0) { 1739 // Alignment of vector types. FIXME! 1740 Align = TD.getTypeAllocSize(C->getType()); 1741 } 1742 1743 // x86-32 PIC requires a PIC base register for constant pools. 1744 unsigned PICBase = 0; 1745 unsigned char OpFlag = 0; 1746 if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic 1747 OpFlag = X86II::MO_PIC_BASE_OFFSET; 1748 PICBase = getInstrInfo()->getGlobalBaseReg(&MF); 1749 } else if (Subtarget->isPICStyleGOT()) { 1750 OpFlag = X86II::MO_GOTOFF; 1751 PICBase = getInstrInfo()->getGlobalBaseReg(&MF); 1752 } else if (Subtarget->isPICStyleRIPRel() && 1753 TM.getCodeModel() == CodeModel::Small) { 1754 PICBase = X86::RIP; 1755 } 1756 1757 // Create the load from the constant pool. 1758 unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align); 1759 unsigned ResultReg = createResultReg(RC); 1760 addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), 1761 MCPOffset, PICBase, OpFlag); 1762 1763 return ResultReg; 1764} 1765 1766unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) { 1767 // Fail on dynamic allocas. At this point, getRegForValue has already 1768 // checked its CSE maps, so if we're here trying to handle a dynamic 1769 // alloca, we're not going to succeed. X86SelectAddress has a 1770 // check for dynamic allocas, because it's called directly from 1771 // various places, but TargetMaterializeAlloca also needs a check 1772 // in order to avoid recursion between getRegForValue, 1773 // X86SelectAddrss, and TargetMaterializeAlloca. 1774 if (!StaticAllocaMap.count(C)) 1775 return 0; 1776 1777 X86AddressMode AM; 1778 if (!X86SelectAddress(C, AM)) 1779 return 0; 1780 unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r; 1781 TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy()); 1782 unsigned ResultReg = createResultReg(RC); 1783 addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM); 1784 return ResultReg; 1785} 1786 1787namespace llvm { 1788 llvm::FastISel *X86::createFastISel(MachineFunction &mf, 1789 DenseMap<const Value *, unsigned> &vm, 1790 DenseMap<const BasicBlock *, MachineBasicBlock *> &bm, 1791 DenseMap<const AllocaInst *, int> &am, 1792 std::vector<std::pair<MachineInstr*, unsigned> > &pn 1793#ifndef NDEBUG 1794 , SmallSet<const Instruction *, 8> &cil 1795#endif 1796 ) { 1797 return new X86FastISel(mf, vm, bm, am, pn 1798#ifndef NDEBUG 1799 , cil 1800#endif 1801 ); 1802 } 1803} 1804
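// Note on how this hooks in: when fast instruction selection is enabled
// (typically at -O0), SelectionDAGISel asks the target for a FastISel object,
// and X86::createFastISel above is expected to be the factory reached through
// the X86 TargetLowering hook. Whenever one of the X86Select* routines above
// returns false, selection of that instruction falls back to the normal
// SelectionDAG path.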