X86FastISel.cpp revision 0bc25f40402f48ba42fc45403f635b20d90fabb3
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(const Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, const Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86SelectExtractValue(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(const Constant *C);

  unsigned TargetMaterializeAlloca(const AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
  }

  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}
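// For illustration, the policy the checks above imply (a descriptive note):
// on x86-32, i32 always passes; f32 passes only with SSE1 and f64 only with
// SSE2, so x87-only targets fall back to SelectionDAG for floating point;
// f80 always bails; i64 fails TLI.isTypeLegal() on 32-bit targets; and i1
// passes only when the caller opts in via AllowI1, as the load, store, and
// call selectors below do.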
#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::GHC)
      return CC_X86_64_GHC;
    else if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::X86_ThisCall)
    return CC_X86_32_ThisCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else if (CC == CallingConv::GHC)
    return CC_X86_32_GHC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}
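// As a rough example of the emission above (a sketch, not literal output):
// loading an i32 from a frame slot produces a MOV32rm whose memory operand
// is the usual five-part x86 form appended by addFullAddress --
// base (here a frame index), scale, index, displacement, and segment --
// e.g. conceptually: %vreg = MOV32rm <fi#0>, 1, %noreg, 0, %noreg.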
/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but the lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(MBB, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
        .addImm(Signed ? (uint64_t) CI->getSExtValue() :
                         CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}
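// X86AddressMode (from X86InstrBuilder.h) describes the general x86 memory
// operand Base + Scale*Index + Disp, optionally relative to a global (GV):
// Base is a register or frame index, Scale is 1/2/4/8, Index is a register,
// and Disp is a signed 32-bit displacement. X86SelectAddress below tries to
// fold as much of a pointer computation as possible into one such descriptor;
// for example (a sketch), 'p + 4*i + 8' can become Base=p, Scale=4, Index=i,
// Disp=8 and then fold into a single memory operand.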
/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.MBBMap[I->getParent()] != MBB)
      return false;

    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        SmallVector<const Value *, 4> Worklist;
        Worklist.push_back(Op);
        do {
          Op = Worklist.pop_back_val();
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            Disp += CI->getSExtValue() * S;
          } else if (isa<AddOperator>(Op) &&
                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
            // An add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Disp += CI->getSExtValue() * S;
            // Add the other operand back to the work list.
            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
          } else if (IndexReg == 0 &&
                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                     (S == 1 || S == 2 || S == 4 || S == 8)) {
            // Scaled-index addressing.
            Scale = S;
            IndexReg = getRegForGEPIndex(Op).first;
            if (IndexReg == 0)
              return false;
          } else
            // Unsupported.
            goto unsupported_gep;
        } while (!Worklist.empty());
      }
    }
    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the sub value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }
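  // A worked example of the GEP folding above (a sketch, assuming i32 is four
  // bytes and standard struct layout):
  //   %p = getelementptr { i32, [8 x i32] }* %base, i32 0, i32 1, i32 %i
  // folds to Base = %base, Disp = 4 (the array field's offset), Scale = 4,
  // Index = %i, so a later load of %p needs no separate address arithmetic.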
  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic
    // address.
    AM.GV = GV;

    // Allow the subtarget to classify the global.
    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

    // If this reference is relative to the pic base, set it now.
    if (isGlobalRelativeToPICBase(GVFlags)) {
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    }

    // Unless the ABI requires an extra load, return a direct reference to
    // the global.
    if (!isGlobalStubReference(GVFlags)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      }
      AM.GVOpFlags = GVFlags;
      return true;
    }

    // Ok, we need to do a load from a stub.  If we've already loaded from this
    // stub, reuse the loaded pointer, otherwise emit the load now.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;
      StubAM.GVOpFlags = GVFlags;

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel())
          StubAM.Base.Reg = X86::RIP;
      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      }

      LoadReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS or DLLImport.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic
    // address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}


/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  EVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}
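// Putting the pieces together (a sketch): 'store i32 0, i32* %p' selects an
// address for %p and folds the constant into a single MOV32mi, while
// '%v = load i32* %p' becomes one MOV32rm whose result register is recorded
// through UpdateValueMap so later uses of %v find it.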
static unsigned X86ChooseCmpOpcode(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison whose RHS is the
/// constant RHSC, return an opcode that can fold the immediate into the
/// compare (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit
    // sign-extended field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate.  If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}
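// For example (a sketch): for 'icmp ult i32 %x, 42' the constant RHS folds,
// so the code above emits 'CMP32ri %x, 42' and X86SelectCmp below follows it
// with SETBr into a fresh GR8 register; a non-constant RHS would use CMP32rr
// instead.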
bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs; // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}
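// A note on the FCMP_OEQ/FCMP_UNE special cases above: UCOMISS/UCOMISD set
// ZF, PF, and CF all to 1 when an operand is NaN (unordered), so ordered
// equality cannot be read from ZF alone. OEQ therefore combines SETE and
// SETNP with AND8rr, UNE combines SETNE and SETP with OR8rr, and every other
// predicate maps to a single SETcc, possibly after swapping the operands.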
bool X86FastISel::X86SelectZExt(const Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (I->getType()->isIntegerTy(8) &&
      I->getOperand(0)->getType()->isIntegerTy(1)) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}


bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;      // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
      default:
        return false;
      }

      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(MBB, DL, TII.get(X86::JP_4)).addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB, DL);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
             dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted into an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (const IntrinsicInst *CI =
          dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = getRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.definesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ?
                                     X86::JO_4 : X86::JB_4))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB, DL);
            MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, DL, TII.get(X86::JNE_4)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB, DL);
  MBB->addSuccessor(TrueMBB);
  return true;
}
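// End-to-end example of the compare/branch fusion above (a sketch): given
//   %c = icmp slt i32 %a, %b
//   br i1 %c, label %T, label %F
// where %c has no other use, this emits 'CMP32rr %a, %b' followed by
// 'JL_4 <T>' and lets FastEmitBranch either fall through to F or emit an
// explicit jump, instead of materializing %c with SETcc and re-testing it.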
bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold immediate shift amounts, e.g. shl(x, 3).
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(MBB, DL, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC, DL);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(MBB, DL, TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
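// A descriptive note on the variable-count path above: x86 requires the
// shift count in CL, so the count is copied into CL (via CX/ECX/RCX for
// wider operands) and the KILL of the super-register records that only CL
// stays live. For example (a sketch), 'shl i32 %x, %n' becomes a copy of %n
// into ECX, a KILL defining CL, then 'SHL32rCL %x'.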
bool X86FastISel::X86SelectSelect(const Instruction *I) {
  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
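// How the branchless select above works (a descriptive note): TEST8rr of the
// condition against itself sets ZF exactly when the i1 condition is zero,
// and CMOVE ("move if equal", i.e. if ZF) then replaces the true operand
// with the false operand in that case, yielding Op0 ? Op1 : Op2. Only
// i16/i32/i64 are handled because there is no 8-bit CMOV.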
bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() &&
      I->getType()->isDoubleTy()) {
    const Value *V = I->getOperand(0);
    if (V->getType()->isFloatTy()) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType()->isFloatTy()) {
      const Value *V = I->getOperand(0);
      if (V->getType()->isDoubleTy()) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte right now.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

  // First issue a copy to GR16_ABCD or GR32_ABCD.
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
                                                  CopyReg, /*Kill=*/true,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
  const ExtractValueInst *EI = cast<ExtractValueInst>(I);
  const Value *Agg = EI->getAggregateOperand();

  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
    switch (CI->getIntrinsicID()) {
    default: break;
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
      // Cheat a little. We know that the registers for "add" and "seto" are
      // allocated sequentially. However, we only keep track of the register
      // for "add" in the value map. Use extractvalue's index to get the
      // correct register for "seto".
      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
      return true;
    }
  }

  return false;
}
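// The "cheat" above pairs with X86VisitIntrinsicCall below (a descriptive
// note): for llvm.sadd/uadd.with.overflow the ADD's result register and the
// SETO/SETB overflow register are created back to back, so extractvalue
// index 0 resolves to the add's register and index 1 to the next one.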
bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::stackprotector: {
    // Emit code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy();

    const Value *Op1 = I.getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;

    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;

    return true;
  }
  case Intrinsic::objectsize: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
    const Type *Ty = I.getCalledFunction()->getReturnType();

    assert(CI && "Non-constant type in Intrinsic::objectsize?");

    EVT VT;
    if (!isTypeLegal(Ty, VT))
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::MOV32ri;
    else if (VT == MVT::i64)
      OpC = X86::MOV64ri;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).
      addImm(CI->isZero() ? -1ULL : 0);
    UpdateValueMap(&I, ResultReg);
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME may need to add RegState::Debug to any registers produced,
    // although ESP/EBP should be the only ones at the moment.
    addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
      addMetadata(DI->getVariable());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(MBB, DL, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction
    // followed by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    EVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    const Value *Op1 = I.getArgOperand(0);
    const Value *Op2 = I.getArgOperand(1);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal.  If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used.  Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case.  In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1+1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}

bool X86FastISel::X86SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  // Handle only C and fastcc calling conventions for now.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Fast-isel doesn't know about callee-pop yet.
  if (Subtarget->IsCalleePop(FTy->isVarArg(), CC))
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectCallAddress(Callee, CalleeAM))
    return false;
  unsigned CalleeOp = 0;
  const GlobalValue *GV = 0;
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first.
  SmallVector<const Value *, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVals.push_back(*i);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());

  // Allocate shadow area for Win64.
  if (Subtarget->isTargetWin64()) {
    CCInfo.AllocateStack(32, 8);
  }

  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START.
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);
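  // A descriptive note: the opcodes from getCallFrameSetupOpcode() and
  // getCallFrameDestroyOpcode() are the target's CALLSEQ_START/CALLSEQ_END
  // pseudo instructions; they bracket the call so that prologue/epilogue
  // insertion can later lower them to explicit stack-pointer adjustments or
  // fold them away when the call frame is reserved in the prologue.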
  // Process arguments: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::BCvt: {
      unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(),
                               ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false);
      assert(BC != 0 && "Failed to emit a bitcast!");
      Arg = BC;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC, DL);
      assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      const Value *ArgVal = ArgVals[VA.getValNo()];

      // If this is a really simple value, emit this with the Value* version of
      // X86FastEmitStore.  If it isn't simple, we don't want to do this, as it
      // can cause us to reevaluate the argument.
      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
        X86FastEmitStore(ArgVT, ArgVal, AM);
      else
        X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC requires the GOT pointer in the EBX register before function
  // calls via the PLT.
  if (Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC,
                                    DL);
    assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
  }

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);

  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode.  If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStubAny() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }

    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
  }
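  // Example of the flag selection above (a sketch): in PIC ELF mode a direct
  // call to an external 'foo' carries MO_PLT and is emitted as 'call foo@PLT',
  // while on Darwin before version 9 an external or weak callee is routed
  // through its lazily-bound '$stub' entry via MO_DARWIN_STUB.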
  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END.
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle the call return value (if any).
  SmallVector<unsigned, 4> UsedRegs;
  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    EVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC, DL);
    assert(Emitted && "Failed to emit a copy instruction!"); (void)Emitted;
    UsedRegs.push_back(RVLocs[0].getLocReg());

    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      EVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but the lowest bit for calls which produce an i1.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(MBB, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}


bool
X86FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::ExtractValue:
    return X86SelectExtractValue(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types.  FIXME!
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
    return new X86FastISel(funcInfo);
  }
}
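// Usage note (a sketch; the hook itself lives outside this file): the X86
// target returns this object from its createFastISel hook, and the
// SelectionDAGISel driver then calls TargetSelectInstruction for each IR
// instruction when fast instruction selection is enabled (e.g. at -O0),
// falling back to the full DAG path whenever a selector above returns false.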