X86FastISel.cpp revision 1ba318982e4dcca66b6cf7ce624af2ba8a55d9d8
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(const Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT);

  bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(EVT VT, const Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(EVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(const Value *V, X86AddressMode &AM);
  bool X86SelectCallAddress(const Value *V, X86AddressMode &AM);

  bool X86SelectLoad(const Instruction *I);

  bool X86SelectStore(const Instruction *I);

  bool X86SelectRet(const Instruction *I);

  bool X86SelectCmp(const Instruction *I);

  bool X86SelectZExt(const Instruction *I);

  bool X86SelectBranch(const Instruction *I);

  bool X86SelectShift(const Instruction *I);

  bool X86SelectSelect(const Instruction *I);

  bool X86SelectTrunc(const Instruction *I);

  bool X86SelectFPExt(const Instruction *I);
  bool X86SelectFPTrunc(const Instruction *I);

  bool X86SelectExtractValue(const Instruction *I);

  bool X86VisitIntrinsicCall(const IntrinsicInst &I);
  bool X86SelectCall(const Instruction *I);

  CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isTailCall = false);
  CCAssignFn *CCAssignFnForRet(CallingConv::ID CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(const Constant *C);

  unsigned TargetMaterializeAlloca(const AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(EVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, EVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}
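
// For illustration of the policy above: on an x86-32 target with SSE2,
// i8/i16/i32, f32 and f64 all pass isTypeLegal; f80 always fails, i64 fails
// because it is not a legal type on that subtarget, and i1 passes only when
// the caller opts in via AllowI1 (as the load/store selectors below do).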

#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(CallingConv::ID CC,
                                           bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (CC == CallingConv::GHC)
      return CC_X86_64_GHC;
    else if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::X86_ThisCall)
    return CC_X86_32_ThisCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else if (CC == CallingConv::GHC)
    return CC_X86_32_GHC;
  else
    return CC_X86_32_C;
}

/// CCAssignFnForRet - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForRet(CallingConv::ID CC,
                                          bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return RetCC_X86_Win64_C;
    else
      return RetCC_X86_64_C;
  }

  return RetCC_X86_32_C;
}
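
// A quick sketch of the 32-bit mapping above: a call written in IR as
//   %r = call x86_fastcallcc i32 @f(i32 inreg %a, i32 inreg %b)
// is analyzed with CC_X86_32_FastCall, while an ordinary C call falls
// through to CC_X86_32_C. The CC_* tables themselves are tablegen'd into
// X86GenCallingConv.inc, included above.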

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i1:
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(EVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i1: {
    // Mask out all but lowest bit.
    unsigned AndResult = createResultReg(X86::GR8RegisterClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
    Val = AndResult;
  }
  // FALLTHROUGH, handling i1 as i8.
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                         DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}
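
// For reference: an X86AddressMode encodes the general x86 memory operand
// [Base + IndexReg*Scale + Disp], optionally anchored to a global (GV). So a
// load emitted above with Base.Reg=EDI, IndexReg=ECX, Scale=4 and Disp=8
// corresponds to "movl 8(%edi,%ecx,4), %dst" (register names illustrative).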

bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));

  // If this is a store of a simple constant, fold the constant into the store.
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    bool Signed = true;
    switch (VT.getSimpleVT().SimpleTy) {
    default: break;
    case MVT::i1: Signed = false;   // FALLTHROUGH to handle as i8.
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                             DL, TII.get(Opc)), AM)
        .addImm(Signed ? (uint64_t) CI->getSExtValue() :
                         CI->getZExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                                    unsigned Src, EVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}
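
// For illustration: asking X86FastEmitExtend for an i8->i32 ISD::ZERO_EXTEND
// lets the tablegen'd FastEmit_r pick a matching pattern, normally a movzbl
// (X86::MOVZX32rr8). If no pattern covers the (SrcVT, DstVT, Opc) triple,
// FastEmit_r returns 0 and the caller must bail.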

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    // Don't walk into other basic blocks; it's possible we haven't
    // visited them yet, so the instructions may not yet be assigned
    // virtual registers.
    if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB)
      return false;

    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(V->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM);
    break;

  case Instruction::Alloca: {
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(A);
    if (SI != FuncInfo.StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    // Adds of constants are common and easy enough.
    if (const ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt<32>(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    X86AddressMode SavedAM = AM;

    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
    for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      const Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        SmallVector<const Value *, 4> Worklist;
        Worklist.push_back(Op);
        do {
          Op = Worklist.pop_back_val();
          if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
            // Constant-offset addressing.
            Disp += CI->getSExtValue() * S;
          } else if (isa<AddOperator>(Op) &&
                     isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
            // An add with a constant operand. Fold the constant.
            ConstantInt *CI =
              cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
            Disp += CI->getSExtValue() * S;
            // Add the other operand back to the work list.
            Worklist.push_back(cast<AddOperator>(Op)->getOperand(0));
          } else if (IndexReg == 0 &&
                     (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                     (S == 1 || S == 2 || S == 4 || S == 8)) {
            // Scaled-index addressing.
            Scale = S;
            IndexReg = getRegForGEPIndex(Op).first;
            if (IndexReg == 0)
              return false;
          } else
            // Unsupported.
            goto unsupported_gep;
        } while (!Worklist.empty());
      }
    }
    // Check for displacement overflow.
    if (!isInt<32>(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    if (X86SelectAddress(U->getOperand(0), AM))
      return true;

    // If we couldn't merge the sub value into this addr mode, revert back to
    // our address and just match the value instead of completely failing.
    AM = SavedAM;
    break;
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // Allow the subtarget to classify the global.
    unsigned char GVFlags = Subtarget->ClassifyGlobalReference(GV, TM);

    // If this reference is relative to the pic base, set it now.
    if (isGlobalRelativeToPICBase(GVFlags)) {
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
    }

    // Unless the ABI requires an extra load, return a direct reference to
    // the global.
    if (!isGlobalStubReference(GVFlags)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      }
      AM.GVOpFlags = GVFlags;
      return true;
    }

    // Ok, we need to do a load from a stub.  If we've already loaded from this
    // stub, reuse the loaded pointer, otherwise emit the load now.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;
      StubAM.GVOpFlags = GVFlags;

      // Prepare for inserting code in the local-value area.
      MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea();

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel())
          StubAM.Base.Reg = X86::RIP;
      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      }

      LoadReg = createResultReg(RC);
      MachineInstrBuilder LoadMI =
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), LoadReg);
      addFullAddress(LoadMI, StubAM);

      // Ok, back to normal mode.
      leaveLocalValueArea(SaveInsertPt);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}
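
// A worked example of the GEP folding above:
//   %p = getelementptr [8 x i32]* %buf, i32 0, i32 %i
// folds into AM as { Base=%buf, IndexReg=%i, Scale=4, Disp=0 }, i.e. the
// single addressing mode (%buf,%i,4); a constant index on an i32 element
// would instead accumulate 4*idx into Disp.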

/// X86SelectCallAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) {
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectCallAddress(U->getOperand(0), AM);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectCallAddress(U->getOperand(0), AM);
    break;
  }

  // Handle constant address.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS or DLLImport.
    if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal() || GVar->hasDLLImportLinkage())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic address.
    AM.GV = GV;

    // No ABI requires an extra load for anything other than DLLImport, which
    // we rejected above. Return a direct reference to the global.
    if (Subtarget->isPICStyleRIPRel()) {
      // Use rip-relative addressing if we can.  Above we verified that the
      // base and index registers are unused.
      assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
      AM.Base.Reg = X86::RIP;
    } else if (Subtarget->isPICStyleStubPIC()) {
      AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
    } else if (Subtarget->isPICStyleGOT()) {
      AM.GVOpFlags = X86II::MO_GOTOFF;
    }

    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(const Instruction *I) {
  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectRet - Select and emit code to implement ret instructions.
bool X86FastISel::X86SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  if (Subtarget->isTargetWin64())
    return false;

  // Don't handle popping bytes on return for now.
  if (FuncInfo.MF->getInfo<X86MachineFunctionInfo>()
        ->getBytesToPopOnReturn() != 0)
    return false;

  // fastcc with -tailcallopt is intended to provide a guaranteed
  // tail call optimization. Fastisel doesn't know how to do that.
  if (CC == CallingConv::Fast && GuaranteedTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  if (F.isVarArg())
    return false;

  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForRet(CC));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    // TODO: For now, don't try to handle cases where getLocInfo()
    // says Full but the types don't match.
    if (VA.getValVT() != TLI.getValueType(RV->getType()))
      return false;

    // The calling-convention tables for x87 returns don't tell
    // the whole story.
    if (VA.getLocReg() == X86::ST0 || VA.getLocReg() == X86::ST1)
      return false;

    // Make the copy.
    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  // Now emit the RET.
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::RET));
  return true;
}
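
// For illustration, on x86-32 with the C calling convention a simple
//   ret i32 %x
// takes the path above as one COPY of %x's vreg into EAX, with EAX recorded
// as live-out, followed by a bare RET.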

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(const Instruction *I) {
  EVT VT;
  if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(EVT VT) {
  switch (VT.getSimpleVT().SimpleTy) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If the comparison's RHS is the constant
/// RHSC and it can be encoded as an immediate operand, return an opcode that
/// works for the compare (e.g. CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(EVT VT, const ConstantInt *RHSC) {
  switch (VT.getSimpleVT().SimpleTy) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}
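
// For illustration: "icmp eq i32 %x, 42" can fold to CMP32ri, but a 64-bit
// compare against 0x100000000 cannot use CMP64ri32 (the constant does not
// survive the sign-extended 32-bit immediate field), so X86FastEmitCompare
// below falls back to materializing the constant and using CMP64rr.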

bool X86FastISel::X86FastEmitCompare(const Value *Op0, const Value *Op1,
                                     EVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType(Op0->getContext()));

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (const ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareImmOpc))
        .addReg(Op0Reg)
        .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CompareOpc))
    .addReg(Op0Reg)
    .addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  EVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SETEr), EReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::SETNPr), NPReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::SETNEr), NEReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::SETPr), PReg);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(X86::OR8rr), ResultReg)
      .addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}
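
// Why the parity games above: UCOMISS/UCOMISD signal an unordered result
// (a NaN operand) by setting PF. Hence "oeq" is ZF==1 && PF==0, built from
// SETE and SETNP, and "une" is ZF==0 || PF==1, built from SETNE and SETP;
// neither predicate maps onto a single SETcc.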

bool X86FastISel::X86SelectZExt(const Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (I->getType()->isIntegerTy(8) &&
      I->getOperand(0)->getType()->isIntegerTy(1)) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}
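
// Note: an i1 value may live in a GR8 register whose upper bits are
// undefined, so "zext i1 %b to i8" cannot be a plain copy;
// FastEmitZExtFromI1 zeroes bits 1-7 (typically by ANDing with 1).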

bool X86FastISel::X86SelectBranch(const Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      EVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA_4;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE_4; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE_4; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE_4;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE_4; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break;
      default:
        return false;
      }

      const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BranchOpc))
        .addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JP_4))
          .addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB, DL);
      FuncInfo.MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
               dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted into an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (const IntrinsicInst *CI =
          dyn_cast<IntrinsicInst>(EI->getAggregateOperand())) {
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = getRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = FuncInfo.MBB->rbegin(), RE = FuncInfo.MBB->rend();
             RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.definesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(OpCode == X86::SETOr ? X86::JO_4 : X86::JB_4))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB, DL);
            FuncInfo.MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
    .addReg(OpReg).addReg(OpReg);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::JNE_4))
    .addMBB(TrueMBB);
  FastEmitBranch(FalseMBB, DL);
  FuncInfo.MBB->addSuccessor(TrueMBB);
  return true;
}
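
// The extra JP above mirrors the SETcc case in X86SelectCmp: UNE must also
// be taken when the compare is unordered (PF set), and JNE alone would send
// NaN comparisons down the false edge.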

bool X86FastISel::X86SelectShift(const Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType()->isIntegerTy(8)) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(16)) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(32)) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType()->isIntegerTy(64)) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold a constant shift amount, e.g. shl(x,3).
  if (const ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
          CReg).addReg(Op1Reg);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit a subreg KILL to precisely describe what we're doing here.
  if (CReg != X86::CL)
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
            TII.get(TargetOpcode::KILL), X86::CL)
      .addReg(CReg, RegState::Kill);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpReg), ResultReg)
    .addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}
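
// For illustration: a variable shift "%r = shl i32 %x, %n" must place its
// count in CL, so the path above emits roughly
//   COPY ECX <- %n ; KILL CL <- ECX ; %r = SHL32rCL %x
// while a constant count like "shl i32 %x, 3" folds directly into SHL32ri.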

bool X86FastISel::X86SelectSelect(const Instruction *I) {
  EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TEST8rr))
    .addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(const Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() &&
      I->getType()->isDoubleTy()) {
    const Value *V = I->getOperand(0);
    if (V->getType()->isFloatTy()) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
              TII.get(X86::CVTSS2SDrr), ResultReg)
        .addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(const Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType()->isFloatTy()) {
      const Value *V = I->getOperand(0);
      if (V->getType()->isDoubleTy()) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                TII.get(X86::CVTSD2SSrr), ResultReg)
          .addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}
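
// Note the select lowering above: "select i1 %c, iN %a, iN %b" becomes
//   TEST8rr %c, %c ; %r = CMOVE %a, %b
// so when %c is zero (ZF set) the CMOVE takes %b, otherwise %r keeps %a.
// i8 selects are not handled here because x86 has no 8-bit cmov.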

bool X86FastISel::X86SelectTrunc(const Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  EVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte right now.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

  // First issue a copy to GR16_ABCD or GR32_ABCD.
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CopyOpc), CopyReg)
    .addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
                                                  CopyReg, /*Kill=*/true,
                                                  X86::sub_8bit);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectExtractValue(const Instruction *I) {
  const ExtractValueInst *EI = cast<ExtractValueInst>(I);
  const Value *Agg = EI->getAggregateOperand();

  if (const IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
    switch (CI->getIntrinsicID()) {
    default: break;
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow: {
      // Cheat a little. We know that the registers for "add" and "seto" are
      // allocated sequentially. However, we only keep track of the register
      // for "add" in the value map. Use extractvalue's index to get the
      // correct register for "seto".
      unsigned OpReg = getRegForValue(Agg);
      if (OpReg == 0)
        return false;
      UpdateValueMap(I, OpReg + *EI->idx_begin());
      return true;
    }
    }
  }

  return false;
}
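
// The GR16_ABCD/GR32_ABCD detour above exists because in 32-bit mode only
// EAX, EBX, ECX and EDX have addressable low-8-bit subregisters; copying
// into one of those classes first makes the sub_8bit extraction legal. In
// 64-bit mode every GPR has an 8-bit subreg, so tablegen'd code handles it.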

bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::stackprotector: {
    // Emit inline code to store the stack guard onto the stack.
    EVT PtrTy = TLI.getPointerTy();

    const Value *Op1 = I.getArgOperand(0); // The guard's value.
    const AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));

    // Grab the frame index.
    X86AddressMode AM;
    if (!X86SelectAddress(Slot, AM)) return false;

    if (!X86FastEmitStore(PtrTy, Op1, AM)) return false;

    return true;
  }
  case Intrinsic::objectsize: {
    ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
    const Type *Ty = I.getCalledFunction()->getReturnType();

    assert(CI && "Non-constant type in Intrinsic::objectsize?");

    EVT VT;
    if (!isTypeLegal(Ty, VT))
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::MOV32ri;
    else if (VT == MVT::i64)
      OpC = X86::MOV64ri;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg).
      addImm(CI->isZero() ? -1ULL : 0);
    UpdateValueMap(&I, ResultReg);
    return true;
  }
  case Intrinsic::dbg_declare: {
    const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I);
    X86AddressMode AM;
    assert(DI->getAddress() && "Null address should be checked earlier!");
    if (!X86SelectAddress(DI->getAddress(), AM))
      return false;
    const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
    // FIXME may need to add RegState::Debug to any registers produced,
    // although ESP/EBP should be the only ones at the moment.
    addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM).
      addImm(0).addMetadata(DI->getVariable());
    return true;
  }
  case Intrinsic::trap: {
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP));
    return true;
  }
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction
    // followed by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    EVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    const Value *Op1 = I.getArgOperand(0);
    const Value *Op2 = I.getArgOperand(1);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg)
      .addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal.  If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used.  Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case.  In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1+1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}
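
// On the objectsize lowering above: with its second argument false
// ("maximum size"), an unknown object conventionally yields -1, hence the
// MOV of -1ULL; with true ("minimum size") it yields 0.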
-1ULL : 0); 1365 UpdateValueMap(&I, ResultReg); 1366 return true; 1367 } 1368 case Intrinsic::dbg_declare: { 1369 const DbgDeclareInst *DI = cast<DbgDeclareInst>(&I); 1370 X86AddressMode AM; 1371 assert(DI->getAddress() && "Null address should be checked earlier!"); 1372 if (!X86SelectAddress(DI->getAddress(), AM)) 1373 return false; 1374 const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE); 1375 // FIXME may need to add RegState::Debug to any registers produced, 1376 // although ESP/EBP should be the only ones at the moment. 1377 addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II), AM). 1378 addImm(0).addMetadata(DI->getVariable()); 1379 return true; 1380 } 1381 case Intrinsic::trap: { 1382 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::TRAP)); 1383 return true; 1384 } 1385 case Intrinsic::sadd_with_overflow: 1386 case Intrinsic::uadd_with_overflow: { 1387 // Replace "add with overflow" intrinsics with an "add" instruction followed 1388 // by a seto/setc instruction. Later on, when the "extractvalue" 1389 // instructions are encountered, we use the fact that two registers were 1390 // created sequentially to get the correct registers for the "sum" and the 1391 // "overflow bit". 1392 const Function *Callee = I.getCalledFunction(); 1393 const Type *RetTy = 1394 cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); 1395 1396 EVT VT; 1397 if (!isTypeLegal(RetTy, VT)) 1398 return false; 1399 1400 const Value *Op1 = I.getArgOperand(0); 1401 const Value *Op2 = I.getArgOperand(1); 1402 unsigned Reg1 = getRegForValue(Op1); 1403 unsigned Reg2 = getRegForValue(Op2); 1404 1405 if (Reg1 == 0 || Reg2 == 0) 1406 // FIXME: Handle values *not* in registers. 1407 return false; 1408 1409 unsigned OpC = 0; 1410 if (VT == MVT::i32) 1411 OpC = X86::ADD32rr; 1412 else if (VT == MVT::i64) 1413 OpC = X86::ADD64rr; 1414 else 1415 return false; 1416 1417 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1418 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpC), ResultReg) 1419 .addReg(Reg1).addReg(Reg2); 1420 unsigned DestReg1 = UpdateValueMap(&I, ResultReg); 1421 1422 // If the add with overflow is an intra-block value then we just want to 1423 // create temporaries for it like normal. If it is a cross-block value then 1424 // UpdateValueMap will return the cross-block register used. Since we 1425 // *really* want the value to be live in the register pair known by 1426 // UpdateValueMap, we have to use DestReg1+1 as the destination register in 1427 // the cross block case. In the non-cross-block case, we should just make 1428 // another register for the value. 1429 if (DestReg1 != ResultReg) 1430 ResultReg = DestReg1+1; 1431 else 1432 ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8)); 1433 1434 unsigned Opc = X86::SETBr; 1435 if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) 1436 Opc = X86::SETOr; 1437 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); 1438 return true; 1439 } 1440 } 1441} 1442 1443bool X86FastISel::X86SelectCall(const Instruction *I) { 1444 const CallInst *CI = cast<CallInst>(I); 1445 const Value *Callee = CI->getCalledValue(); 1446 1447 // Can't handle inline asm yet. 1448 if (isa<InlineAsm>(Callee)) 1449 return false; 1450 1451 // Handle intrinsic calls. 1452 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) 1453 return X86VisitIntrinsicCall(*II); 1454 1455 // Handle only C and fastcc calling conventions for now. 
1456 ImmutableCallSite CS(CI); 1457 CallingConv::ID CC = CS.getCallingConv(); 1458 if (CC != CallingConv::C && 1459 CC != CallingConv::Fast && 1460 CC != CallingConv::X86_FastCall) 1461 return false; 1462 1463 // fastcc with -tailcallopt is intended to provide a guaranteed 1464 // tail call optimization. Fastisel doesn't know how to do that. 1465 if (CC == CallingConv::Fast && GuaranteedTailCallOpt) 1466 return false; 1467 1468 // Let SDISel handle vararg functions. 1469 const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 1470 const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 1471 if (FTy->isVarArg()) 1472 return false; 1473 1474 // Fast-isel doesn't know about callee-pop yet. 1475 if (Subtarget->IsCalleePop(FTy->isVarArg(), CC)) 1476 return false; 1477 1478 // Handle *simple* calls for now. 1479 const Type *RetTy = CS.getType(); 1480 EVT RetVT; 1481 if (RetTy->isVoidTy()) 1482 RetVT = MVT::isVoid; 1483 else if (!isTypeLegal(RetTy, RetVT, true)) 1484 return false; 1485 1486 // Materialize callee address in a register. FIXME: GV address can be 1487 // handled with a CALLpcrel32 instead. 1488 X86AddressMode CalleeAM; 1489 if (!X86SelectCallAddress(Callee, CalleeAM)) 1490 return false; 1491 unsigned CalleeOp = 0; 1492 const GlobalValue *GV = 0; 1493 if (CalleeAM.GV != 0) { 1494 GV = CalleeAM.GV; 1495 } else if (CalleeAM.Base.Reg != 0) { 1496 CalleeOp = CalleeAM.Base.Reg; 1497 } else 1498 return false; 1499 1500 // Allow calls which produce i1 results. 1501 bool AndToI1 = false; 1502 if (RetVT == MVT::i1) { 1503 RetVT = MVT::i8; 1504 AndToI1 = true; 1505 } 1506 1507 // Deal with call operands first. 1508 SmallVector<const Value *, 8> ArgVals; 1509 SmallVector<unsigned, 8> Args; 1510 SmallVector<EVT, 8> ArgVTs; 1511 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1512 Args.reserve(CS.arg_size()); 1513 ArgVals.reserve(CS.arg_size()); 1514 ArgVTs.reserve(CS.arg_size()); 1515 ArgFlags.reserve(CS.arg_size()); 1516 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 1517 i != e; ++i) { 1518 unsigned Arg = getRegForValue(*i); 1519 if (Arg == 0) 1520 return false; 1521 ISD::ArgFlagsTy Flags; 1522 unsigned AttrInd = i - CS.arg_begin() + 1; 1523 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 1524 Flags.setSExt(); 1525 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 1526 Flags.setZExt(); 1527 1528 // FIXME: Only handle *easy* calls for now. 1529 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 1530 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 1531 CS.paramHasAttr(AttrInd, Attribute::Nest) || 1532 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 1533 return false; 1534 1535 const Type *ArgTy = (*i)->getType(); 1536 EVT ArgVT; 1537 if (!isTypeLegal(ArgTy, ArgVT)) 1538 return false; 1539 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1540 Flags.setOrigAlign(OriginalAlignment); 1541 1542 Args.push_back(Arg); 1543 ArgVals.push_back(*i); 1544 ArgVTs.push_back(ArgVT); 1545 ArgFlags.push_back(Flags); 1546 } 1547 1548 // Analyze operands of the call, assigning locations to each operand. 1549 SmallVector<CCValAssign, 16> ArgLocs; 1550 CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext()); 1551 1552 // Allocate shadow area for Win64 1553 if (Subtarget->isTargetWin64()) { 1554 CCInfo.AllocateStack(32, 8); 1555 } 1556 1557 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC)); 1558 1559 // Get a count of how many bytes are to be pushed on the stack. 
1560 unsigned NumBytes = CCInfo.getNextStackOffset(); 1561 1562 // Issue CALLSEQ_START 1563 unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); 1564 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown)) 1565 .addImm(NumBytes); 1566 1567 // Process argument: walk the register/memloc assignments, inserting 1568 // copies / loads. 1569 SmallVector<unsigned, 4> RegArgs; 1570 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1571 CCValAssign &VA = ArgLocs[i]; 1572 unsigned Arg = Args[VA.getValNo()]; 1573 EVT ArgVT = ArgVTs[VA.getValNo()]; 1574 1575 // Promote the value if needed. 1576 switch (VA.getLocInfo()) { 1577 default: llvm_unreachable("Unknown loc info!"); 1578 case CCValAssign::Full: break; 1579 case CCValAssign::SExt: { 1580 bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1581 Arg, ArgVT, Arg); 1582 assert(Emitted && "Failed to emit a sext!"); Emitted=Emitted; 1583 Emitted = true; 1584 ArgVT = VA.getLocVT(); 1585 break; 1586 } 1587 case CCValAssign::ZExt: { 1588 bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1589 Arg, ArgVT, Arg); 1590 assert(Emitted && "Failed to emit a zext!"); Emitted=Emitted; 1591 Emitted = true; 1592 ArgVT = VA.getLocVT(); 1593 break; 1594 } 1595 case CCValAssign::AExt: { 1596 bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), 1597 Arg, ArgVT, Arg); 1598 if (!Emitted) 1599 Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), 1600 Arg, ArgVT, Arg); 1601 if (!Emitted) 1602 Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), 1603 Arg, ArgVT, Arg); 1604 1605 assert(Emitted && "Failed to emit a aext!"); Emitted=Emitted; 1606 ArgVT = VA.getLocVT(); 1607 break; 1608 } 1609 case CCValAssign::BCvt: { 1610 unsigned BC = FastEmit_r(ArgVT.getSimpleVT(), VA.getLocVT().getSimpleVT(), 1611 ISD::BIT_CONVERT, Arg, /*TODO: Kill=*/false); 1612 assert(BC != 0 && "Failed to emit a bitcast!"); 1613 Arg = BC; 1614 ArgVT = VA.getLocVT(); 1615 break; 1616 } 1617 } 1618 1619 if (VA.isRegLoc()) { 1620 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1621 VA.getLocReg()).addReg(Arg); 1622 RegArgs.push_back(VA.getLocReg()); 1623 } else { 1624 unsigned LocMemOffset = VA.getLocMemOffset(); 1625 X86AddressMode AM; 1626 AM.Base.Reg = StackPtr; 1627 AM.Disp = LocMemOffset; 1628 const Value *ArgVal = ArgVals[VA.getValNo()]; 1629 1630 // If this is a really simple value, emit this with the Value* version of 1631 // X86FastEmitStore. If it isn't simple, we don't want to do this, as it 1632 // can cause us to reevaluate the argument. 1633 if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal)) 1634 X86FastEmitStore(ArgVT, ArgVal, AM); 1635 else 1636 X86FastEmitStore(ArgVT, Arg, AM); 1637 } 1638 } 1639 1640 // ELF / PIC requires GOT in the EBX register before function calls via PLT 1641 // GOT pointer. 1642 if (Subtarget->isPICStyleGOT()) { 1643 unsigned Base = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF); 1644 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1645 X86::EBX).addReg(Base); 1646 } 1647 1648 // Issue the call. 1649 MachineInstrBuilder MIB; 1650 if (CalleeOp) { 1651 // Register-indirect call. 1652 unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r; 1653 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) 1654 .addReg(CalleeOp); 1655 1656 } else { 1657 // Direct call. 1658 assert(GV && "Not a direct call"); 1659 unsigned CallOpc = 1660 Subtarget->is64Bit() ? 
  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
      .addReg(CalleeOp);
  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode. If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStubAny() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the Leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }

    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
      .addGlobalAddress(GV, 0, OpFlags);
  }

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END.
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
    .addImm(NumBytes).addImm(0);

  // Now handle the call return value (if any).
  SmallVector<unsigned, 4> UsedRegs;
  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    EVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from the fp stack reg to an
    // xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            ResultReg).addReg(RVLocs[0].getLocReg());
    UsedRegs.push_back(RVLocs[0].getLocReg());
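    // If CopyVT was widened to f80 above, the value now lives in an RFP80
    // register and still has to be narrowed to the f32/f64 the IR expects.
    // There is no direct x87-to-XMM copy, so the code below bounces the value
    // through a stack slot: an x87 store at the destination width followed by
    // an SSE load - roughly "fstps (slot); movss (slot), %xmmN" for f32.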
    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      EVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize, false);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc)), FI)
        .addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but the lowest bit for calls that produce an i1 result.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}


bool
X86FastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::Ret:
    return X86SelectRet(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::ExtractValue:
    return X86SelectExtractValue(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    EVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}
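// TargetMaterializeConstant turns a use of a Constant into a register.
// Global addresses are formed with an LEA; everything else is spilled to the
// constant pool and loaded back with a type-appropriate move. As a rough
// illustration (not output produced by this exact code), a small-code-model
// x86-64 f64 constant ends up as something like
// "movsd .LCPI0_0(%rip), %xmm0".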
unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return 0;

  // Get the opcode and register class of the output for the given load
  // instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT().SimpleTy) {
  default: return 0;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStubPIC()) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(FuncInfo.MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                   TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!FuncInfo.StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                         TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo) {
    return new X86FastISel(funcInfo);
  }
}
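// A minimal sketch, assuming this era's TargetLowering interface, of how the
// factory above is typically reached (the hook lives in X86ISelLowering, not
// in this file):
//
//   FastISel *
//   X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
//     return X86::createFastISel(funcInfo);
//   }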