X86FastISel.cpp revision d1474d09cbe5fdeec8ba0d6c6b52f316f3422532
//===-- X86FastISel.cpp - X86 FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

namespace {

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(MachineFunction &mf,
                       MachineModuleInfo *mmi,
                       DwarfWriter *dw,
                       DenseMap<const Value *, unsigned> &vm,
                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                       DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                       , SmallSet<Instruction*, 8> &cil
#endif
                       )
    : FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
               , cil
#endif
               ) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);

  bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(MVT VT, Value *Val,
                        const X86AddressMode &AM);
  bool X86FastEmitStore(MVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);

  bool X86SelectLoad(Instruction *I);

  bool X86SelectStore(Instruction *I);

  bool X86SelectCmp(Instruction *I);

  bool X86SelectZExt(Instruction *I);

  bool X86SelectBranch(Instruction *I);

  bool X86SelectShift(Instruction *I);

  bool X86SelectSelect(Instruction *I);

  bool X86SelectTrunc(Instruction *I);

  bool X86SelectFPExt(Instruction *I);
  bool X86SelectFPTrunc(Instruction *I);

  bool X86SelectExtractValue(Instruction *I);

  bool X86VisitIntrinsicCall(IntrinsicInst &I);
  bool X86SelectCall(Instruction *I);

  CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(Constant *C);

  unsigned TargetMaterializeAlloca(AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(MVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};

} // end anonymous namespace.

bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base ptr,
/// Ptr, and a displacement offset, or a GlobalAddress, i.e. V. Return true
/// if it is possible.
bool
X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  switch (VT.getSimpleVT()) {
  case MVT::f80: // No f80 support yet.
  default: return false;
  case MVT::i8:  Opc = X86::MOV8mr;  break;
  case MVT::i16: Opc = X86::MOV16mr; break;
  case MVT::i32: Opc = X86::MOV32mr; break;
  case MVT::i64: Opc = X86::MOV64mr; break; // Must be in x86-64 mode.
  case MVT::f32:
    Opc = Subtarget->hasSSE1() ? X86::MOVSSmr : X86::ST_Fp32m;
    break;
  case MVT::f64:
    Opc = Subtarget->hasSSE2() ? X86::MOVSDmr : X86::ST_Fp64m;
    break;
  }

  addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM).addReg(Val);
  return true;
}

bool X86FastISel::X86FastEmitStore(MVT VT, Value *Val,
                                   const X86AddressMode &AM) {
  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Val))
    Val = Constant::getNullValue(TD.getIntPtrType());

  // If this is a store of a simple constant, fold the constant into the store.
  if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) {
    unsigned Opc = 0;
    switch (VT.getSimpleVT()) {
    default: break;
    case MVT::i8:  Opc = X86::MOV8mi;  break;
    case MVT::i16: Opc = X86::MOV16mi; break;
    case MVT::i32: Opc = X86::MOV32mi; break;
    case MVT::i64:
      // Must be a 32-bit sign extended value.
      if ((int)CI->getSExtValue() == CI->getSExtValue())
        Opc = X86::MOV64mi32;
      break;
    }

    if (Opc) {
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc)), AM)
        .addImm(CI->getSExtValue());
      return true;
    }
  }

  unsigned ValReg = getRegForValue(Val);
  if (ValReg == 0)
    return false;

  return X86FastEmitStore(VT, ValReg, AM);
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
                                    unsigned Src, MVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
///
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
  User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM, isCall);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::Alloca: {
    if (isCall) break;
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
    if (SI != StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    if (isCall) break;
    // Adds of constants are common and easy enough.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt32(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM, isCall);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    if (isCall) break;
    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices, folding what we can. Constants can be
    // folded, and one dynamic index can be handled, if the scale is supported.
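    // For example, a GEP such as "getelementptr [8 x i32]* %a, i32 0, i32 %i"
    // would typically fold to Base = %a, Scale = 4, Index = %i, Disp = 0,
    // matching the x86 [base + scale*index + disp] addressing form directly.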
    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
        } else if (IndexReg == 0 &&
                   (!AM.GV || !Subtarget->isPICStyleRIPRel()) &&
                   (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForGEPIndex(Op);
          if (IndexReg == 0)
            return false;
        } else
          // Unsupported.
          goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt32(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    return X86SelectAddress(U->getOperand(0), AM, isCall);
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Default &&
        TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (Subtarget->isPICStyleRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Can't handle TLS yet.
    if (GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
      if (GVar->isThreadLocal())
        return false;

    // Okay, we've committed to selecting this global. Set up the basic
    // address.
    AM.GV = GV;

    if (!isCall &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        !Subtarget->is64Bit()) {
      // FIXME: How do we know Base.Reg is free??
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);
    }

    // If the ABI doesn't require an extra load, return a direct reference to
    // the global.
    if (!Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
      if (Subtarget->isPICStyleRIPRel()) {
        // Use rip-relative addressing if we can.  Above we verified that the
        // base and index registers are unused.
        assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
        AM.Base.Reg = X86::RIP;
      } else if (Subtarget->isPICStyleStub() &&
                 TM.getRelocationModel() == Reloc::PIC_) {
        AM.GVOpFlags = X86II::MO_PIC_BASE_OFFSET;
      } else if (Subtarget->isPICStyleGOT()) {
        AM.GVOpFlags = X86II::MO_GOTOFF;
      }

      return true;
    }

    // Check to see if we've already materialized this stub loaded value into a
    // register in this block.  If so, just reuse it.
    DenseMap<const Value*, unsigned>::iterator I = LocalValueMap.find(V);
    unsigned LoadReg;
    if (I != LocalValueMap.end() && I->second != 0) {
      LoadReg = I->second;
    } else {
      // Issue load from stub.
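      // The stub (a Darwin $non_lazy_ptr or a GOT slot, depending on the
      // target) holds the global's real address; load it into a register and
      // use that register as the new base.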
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = GV;

      if (TLI.getPointerTy() == MVT::i64) {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;

        if (Subtarget->isPICStyleRIPRel()) {
          StubAM.GVOpFlags = X86II::MO_GOTPCREL;
          StubAM.Base.Reg = X86::RIP;
        }

      } else {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;

        if (Subtarget->isPICStyleGOT())
          StubAM.GVOpFlags = X86II::MO_GOT;
        else if (Subtarget->isPICStyleStub()) {
          // In darwin, we have multiple different stub types, and we have both
          // PIC and -mdynamic-no-pic.  Determine whether we have a stub
          // reference and/or whether the reference is relative to the PIC base
          // or not.
          bool IsPIC = TM.getRelocationModel() == Reloc::PIC_;

          if (!GV->hasHiddenVisibility()) {
            // Non-hidden $non_lazy_ptr reference.
            StubAM.GVOpFlags = IsPIC ? X86II::MO_DARWIN_NONLAZY_PIC_BASE :
                                       X86II::MO_DARWIN_NONLAZY;
          } else {
            // Hidden $non_lazy_ptr reference.
            StubAM.GVOpFlags = IsPIC ? X86II::MO_DARWIN_HIDDEN_NONLAZY_PIC_BASE :
                                       X86II::MO_DARWIN_HIDDEN_NONLAZY;
          }
        }
      }

      LoadReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, DL, TII.get(Opc), LoadReg), StubAM);

      // Prevent loading GV stub multiple times in same MBB.
      LocalValueMap[V] = LoadReg;
    }

    // Now construct the final address. Note that the Disp, Scale,
    // and Index values may already be set here.
    AM.Base.Reg = LoadReg;
    AM.GV = 0;
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !Subtarget->isPICStyleRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction* I) {
  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM, false))
    return false;

  return X86FastEmitStore(VT, I->getOperand(0), AM);
}

/// X86SelectLoad - Select and emit code to implement load instructions.
///
bool X86FastISel::X86SelectLoad(Instruction *I)  {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM, false))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(MVT VT) {
  switch (VT.getSimpleVT()) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison whose RHS is the
/// constant RHSC, return an opcode that works for the compare (e.g. CMP32ri);
/// otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
  switch (VT.getSimpleVT()) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit sext
    // field.
    if ((int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType());

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, DL, TII.get(CompareImmOpc)).addReg(Op0Reg)
                                              .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, DL, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(Instruction *I) {
  CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
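  // Note that UCOMISS/UCOMISD set the flags like an unsigned integer compare,
  // with PF set on an unordered (NaN) result. That is why the FP predicates
  // below map onto the "above"/"below" conditions and sometimes swap operands.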
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, DL, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, DL,
            TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, DL, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, DL, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, DL, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, DL, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(Instruction *I) {
  // Handle zero-extension from i1 to i8, which is common.
  if (I->getType() == Type::Int8Ty &&
      I->getOperand(0)->getType() == Type::Int1Ty) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    // Set the high bits to zero.
    ResultReg = FastEmitZExtFromI1(MVT::i8, ResultReg);
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}


bool X86FastISel::X86SelectBranch(Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;      // false -> compare Op0, Op1.  true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA"

      switch (Predicate) {
      case CmpInst::FCMP_OEQ:
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::FCMP_UNE;
        // FALL THROUGH
      case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
      default:
        return false;
      }

      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, DL, TII.get(BranchOpc)).addMBB(TrueMBB);

      if (Predicate == CmpInst::FCMP_UNE) {
        // X86 requires a second branch to handle UNE (and OEQ,
        // which is mapped to UNE above).
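        // JNE only tests ZF, but a NaN operand leaves UCOMIS with both ZF and
        // PF set, so the unordered case must be caught with an extra JP.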
        BuildMI(MBB, DL, TII.get(X86::JP)).addMBB(TrueMBB);
      }

      FastEmitBranch(FalseMBB);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  } else if (ExtractValueInst *EI =
             dyn_cast<ExtractValueInst>(BI->getCondition())) {
    // Check to see if the branch instruction is from an "arithmetic with
    // overflow" intrinsic. The main way these intrinsics are used is:
    //
    //   %t = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %v1, i32 %v2)
    //   %sum = extractvalue { i32, i1 } %t, 0
    //   %obit = extractvalue { i32, i1 } %t, 1
    //   br i1 %obit, label %overflow, label %normal
    //
    // The %sum and %obit are converted into an ADD and a SETO/SETB before
    // reaching the branch. Therefore, we search backwards through the MBB
    // looking for the SETO/SETB instruction. If an instruction modifies the
    // EFLAGS register before we reach the SETO/SETB instruction, then we can't
    // convert the branch into a JO/JB instruction.
    if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(EI->getAggregateOperand())){
      if (CI->getIntrinsicID() == Intrinsic::sadd_with_overflow ||
          CI->getIntrinsicID() == Intrinsic::uadd_with_overflow) {
        const MachineInstr *SetMI = 0;
        unsigned Reg = lookUpRegForValue(EI);

        for (MachineBasicBlock::const_reverse_iterator
               RI = MBB->rbegin(), RE = MBB->rend(); RI != RE; ++RI) {
          const MachineInstr &MI = *RI;

          if (MI.modifiesRegister(Reg)) {
            unsigned Src, Dst, SrcSR, DstSR;

            if (getInstrInfo()->isMoveInstr(MI, Src, Dst, SrcSR, DstSR)) {
              Reg = Src;
              continue;
            }

            SetMI = &MI;
            break;
          }

          const TargetInstrDesc &TID = MI.getDesc();
          if (TID.hasUnmodeledSideEffects() ||
              TID.hasImplicitDefOfPhysReg(X86::EFLAGS))
            break;
        }

        if (SetMI) {
          unsigned OpCode = SetMI->getOpcode();

          if (OpCode == X86::SETOr || OpCode == X86::SETBr) {
            BuildMI(MBB, DL, TII.get(OpCode == X86::SETOr ? X86::JO : X86::JB))
              .addMBB(TrueMBB);
            FastEmitBranch(FalseMBB);
            MBB->addSuccessor(TrueMBB);
            return true;
          }
        }
      }
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
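  // TEST8rr ANDs the i1 value with itself just to set ZF; JNE then branches
  // to the true block exactly when the condition register is nonzero.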
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, DL, TII.get(X86::JNE)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB);
  MBB->addSuccessor(TrueMBB);
  return true;
}

bool X86FastISel::X86SelectShift(Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType() == Type::Int8Ty) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int16Ty) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int32Ty) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int64Ty) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold constant shift amounts, e.g. the 3 in shl(x, 3).
  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(MBB, DL, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue() & 0xff);
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
  // we're doing here.
  if (CReg != X86::CL)
    BuildMI(MBB, DL, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
      .addReg(CReg).addImm(X86::SUBREG_8BIT);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(OpReg), ResultReg).addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(Instruction *I) {
  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(MBB, DL, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, DL, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
    Value *V = I->getOperand(0);
    if (V->getType() == Type::FloatTy) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(MBB, DL, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType() == Type::FloatTy) {
      Value *V = I->getOperand(0);
      if (V->getType() == Type::DoubleTy) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(MBB, DL, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(I->getType());

  // This code only handles truncation to byte right now.
  if (DstVT != MVT::i8 && DstVT != MVT::i1)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand.  Halt "fast" selection and bail.
    return false;

  // First issue a copy to GR16_ABCD or GR32_ABCD.
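  // On x86-32 only the A, B, C and D registers have addressable low-byte
  // subregisters, so restrict the source to the _ABCD classes before taking
  // the 8-bit subreg.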
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16rr : X86::MOV32rr;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_ABCDRegisterClass : X86::GR32_ABCDRegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(MBB, DL, TII.get(CopyOpc), CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(MVT::i8,
                                                  CopyReg, X86::SUBREG_8BIT);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectExtractValue(Instruction *I) {
  ExtractValueInst *EI = cast<ExtractValueInst>(I);
  Value *Agg = EI->getAggregateOperand();

  if (IntrinsicInst *CI = dyn_cast<IntrinsicInst>(Agg)) {
    switch (CI->getIntrinsicID()) {
    default: break;
    case Intrinsic::sadd_with_overflow:
    case Intrinsic::uadd_with_overflow:
      // Cheat a little. We know that the registers for "add" and "seto" are
      // allocated sequentially. However, we only keep track of the register
      // for "add" in the value map. Use extractvalue's index to get the
      // correct register for "seto".
      UpdateValueMap(I, lookUpRegForValue(Agg) + *EI->idx_begin());
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
  // FIXME: Handle more intrinsics.
  switch (I.getIntrinsicID()) {
  default: return false;
  case Intrinsic::sadd_with_overflow:
  case Intrinsic::uadd_with_overflow: {
    // Replace "add with overflow" intrinsics with an "add" instruction
    // followed by a seto/setc instruction. Later on, when the "extractvalue"
    // instructions are encountered, we use the fact that two registers were
    // created sequentially to get the correct registers for the "sum" and the
    // "overflow bit".
    const Function *Callee = I.getCalledFunction();
    const Type *RetTy =
      cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0));

    MVT VT;
    if (!isTypeLegal(RetTy, VT))
      return false;

    Value *Op1 = I.getOperand(1);
    Value *Op2 = I.getOperand(2);
    unsigned Reg1 = getRegForValue(Op1);
    unsigned Reg2 = getRegForValue(Op2);

    if (Reg1 == 0 || Reg2 == 0)
      // FIXME: Handle values *not* in registers.
      return false;

    unsigned OpC = 0;
    if (VT == MVT::i32)
      OpC = X86::ADD32rr;
    else if (VT == MVT::i64)
      OpC = X86::ADD64rr;
    else
      return false;

    unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
    BuildMI(MBB, DL, TII.get(OpC), ResultReg).addReg(Reg1).addReg(Reg2);
    unsigned DestReg1 = UpdateValueMap(&I, ResultReg);

    // If the add with overflow is an intra-block value then we just want to
    // create temporaries for it like normal.  If it is a cross-block value then
    // UpdateValueMap will return the cross-block register used.  Since we
    // *really* want the value to be live in the register pair known by
    // UpdateValueMap, we have to use DestReg1+1 as the destination register in
    // the cross block case.  In the non-cross-block case, we should just make
    // another register for the value.
    if (DestReg1 != ResultReg)
      ResultReg = DestReg1 + 1;
    else
      ResultReg = createResultReg(TLI.getRegClassFor(MVT::i8));

    unsigned Opc = X86::SETBr;
    if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow)
      Opc = X86::SETOr;
    BuildMI(MBB, DL, TII.get(Opc), ResultReg);
    return true;
  }
  }
}

bool X86FastISel::X86SelectCall(Instruction *I) {
  CallInst *CI = cast<CallInst>(I);
  Value *Callee = I->getOperand(0);

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // Handle intrinsic calls.
  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI))
    return X86VisitIntrinsicCall(*II);

  // Handle only C and fastcc calling conventions for now.
  CallSite CS(CI);
  unsigned CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // On X86, -tailcallopt changes the fastcc ABI. FastISel doesn't
  // handle this for now.
  if (CC == CallingConv::Fast && PerformTailCallOpt)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  MVT RetVT;
  if (RetTy == Type::VoidTy)
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectAddress(Callee, CalleeAM, true))
    return false;
  unsigned CalleeOp = 0;
  GlobalValue *GV = 0;
  if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else if (CalleeAM.Base.Reg != 0) {
    CalleeOp = CalleeAM.Base.Reg;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first.
  SmallVector<Value*, 8> ArgVals;
  SmallVector<unsigned, 8> Args;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVals.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
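    // inreg, sret, nest and byval arguments all need lowering beyond a plain
    // register copy or stack store, so punt those to SelectionDAG.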
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVals.push_back(*i);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, I->getParent()->getContext());
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START.
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackDown)).addImm(NumBytes);

  // Process arguments: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!"); Emitted = Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!"); Emitted = Emitted;
      Emitted = true;
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!"); Emitted = Emitted;
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass* RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC);
      assert(Emitted && "Failed to emit a copy instruction!"); Emitted = Emitted;
      Emitted = true;
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      Value *ArgVal = ArgVals[VA.getValNo()];

      // If this is a really simple value, emit this with the Value* version of
      // X86FastEmitStore. If it isn't simple, we don't want to do this, as it
      // can cause us to reevaluate the argument.
      if (isa<ConstantInt>(ArgVal) || isa<ConstantPointerNull>(ArgVal))
        X86FastEmitStore(ArgVT, ArgVal, AM);
      else
        X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC requires GOT in the EBX register before function calls via PLT
  // GOT pointer.
  if (Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted = Emitted;
    Emitted = true;
  }

  // Issue the call.
  MachineInstrBuilder MIB;
  if (CalleeOp) {
    // Register-indirect call.
    unsigned CallOpc = Subtarget->is64Bit() ? X86::CALL64r : X86::CALL32r;
    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addReg(CalleeOp);

  } else {
    // Direct call.
    assert(GV && "Not a direct call");
    unsigned CallOpc =
      Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32;

    // See if we need any target-specific flags on the GV operand.
    unsigned char OpFlags = 0;

    // On ELF targets, in both X86-64 and X86-32 mode, direct calls to
    // external symbols must go through the PLT in PIC mode.  If the symbol
    // has hidden or protected visibility, or if it is static or local, then
    // we don't need to use the PLT - we can directly call it.
    if (Subtarget->isTargetELF() &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) {
      OpFlags = X86II::MO_PLT;
    } else if (Subtarget->isPICStyleStub() &&
               (GV->isDeclaration() || GV->isWeakForLinker()) &&
               Subtarget->getDarwinVers() < 9) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = X86II::MO_DARWIN_STUB;
    }


    MIB = BuildMI(MBB, DL, TII.get(CallOpc)).addGlobalAddress(GV, 0, OpFlags);
  }

  // Add an implicit use of the GOT pointer in EBX.
  if (Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END.
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, DL, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  if (RetVT.getSimpleVT() != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, I->getParent()->getContext());
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    MVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
    assert(Emitted && "Failed to emit a copy instruction!"); Emitted = Emitted;
    Emitted = true;
    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      MVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but the lowest bit for calls that produce an i1.
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(MBB, DL,
              TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  return true;
}


bool
X86FastISel::TargetSelectInstruction(Instruction *I)  {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  case Instruction::ExtractValue:
    return X86SelectExtractValue(I);
  case Instruction::IntToPtr: // Deliberate fall-through.
  case Instruction::PtrToInt: {
    MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
    MVT DstVT = TLI.getValueType(I->getType());
    if (DstVT.bitsGT(SrcVT))
      return X86SelectZExt(I);
    if (DstVT.bitsLT(SrcVT))
      return X86SelectTrunc(I);
    unsigned Reg = getRegForValue(I->getOperand(0));
    if (Reg == 0) return false;
    UpdateValueMap(I, Reg);
    return true;
  }
  }

  return false;
}

unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
  MVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return 0;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM, false)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // Alignment of vector types.  FIXME!
    Align = TD.getTypeAllocSize(C->getType());
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  unsigned char OpFlag = 0;
  if (Subtarget->isPICStyleStub() &&
      TM.getRelocationModel() == Reloc::PIC_) { // Not dynamic-no-pic
    OpFlag = X86II::MO_PIC_BASE_OFFSET;
    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
  } else if (Subtarget->isPICStyleGOT()) {
    OpFlag = X86II::MO_GOTOFF;
    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);
  } else if (Subtarget->isPICStyleRIPRel() &&
             TM.getCodeModel() == CodeModel::Small) {
    PICBase = X86::RIP;
  }

  // Create the load from the constant pool.
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(MBB, DL, TII.get(Opc), ResultReg),
                           MCPOffset, PICBase, OpFlag);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM, false))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
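  // LEA computes the frame slot's effective address without loading from it
  // and without clobbering EFLAGS.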
  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addLeaAddress(BuildMI(MBB, DL, TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
                        MachineModuleInfo *mmi,
                        DwarfWriter *dw,
                        DenseMap<const Value *, unsigned> &vm,
                        DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                        DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                        , SmallSet<Instruction*, 8> &cil
#endif
                        ) {
    return new X86FastISel(mf, mmi, dw, vm, bm, am
#ifndef NDEBUG
                           , cil
#endif
                           );
  }
}