X86FastISel.cpp revision d53886bb488fbef11b8083d7e99bcf53a51861a0
//===-- X86FastISel.cpp - X86 FastISel implementation --------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the X86-specific support for the FastISel class. Much
// of the target-specific code is generated by tablegen in the file
// X86GenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86ISelLowering.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"

using namespace llvm;

class X86FastISel : public FastISel {
  /// Subtarget - Keep a pointer to the X86Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const X86Subtarget *Subtarget;

  /// StackPtr - Register used as the stack pointer.
  ///
  unsigned StackPtr;

  /// X86ScalarSSEf32, X86ScalarSSEf64 - Select between SSE or x87
  /// floating point ops.
  /// When SSE is available, use it for f32 operations.
  /// When SSE2 is available, use it for f64 operations.
  bool X86ScalarSSEf64;
  bool X86ScalarSSEf32;

public:
  explicit X86FastISel(MachineFunction &mf,
                       MachineModuleInfo *mmi,
                       DenseMap<const Value *, unsigned> &vm,
                       DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                       DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                       , SmallSet<Instruction*, 8> &cil
#endif
                       )
    : FastISel(mf, mmi, vm, bm, am
#ifndef NDEBUG
               , cil
#endif
               ) {
    Subtarget = &TM.getSubtarget<X86Subtarget>();
    StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
    X86ScalarSSEf64 = Subtarget->hasSSE2();
    X86ScalarSSEf32 = Subtarget->hasSSE1();
  }

  virtual bool TargetSelectInstruction(Instruction *I);

#include "X86GenFastISel.inc"

private:
  bool X86FastEmitCompare(Value *LHS, Value *RHS, MVT VT);

  bool X86FastEmitLoad(MVT VT, const X86AddressMode &AM, unsigned &RR);

  bool X86FastEmitStore(MVT VT, unsigned Val,
                        const X86AddressMode &AM);

  bool X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT, unsigned Src, MVT SrcVT,
                         unsigned &ResultReg);

  bool X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall);

  bool X86SelectLoad(Instruction *I);

  bool X86SelectStore(Instruction *I);

  bool X86SelectCmp(Instruction *I);

  bool X86SelectZExt(Instruction *I);

  bool X86SelectBranch(Instruction *I);

  bool X86SelectShift(Instruction *I);

  bool X86SelectSelect(Instruction *I);

  bool X86SelectTrunc(Instruction *I);

  bool X86SelectFPExt(Instruction *I);
  bool X86SelectFPTrunc(Instruction *I);

  bool X86SelectCall(Instruction *I);

  CCAssignFn *CCAssignFnForCall(unsigned CC, bool isTailCall = false);

  const X86InstrInfo *getInstrInfo() const {
    return getTargetMachine()->getInstrInfo();
  }
  const X86TargetMachine *getTargetMachine() const {
    return static_cast<const X86TargetMachine *>(&TM);
  }

  unsigned TargetMaterializeConstant(Constant *C);

  unsigned TargetMaterializeAlloca(AllocaInst *C);

  /// isScalarFPTypeInSSEReg - Return true if the specified scalar FP type is
  /// computed in an SSE register, not on the X87 floating point stack.
  bool isScalarFPTypeInSSEReg(MVT VT) const {
    return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is legal when SSE2 is available.
           (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is legal when SSE1 is available.
  }

  bool isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1 = false);
};

bool X86FastISel::isTypeLegal(const Type *Ty, MVT &VT, bool AllowI1) {
  VT = TLI.getValueType(Ty, /*HandleUnknown=*/true);
  if (VT == MVT::Other || !VT.isSimple())
    // Unhandled type. Halt "fast" selection and bail.
    return false;

  // For now, require SSE/SSE2 for performing floating-point operations,
  // since x87 requires additional work.
  if (VT == MVT::f64 && !X86ScalarSSEf64)
    return false;
  if (VT == MVT::f32 && !X86ScalarSSEf32)
    return false;
  // Similarly, no f80 support yet.
  if (VT == MVT::f80)
    return false;
  // We only handle legal types. For example, on x86-32 the instruction
  // selector contains all of the 64-bit instructions from x86-64,
  // under the assumption that i64 won't be used if the target doesn't
  // support it.
  return (AllowI1 && VT == MVT::i1) || TLI.isTypeLegal(VT);
}

#include "X86GenCallingConv.inc"

/// CCAssignFnForCall - Selects the correct CCAssignFn for a given calling
/// convention.
CCAssignFn *X86FastISel::CCAssignFnForCall(unsigned CC, bool isTailCall) {
  if (Subtarget->is64Bit()) {
    if (Subtarget->isTargetWin64())
      return CC_X86_Win64_C;
    else if (CC == CallingConv::Fast && isTailCall)
      return CC_X86_64_TailCall;
    else
      return CC_X86_64_C;
  }

  if (CC == CallingConv::X86_FastCall)
    return CC_X86_32_FastCall;
  else if (CC == CallingConv::Fast)
    return CC_X86_32_FastCC;
  else
    return CC_X86_32_C;
}

/// X86FastEmitLoad - Emit a machine instruction to load a value of type VT.
/// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV.
/// Return true and the result register by reference if it is possible.
bool X86FastISel::X86FastEmitLoad(MVT VT, const X86AddressMode &AM,
                                  unsigned &ResultReg) {
  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
  return true;
}

/// X86FastEmitStore - Emit a machine instruction to store a value Val of
/// type VT. The address is either pre-computed, consisting of a base pointer
/// Ptr and a displacement offset, or a GlobalAddress, i.e. V.
/// Return true if it is possible.
bool
X86FastISel::X86FastEmitStore(MVT VT, unsigned Val,
                              const X86AddressMode &AM) {
  // Get opcode and regclass of the output for the given store instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return false;
  case MVT::i8:
    Opc = X86::MOV8mr;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16mr;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32mr;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64mr;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSmr;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::ST_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDmr;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::ST_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return false;
  }

  addFullAddress(BuildMI(MBB, TII.get(Opc)), AM).addReg(Val);
  return true;
}

/// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
/// type SrcVT to type DstVT using the specified extension opcode Opc (e.g.
/// ISD::SIGN_EXTEND).
bool X86FastISel::X86FastEmitExtend(ISD::NodeType Opc, MVT DstVT,
                                    unsigned Src, MVT SrcVT,
                                    unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc, Src);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

/// X86SelectAddress - Attempt to fill in an address from the given value.
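/// It looks through bitcasts, no-op inttoptr/ptrtoint casts, static allocas,
/// adds of constants, and simple GEPs, accumulating the base, scale, index,
/// and displacement fields of the X86AddressMode along the way.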
///
bool X86FastISel::X86SelectAddress(Value *V, X86AddressMode &AM, bool isCall) {
  User *U;
  unsigned Opcode = Instruction::UserOp1;
  if (Instruction *I = dyn_cast<Instruction>(V)) {
    Opcode = I->getOpcode();
    U = I;
  } else if (ConstantExpr *C = dyn_cast<ConstantExpr>(V)) {
    Opcode = C->getOpcode();
    U = C;
  }

  switch (Opcode) {
  default: break;
  case Instruction::BitCast:
    // Look past bitcasts.
    return X86SelectAddress(U->getOperand(0), AM, isCall);

  case Instruction::IntToPtr:
    // Look past no-op inttoptrs.
    if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::PtrToInt:
    // Look past no-op ptrtoints.
    if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
      return X86SelectAddress(U->getOperand(0), AM, isCall);
    break;

  case Instruction::Alloca: {
    if (isCall) break;
    // Do static allocas.
    const AllocaInst *A = cast<AllocaInst>(V);
    DenseMap<const AllocaInst*, int>::iterator SI = StaticAllocaMap.find(A);
    if (SI != StaticAllocaMap.end()) {
      AM.BaseType = X86AddressMode::FrameIndexBase;
      AM.Base.FrameIndex = SI->second;
      return true;
    }
    break;
  }

  case Instruction::Add: {
    if (isCall) break;
    // Adds of constants are common and easy enough.
    if (ConstantInt *CI = dyn_cast<ConstantInt>(U->getOperand(1))) {
      uint64_t Disp = (int32_t)AM.Disp + (uint64_t)CI->getSExtValue();
      // They have to fit in the 32-bit signed displacement field though.
      if (isInt32(Disp)) {
        AM.Disp = (uint32_t)Disp;
        return X86SelectAddress(U->getOperand(0), AM, isCall);
      }
    }
    break;
  }

  case Instruction::GetElementPtr: {
    if (isCall) break;
    // Pattern-match simple GEPs.
    uint64_t Disp = (int32_t)AM.Disp;
    unsigned IndexReg = AM.IndexReg;
    unsigned Scale = AM.Scale;
    gep_type_iterator GTI = gep_type_begin(U);
    // Iterate through the indices. Constants can be folded,
    // and one dynamic index can be handled, if the scale is supported.
    for (User::op_iterator i = U->op_begin() + 1, e = U->op_end();
         i != e; ++i, ++GTI) {
      Value *Op = *i;
      if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
        const StructLayout *SL = TD.getStructLayout(STy);
        unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
        Disp += SL->getElementOffset(Idx);
      } else {
        uint64_t S = TD.getABITypeSize(GTI.getIndexedType());
        if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
          // Constant-offset addressing.
          Disp += CI->getSExtValue() * S;
        } else if (IndexReg == 0 &&
                   (!AM.GV ||
                    !getTargetMachine()->symbolicAddressesAreRIPRel()) &&
                   (S == 1 || S == 2 || S == 4 || S == 8)) {
          // Scaled-index addressing.
          Scale = S;
          IndexReg = getRegForValue(Op);
          if (IndexReg == 0)
            return false;
        } else
          // Unsupported.
          goto unsupported_gep;
      }
    }
    // Check for displacement overflow.
    if (!isInt32(Disp))
      break;
    // Ok, the GEP indices were covered by constant-offset and scaled-index
    // addressing. Update the address state and move on to examining the base.
    AM.IndexReg = IndexReg;
    AM.Scale = Scale;
    AM.Disp = (uint32_t)Disp;
    return X86SelectAddress(U->getOperand(0), AM, isCall);
  unsupported_gep:
    // Ok, the GEP indices weren't all covered.
    break;
  }
  }

  // Handle constant address.
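  // Illustrative sketch (not from the original comments): for IR such as
  //   @g = global i32 0
  //   %v = load i32* @g
  // the global lands in AM.GV below, with an extra stub load issued first
  // when GVRequiresExtraLoad says the ABI demands the indirection (e.g.
  // references through a stub or the GOT under PIC).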
  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
    // Can't handle alternate code models yet.
    if (TM.getCodeModel() != CodeModel::Default &&
        TM.getCodeModel() != CodeModel::Small)
      return false;

    // RIP-relative addresses can't have additional register operands.
    if (getTargetMachine()->symbolicAddressesAreRIPRel() &&
        (AM.Base.Reg != 0 || AM.IndexReg != 0))
      return false;

    // Set up the basic address.
    AM.GV = GV;
    if (!isCall &&
        TM.getRelocationModel() == Reloc::PIC_ &&
        !Subtarget->is64Bit())
      AM.Base.Reg = getInstrInfo()->getGlobalBaseReg(&MF);

    // Emit an extra load if the ABI requires it.
    if (Subtarget->GVRequiresExtraLoad(GV, TM, isCall)) {
      // Check to see if we've already materialized this
      // value in a register in this block.
      if (unsigned Reg = LocalValueMap[V]) {
        AM.Base.Reg = Reg;
        AM.GV = 0;
        return true;
      }
      // Issue load from stub if necessary.
      unsigned Opc = 0;
      const TargetRegisterClass *RC = NULL;
      if (TLI.getPointerTy() == MVT::i32) {
        Opc = X86::MOV32rm;
        RC  = X86::GR32RegisterClass;
      } else {
        Opc = X86::MOV64rm;
        RC  = X86::GR64RegisterClass;
      }

      X86AddressMode StubAM;
      StubAM.Base.Reg = AM.Base.Reg;
      StubAM.GV = AM.GV;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), StubAM);

      // Now construct the final address. Note that the Disp, Scale,
      // and Index values may already be set here.
      AM.Base.Reg = ResultReg;
      AM.GV = 0;

      // Prevent loading the GV stub multiple times in the same MBB.
      LocalValueMap[V] = AM.Base.Reg;
    }
    return true;
  }

  // If all else fails, try to materialize the value in a register.
  if (!AM.GV || !getTargetMachine()->symbolicAddressesAreRIPRel()) {
    if (AM.Base.Reg == 0) {
      AM.Base.Reg = getRegForValue(V);
      return AM.Base.Reg != 0;
    }
    if (AM.IndexReg == 0) {
      assert(AM.Scale == 1 && "Scale with no index!");
      AM.IndexReg = getRegForValue(V);
      return AM.IndexReg != 0;
    }
  }

  return false;
}

/// X86SelectStore - Select and emit code to implement store instructions.
bool X86FastISel::X86SelectStore(Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;
  unsigned Val = getRegForValue(I->getOperand(0));
  if (Val == 0)
    // Unhandled operand. Halt "fast" selection and bail.
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(1), AM, false))
    return false;

  return X86FastEmitStore(VT, Val, AM);
}

/// X86SelectLoad - Select and emit code to implement load instructions.
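/// For example, '%v = load i32* %p' (for some hypothetical %p) becomes a
/// single MOV32rm from whatever addressing mode X86SelectAddress derives
/// for %p.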
///
bool X86FastISel::X86SelectLoad(Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  X86AddressMode AM;
  if (!X86SelectAddress(I->getOperand(0), AM, false))
    return false;

  unsigned ResultReg = 0;
  if (X86FastEmitLoad(VT, AM, ResultReg)) {
    UpdateValueMap(I, ResultReg);
    return true;
  }
  return false;
}

static unsigned X86ChooseCmpOpcode(MVT VT) {
  switch (VT.getSimpleVT()) {
  default:       return 0;
  case MVT::i8:  return X86::CMP8rr;
  case MVT::i16: return X86::CMP16rr;
  case MVT::i32: return X86::CMP32rr;
  case MVT::i64: return X86::CMP64rr;
  case MVT::f32: return X86::UCOMISSrr;
  case MVT::f64: return X86::UCOMISDrr;
  }
}

/// X86ChooseCmpImmediateOpcode - If we have a comparison with RHSC as the RHS
/// of the comparison, return an opcode that works for the compare (e.g.
/// CMP32ri); otherwise return 0.
static unsigned X86ChooseCmpImmediateOpcode(MVT VT, ConstantInt *RHSC) {
  switch (VT.getSimpleVT()) {
  // Otherwise, we can't fold the immediate into this comparison.
  default: return 0;
  case MVT::i8:  return X86::CMP8ri;
  case MVT::i16: return X86::CMP16ri;
  case MVT::i32: return X86::CMP32ri;
  case MVT::i64:
    // 64-bit comparisons are only valid if the immediate fits in a 32-bit
    // sign-extended field.
    if (RHSC->getType() == Type::Int64Ty &&
        (int)RHSC->getSExtValue() == RHSC->getSExtValue())
      return X86::CMP64ri32;
    return 0;
  }
}

bool X86FastISel::X86FastEmitCompare(Value *Op0, Value *Op1, MVT VT) {
  unsigned Op0Reg = getRegForValue(Op0);
  if (Op0Reg == 0) return false;

  // Handle 'null' like i32/i64 0.
  if (isa<ConstantPointerNull>(Op1))
    Op1 = Constant::getNullValue(TD.getIntPtrType());

  // We have two options: compare with register or immediate. If the RHS of
  // the compare is an immediate that we can fold into this compare, use
  // CMPri, otherwise use CMPrr.
  if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
    if (unsigned CompareImmOpc = X86ChooseCmpImmediateOpcode(VT, Op1C)) {
      BuildMI(MBB, TII.get(CompareImmOpc)).addReg(Op0Reg)
                                          .addImm(Op1C->getSExtValue());
      return true;
    }
  }

  unsigned CompareOpc = X86ChooseCmpOpcode(VT);
  if (CompareOpc == 0) return false;

  unsigned Op1Reg = getRegForValue(Op1);
  if (Op1Reg == 0) return false;
  BuildMI(MBB, TII.get(CompareOpc)).addReg(Op0Reg).addReg(Op1Reg);

  return true;
}

bool X86FastISel::X86SelectCmp(Instruction *I) {
  CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  if (!isTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  unsigned ResultReg = createResultReg(&X86::GR8RegClass);
  unsigned SetCCOpc;
  bool SwapArgs;  // false -> compare Op0, Op1.  true -> compare Op1, Op0.
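  // Most predicates map to a single SETcc, swapping the operands first when
  // only the mirrored x86 condition exists; e.g. 'icmp slt i32 %a, %b'
  // becomes roughly CMP32rr %a, %b followed by SETLr into ResultReg.
  // FCMP_OEQ and FCMP_UNE instead need two SETcc's combined with AND/OR,
  // since "equal and ordered" / "unequal or unordered" each test two flags.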
  switch (CI->getPredicate()) {
  case CmpInst::FCMP_OEQ: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned EReg = createResultReg(&X86::GR8RegClass);
    unsigned NPReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, TII.get(X86::SETEr), EReg);
    BuildMI(MBB, TII.get(X86::SETNPr), NPReg);
    BuildMI(MBB, TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_UNE: {
    if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT))
      return false;

    unsigned NEReg = createResultReg(&X86::GR8RegClass);
    unsigned PReg = createResultReg(&X86::GR8RegClass);
    BuildMI(MBB, TII.get(X86::SETNEr), NEReg);
    BuildMI(MBB, TII.get(X86::SETPr), PReg);
    BuildMI(MBB, TII.get(X86::OR8rr), ResultReg).addReg(PReg).addReg(NEReg);
    UpdateValueMap(I, ResultReg);
    return true;
  }
  case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_OLT: SwapArgs = true;  SetCCOpc = X86::SETAr;  break;
  case CmpInst::FCMP_OLE: SwapArgs = true;  SetCCOpc = X86::SETAEr; break;
  case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break;
  case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr;  break;
  case CmpInst::FCMP_UEQ: SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::FCMP_UGT: SwapArgs = true;  SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_UGE: SwapArgs = true;  SetCCOpc = X86::SETBEr; break;
  case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;

  case CmpInst::ICMP_EQ:  SwapArgs = false; SetCCOpc = X86::SETEr;  break;
  case CmpInst::ICMP_NE:  SwapArgs = false; SetCCOpc = X86::SETNEr; break;
  case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr;  break;
  case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break;
  case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr;  break;
  case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break;
  case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr;  break;
  case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break;
  case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr;  break;
  case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break;
  default:
    return false;
  }

  Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
  if (SwapArgs)
    std::swap(Op0, Op1);

  // Emit a compare of Op0/Op1.
  if (!X86FastEmitCompare(Op0, Op1, VT))
    return false;

  BuildMI(MBB, TII.get(SetCCOpc), ResultReg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectZExt(Instruction *I) {
  // Special-case hack: The only i1 values we know how to produce currently
  // set the upper bits of an i8 value to zero.
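  // In particular, the SETcc instructions emitted by X86SelectCmp already
  // leave 0 or 1 in a full 8-bit register, so a zext from i1 to i8 can
  // simply reuse the source register.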
  if (I->getType() == Type::Int8Ty &&
      I->getOperand(0)->getType() == Type::Int1Ty) {
    unsigned ResultReg = getRegForValue(I->getOperand(0));
    if (ResultReg == 0) return false;
    UpdateValueMap(I, ResultReg);
    return true;
  }

  return false;
}


bool X86FastISel::X86SelectBranch(Instruction *I) {
  // Unconditional branches are selected by tablegen-generated code.
  // Handle a conditional branch.
  BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TrueMBB = MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FalseMBB = MBBMap[BI->getSuccessor(1)];

  // Fold the common case of a conditional branch with a comparison.
  if (CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse()) {
      MVT VT = TLI.getValueType(CI->getOperand(0)->getType());

      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (MBB->isLayoutSuccessor(TrueMBB)) {
        std::swap(TrueMBB, FalseMBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      bool SwapArgs;      // false -> compare Op0, Op1. true -> compare Op1, Op0.
      unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA".

      switch (Predicate) {
      case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_OLT: SwapArgs = true;  BranchOpc = X86::JA;  break;
      case CmpInst::FCMP_OLE: SwapArgs = true;  BranchOpc = X86::JAE; break;
      case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP; break;
      case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP;  break;
      case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::FCMP_UGT: SwapArgs = true;  BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_UGE: SwapArgs = true;  BranchOpc = X86::JBE; break;
      case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;

      case CmpInst::ICMP_EQ:  SwapArgs = false; BranchOpc = X86::JE;  break;
      case CmpInst::ICMP_NE:  SwapArgs = false; BranchOpc = X86::JNE; break;
      case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA;  break;
      case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE; break;
      case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB;  break;
      case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE; break;
      case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG;  break;
      case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE; break;
      case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL;  break;
      case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE; break;
      default:
        return false;
      }

      Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
      if (SwapArgs)
        std::swap(Op0, Op1);

      // Emit a compare of the LHS and RHS, setting the flags.
      if (!X86FastEmitCompare(Op0, Op1, VT))
        return false;

      BuildMI(MBB, TII.get(BranchOpc)).addMBB(TrueMBB);
      FastEmitBranch(FalseMBB);
      MBB->addSuccessor(TrueMBB);
      return true;
    }
  }

  // Otherwise do a clumsy setcc and re-test it.
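  // That is, materialize the i1 condition into a register, TEST it against
  // itself, and emit a JNE to the true block followed by a branch to the
  // false block.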
  unsigned OpReg = getRegForValue(BI->getCondition());
  if (OpReg == 0) return false;

  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(OpReg).addReg(OpReg);
  BuildMI(MBB, TII.get(X86::JNE)).addMBB(TrueMBB);
  FastEmitBranch(FalseMBB);
  MBB->addSuccessor(TrueMBB);
  return true;
}

bool X86FastISel::X86SelectShift(Instruction *I) {
  unsigned CReg = 0, OpReg = 0, OpImm = 0;
  const TargetRegisterClass *RC = NULL;
  if (I->getType() == Type::Int8Ty) {
    CReg = X86::CL;
    RC = &X86::GR8RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR8rCL; OpImm = X86::SHR8ri; break;
    case Instruction::AShr: OpReg = X86::SAR8rCL; OpImm = X86::SAR8ri; break;
    case Instruction::Shl:  OpReg = X86::SHL8rCL; OpImm = X86::SHL8ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int16Ty) {
    CReg = X86::CX;
    RC = &X86::GR16RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR16rCL; OpImm = X86::SHR16ri; break;
    case Instruction::AShr: OpReg = X86::SAR16rCL; OpImm = X86::SAR16ri; break;
    case Instruction::Shl:  OpReg = X86::SHL16rCL; OpImm = X86::SHL16ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int32Ty) {
    CReg = X86::ECX;
    RC = &X86::GR32RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR32rCL; OpImm = X86::SHR32ri; break;
    case Instruction::AShr: OpReg = X86::SAR32rCL; OpImm = X86::SAR32ri; break;
    case Instruction::Shl:  OpReg = X86::SHL32rCL; OpImm = X86::SHL32ri; break;
    default: return false;
    }
  } else if (I->getType() == Type::Int64Ty) {
    CReg = X86::RCX;
    RC = &X86::GR64RegClass;
    switch (I->getOpcode()) {
    case Instruction::LShr: OpReg = X86::SHR64rCL; OpImm = X86::SHR64ri; break;
    case Instruction::AShr: OpReg = X86::SAR64rCL; OpImm = X86::SAR64ri; break;
    case Instruction::Shl:  OpReg = X86::SHL64rCL; OpImm = X86::SHL64ri; break;
    default: return false;
    }
  } else {
    return false;
  }

  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;

  // Fold constant shift amounts into the instruction, e.g. shl(x, 3).
  if (ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1))) {
    unsigned ResultReg = createResultReg(RC);
    BuildMI(MBB, TII.get(OpImm),
            ResultReg).addReg(Op0Reg).addImm(CI->getZExtValue());
    UpdateValueMap(I, ResultReg);
    return true;
  }

  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  TII.copyRegToReg(*MBB, MBB->end(), CReg, Op1Reg, RC, RC);

  // The shift instruction uses X86::CL. If we defined a super-register
  // of X86::CL, emit an EXTRACT_SUBREG to precisely describe what
  // we're doing here.
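  // For example, a variable i32 shift copies the amount into ECX above and
  // extracts CL here, because SHL32rCL and friends implicitly shift by CL.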
  if (CReg != X86::CL)
    BuildMI(MBB, TII.get(TargetInstrInfo::EXTRACT_SUBREG), X86::CL)
      .addReg(CReg).addImm(X86::SUBREG_8BIT);

  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, TII.get(OpReg), ResultReg).addReg(Op0Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectSelect(Instruction *I) {
  MVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true);
  if (VT == MVT::Other || !isTypeLegal(I->getType(), VT))
    return false;

  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  if (VT.getSimpleVT() == MVT::i16) {
    Opc = X86::CMOVE16rr;
    RC = &X86::GR16RegClass;
  } else if (VT.getSimpleVT() == MVT::i32) {
    Opc = X86::CMOVE32rr;
    RC = &X86::GR32RegClass;
  } else if (VT.getSimpleVT() == MVT::i64) {
    Opc = X86::CMOVE64rr;
    RC = &X86::GR64RegClass;
  } else {
    return false;
  }

  unsigned Op0Reg = getRegForValue(I->getOperand(0));
  if (Op0Reg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  BuildMI(MBB, TII.get(X86::TEST8rr)).addReg(Op0Reg).addReg(Op0Reg);
  unsigned ResultReg = createResultReg(RC);
  BuildMI(MBB, TII.get(Opc), ResultReg).addReg(Op1Reg).addReg(Op2Reg);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectFPExt(Instruction *I) {
  // fpext from float to double.
  if (Subtarget->hasSSE2() && I->getType() == Type::DoubleTy) {
    Value *V = I->getOperand(0);
    if (V->getType() == Type::FloatTy) {
      unsigned OpReg = getRegForValue(V);
      if (OpReg == 0) return false;
      unsigned ResultReg = createResultReg(X86::FR64RegisterClass);
      BuildMI(MBB, TII.get(X86::CVTSS2SDrr), ResultReg).addReg(OpReg);
      UpdateValueMap(I, ResultReg);
      return true;
    }
  }

  return false;
}

bool X86FastISel::X86SelectFPTrunc(Instruction *I) {
  if (Subtarget->hasSSE2()) {
    if (I->getType() == Type::FloatTy) {
      Value *V = I->getOperand(0);
      if (V->getType() == Type::DoubleTy) {
        unsigned OpReg = getRegForValue(V);
        if (OpReg == 0) return false;
        unsigned ResultReg = createResultReg(X86::FR32RegisterClass);
        BuildMI(MBB, TII.get(X86::CVTSD2SSrr), ResultReg).addReg(OpReg);
        UpdateValueMap(I, ResultReg);
        return true;
      }
    }
  }

  return false;
}

bool X86FastISel::X86SelectTrunc(Instruction *I) {
  if (Subtarget->is64Bit())
    // All other cases should be handled by the tblgen generated code.
    return false;
  MVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
  MVT DstVT = TLI.getValueType(I->getType());
  if (DstVT != MVT::i8)
    // All other cases should be handled by the tblgen generated code.
    return false;
  if (SrcVT != MVT::i16 && SrcVT != MVT::i32)
    // All other cases should be handled by the tblgen generated code.
    return false;

  unsigned InputReg = getRegForValue(I->getOperand(0));
  if (!InputReg)
    // Unhandled operand. Halt "fast" selection and bail.
    return false;

  // First issue a copy to GR16_ or GR32_.
  unsigned CopyOpc = (SrcVT == MVT::i16) ? X86::MOV16to16_ : X86::MOV32to32_;
  const TargetRegisterClass *CopyRC = (SrcVT == MVT::i16)
    ? X86::GR16_RegisterClass : X86::GR32_RegisterClass;
  unsigned CopyReg = createResultReg(CopyRC);
  BuildMI(MBB, TII.get(CopyOpc), CopyReg).addReg(InputReg);

  // Then issue an extract_subreg.
  unsigned ResultReg = FastEmitInst_extractsubreg(CopyReg, X86::SUBREG_8BIT);
  if (!ResultReg)
    return false;

  UpdateValueMap(I, ResultReg);
  return true;
}

bool X86FastISel::X86SelectCall(Instruction *I) {
  CallInst *CI = cast<CallInst>(I);
  Value *Callee = I->getOperand(0);

  // Can't handle inline asm yet.
  if (isa<InlineAsm>(Callee))
    return false;

  // FIXME: Handle some intrinsics.
  if (Function *F = CI->getCalledFunction()) {
    if (F->isDeclaration() && F->getIntrinsicID())
      return false;
  }

  // Handle only C and fastcc calling conventions for now.
  CallSite CS(CI);
  unsigned CC = CS.getCallingConv();
  if (CC != CallingConv::C &&
      CC != CallingConv::Fast &&
      CC != CallingConv::X86_FastCall)
    return false;

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = CS.getType();
  MVT RetVT;
  if (RetTy == Type::VoidTy)
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT, true))
    return false;

  // Materialize callee address in a register. FIXME: GV address can be
  // handled with a CALLpcrel32 instead.
  X86AddressMode CalleeAM;
  if (!X86SelectAddress(Callee, CalleeAM, true))
    return false;
  unsigned CalleeOp = 0;
  GlobalValue *GV = 0;
  if (CalleeAM.Base.Reg != 0) {
    assert(CalleeAM.GV == 0);
    CalleeOp = CalleeAM.Base.Reg;
  } else if (CalleeAM.GV != 0) {
    GV = CalleeAM.GV;
  } else
    return false;

  // Allow calls which produce i1 results.
  bool AndToI1 = false;
  if (RetVT == MVT::i1) {
    RetVT = MVT::i8;
    AndToI1 = true;
  }

  // Deal with call operands first.
  SmallVector<unsigned, 4> Args;
  SmallVector<MVT, 4> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 4> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Analyze operands of the call, assigning locations to each operand.
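  // Illustrative sketch: for 'call i32 @f(i32 %x, i32 %y)' on x86-32 with
  // the C calling convention, CCInfo assigns both arguments to stack slots
  // at offsets 0 and 4, so NumBytes below comes out to 8.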
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START.
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(MBB, TII.get(AdjStackDown)).addImm(NumBytes);

  // Process arguments: walk the register/memloc assignments, inserting
  // copies / loads.
  SmallVector<unsigned, 4> RegArgs;
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = Args[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: assert(0 && "Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt: {
      bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a sext!");
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::ZExt: {
      bool Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      assert(Emitted && "Failed to emit a zext!");
      ArgVT = VA.getLocVT();
      break;
    }
    case CCValAssign::AExt: {
      bool Emitted = X86FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                       Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);
      if (!Emitted)
        Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                    Arg, ArgVT, Arg);

      assert(Emitted && "Failed to emit an aext!");
      ArgVT = VA.getLocVT();
      break;
    }
    }

    if (VA.isRegLoc()) {
      TargetRegisterClass *RC = TLI.getRegClassFor(ArgVT);
      bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), VA.getLocReg(),
                                      Arg, RC, RC);
      assert(Emitted && "Failed to emit a copy instruction!");
      RegArgs.push_back(VA.getLocReg());
    } else {
      unsigned LocMemOffset = VA.getLocMemOffset();
      X86AddressMode AM;
      AM.Base.Reg = StackPtr;
      AM.Disp = LocMemOffset;
      X86FastEmitStore(ArgVT, Arg, AM);
    }
  }

  // ELF / PIC code requires the GOT pointer to be in EBX before function
  // calls made via the PLT.
  if (!Subtarget->is64Bit() &&
      TM.getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT()) {
    TargetRegisterClass *RC = X86::GR32RegisterClass;
    unsigned Base = getInstrInfo()->getGlobalBaseReg(&MF);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), X86::EBX, Base, RC, RC);
    assert(Emitted && "Failed to emit a copy instruction!");
  }

  // Issue the call.
  unsigned CallOpc = CalleeOp
    ? (Subtarget->is64Bit() ? X86::CALL64r       : X86::CALL32r)
    : (Subtarget->is64Bit() ? X86::CALL64pcrel32 : X86::CALLpcrel32);
  MachineInstrBuilder MIB = CalleeOp
    ? BuildMI(MBB, TII.get(CallOpc)).addReg(CalleeOp)
    : BuildMI(MBB, TII.get(CallOpc)).addGlobalAddress(GV);

  // Add an implicit use of the GOT pointer in EBX.
  if (!Subtarget->is64Bit() &&
      TM.getRelocationModel() == Reloc::PIC_ &&
      Subtarget->isPICStyleGOT())
    MIB.addReg(X86::EBX);

  // Add implicit physical register uses to the call.
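  // (The implicit uses mark the argument registers as live across the call,
  // so later passes know the call reads them.)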
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Issue CALLSEQ_END.
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(MBB, TII.get(AdjStackUp)).addImm(NumBytes).addImm(0);

  // Now handle call return value (if any).
  if (RetVT.getSimpleVT() != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs);
    CCInfo.AnalyzeCallResult(RetVT, RetCC_X86);

    // Copy all of the result registers out of their specified physreg.
    assert(RVLocs.size() == 1 && "Can't handle multi-value calls!");
    MVT CopyVT = RVLocs[0].getValVT();
    TargetRegisterClass *DstRC = TLI.getRegClassFor(CopyVT);
    TargetRegisterClass *SrcRC = DstRC;

    // If this is a call to a function that returns an fp value on the x87 fp
    // stack, but where we prefer to use the value in xmm registers, copy it
    // out as F80 and use a truncate to move it from fp stack reg to xmm reg.
    if ((RVLocs[0].getLocReg() == X86::ST0 ||
         RVLocs[0].getLocReg() == X86::ST1) &&
        isScalarFPTypeInSSEReg(RVLocs[0].getValVT())) {
      CopyVT = MVT::f80;
      SrcRC = X86::RSTRegisterClass;
      DstRC = X86::RFP80RegisterClass;
    }

    unsigned ResultReg = createResultReg(DstRC);
    bool Emitted = TII.copyRegToReg(*MBB, MBB->end(), ResultReg,
                                    RVLocs[0].getLocReg(), DstRC, SrcRC);
    assert(Emitted && "Failed to emit a copy instruction!");
    if (CopyVT != RVLocs[0].getValVT()) {
      // Round the F80 to the right size, which also moves it to the
      // appropriate xmm register. This is accomplished by storing the F80
      // value in memory and then loading it back. Ewww...
      MVT ResVT = RVLocs[0].getValVT();
      unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
      unsigned MemSize = ResVT.getSizeInBits()/8;
      int FI = MFI.CreateStackObject(MemSize, MemSize);
      addFrameReference(BuildMI(MBB, TII.get(Opc)), FI).addReg(ResultReg);
      DstRC = ResVT == MVT::f32
        ? X86::FR32RegisterClass : X86::FR64RegisterClass;
      Opc = ResVT == MVT::f32 ? X86::MOVSSrm : X86::MOVSDrm;
      ResultReg = createResultReg(DstRC);
      addFrameReference(BuildMI(MBB, TII.get(Opc), ResultReg), FI);
    }

    if (AndToI1) {
      // Mask out all but the lowest bit for calls which produce an i1.
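      // e.g. a call returning i1 comes back as an i8; the AND8ri with 1
      // below clamps it to exactly 0 or 1.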
      unsigned AndResult = createResultReg(X86::GR8RegisterClass);
      BuildMI(MBB, TII.get(X86::AND8ri), AndResult).addReg(ResultReg).addImm(1);
      ResultReg = AndResult;
    }

    UpdateValueMap(I, ResultReg);
  }

  return true;
}


bool
X86FastISel::TargetSelectInstruction(Instruction *I)  {
  switch (I->getOpcode()) {
  default: break;
  case Instruction::Load:
    return X86SelectLoad(I);
  case Instruction::Store:
    return X86SelectStore(I);
  case Instruction::ICmp:
  case Instruction::FCmp:
    return X86SelectCmp(I);
  case Instruction::ZExt:
    return X86SelectZExt(I);
  case Instruction::Br:
    return X86SelectBranch(I);
  case Instruction::Call:
    return X86SelectCall(I);
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::Shl:
    return X86SelectShift(I);
  case Instruction::Select:
    return X86SelectSelect(I);
  case Instruction::Trunc:
    return X86SelectTrunc(I);
  case Instruction::FPExt:
    return X86SelectFPExt(I);
  case Instruction::FPTrunc:
    return X86SelectFPTrunc(I);
  }

  return false;
}

unsigned X86FastISel::TargetMaterializeConstant(Constant *C) {
  MVT VT;
  if (!isTypeLegal(C->getType(), VT))
    return 0;

  // Get opcode and regclass of the output for the given load instruction.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = NULL;
  switch (VT.getSimpleVT()) {
  default: return 0;
  case MVT::i8:
    Opc = X86::MOV8rm;
    RC  = X86::GR8RegisterClass;
    break;
  case MVT::i16:
    Opc = X86::MOV16rm;
    RC  = X86::GR16RegisterClass;
    break;
  case MVT::i32:
    Opc = X86::MOV32rm;
    RC  = X86::GR32RegisterClass;
    break;
  case MVT::i64:
    // Must be in x86-64 mode.
    Opc = X86::MOV64rm;
    RC  = X86::GR64RegisterClass;
    break;
  case MVT::f32:
    if (Subtarget->hasSSE1()) {
      Opc = X86::MOVSSrm;
      RC  = X86::FR32RegisterClass;
    } else {
      Opc = X86::LD_Fp32m;
      RC  = X86::RFP32RegisterClass;
    }
    break;
  case MVT::f64:
    if (Subtarget->hasSSE2()) {
      Opc = X86::MOVSDrm;
      RC  = X86::FR64RegisterClass;
    } else {
      Opc = X86::LD_Fp64m;
      RC  = X86::RFP64RegisterClass;
    }
    break;
  case MVT::f80:
    // No f80 support yet.
    return 0;
  }

  // Materialize addresses with LEA instructions.
  if (isa<GlobalValue>(C)) {
    X86AddressMode AM;
    if (X86SelectAddress(C, AM, false)) {
      if (TLI.getPointerTy() == MVT::i32)
        Opc = X86::LEA32r;
      else
        Opc = X86::LEA64r;
      unsigned ResultReg = createResultReg(RC);
      addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
      return ResultReg;
    }
    return 0;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPreferredTypeAlignmentShift(C->getType());
  if (Align == 0) {
    // Alignment of vector types. FIXME!
    Align = TD.getABITypeSize(C->getType());
    Align = Log2_64(Align);
  }

  // x86-32 PIC requires a PIC base register for constant pools.
  unsigned PICBase = 0;
  if (TM.getRelocationModel() == Reloc::PIC_ &&
      !Subtarget->is64Bit())
    PICBase = getInstrInfo()->getGlobalBaseReg(&MF);

  // Create the load from the constant pool.
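  // e.g. a double constant becomes a MOVSDrm from its constant-pool slot,
  // addressed relative to the PIC base register on x86-32 PIC.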
  unsigned MCPOffset = MCP.getConstantPoolIndex(C, Align);
  unsigned ResultReg = createResultReg(RC);
  addConstantPoolReference(BuildMI(MBB, TII.get(Opc), ResultReg), MCPOffset,
                           PICBase);

  return ResultReg;
}

unsigned X86FastISel::TargetMaterializeAlloca(AllocaInst *C) {
  // Fail on dynamic allocas. At this point, getRegForValue has already
  // checked its CSE maps, so if we're here trying to handle a dynamic
  // alloca, we're not going to succeed. X86SelectAddress has a
  // check for dynamic allocas, because it's called directly from
  // various places, but TargetMaterializeAlloca also needs a check
  // in order to avoid recursion between getRegForValue,
  // X86SelectAddress, and TargetMaterializeAlloca.
  if (!StaticAllocaMap.count(C))
    return 0;

  X86AddressMode AM;
  if (!X86SelectAddress(C, AM, false))
    return 0;
  unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
  TargetRegisterClass *RC = TLI.getRegClassFor(TLI.getPointerTy());
  unsigned ResultReg = createResultReg(RC);
  addFullAddress(BuildMI(MBB, TII.get(Opc), ResultReg), AM);
  return ResultReg;
}

namespace llvm {
  llvm::FastISel *X86::createFastISel(MachineFunction &mf,
                                      MachineModuleInfo *mmi,
                                      DenseMap<const Value *, unsigned> &vm,
                                      DenseMap<const BasicBlock *, MachineBasicBlock *> &bm,
                                      DenseMap<const AllocaInst *, int> &am
#ifndef NDEBUG
                                      , SmallSet<Instruction*, 8> &cil
#endif
                                      ) {
    return new X86FastISel(mf, mmi, vm, bm, am
#ifndef NDEBUG
                           , cil
#endif
                           );
  }
}