ARMFastISel.cpp revision 0435661582c5ccbd3984a710850fc8bc8939e566
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool>
DisableARMFastISel("disable-arm-fast-isel",
                   cl::desc("Turn off experimental ARM fast-isel support"),
                   cl::init(false), cl::Hidden);

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;
    unsigned Scale;
    unsigned PlusReg;

    // Innocuous defaults for our address.
    Address()
      : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
      Base.Reg = 0;
    }
  } Address;

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
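    // Note: these overrides exist so that every locally emitted instruction
    // is routed through AddOptionalDefs (below) and picks up its default
    // predicate and condition-code operands.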
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectSIToFP(const Instruction *I);
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
    bool SelectCall(const Instruction *I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, MVT &VT);
    bool isLoadTypeLegal(const Type *Ty, MVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(const GlobalValue *GV);

    // Call handling routines.
  private:
    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                        unsigned &ResultReg);
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const TargetInstrDesc &TID = MI->getDesc();

  // If this is a thumb2 function or the instruction isn't NEON, it was
  // already handled via isPredicable.
  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
    if (TID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, we're not
  // predicable but we need to add the predicate operands anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? CPSR is set iff the optional def is
  // CPSR; all other optional defs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
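// Note: VMOVSR copies a core register into a single-precision VFP register
// and VMOVRS copies back out; these helpers only handle 32-bit values.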
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addFPImm(CFP));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), DestReg)
                    .addImm(CI->getSExtValue()));
    return DestReg;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: No external globals for now.
  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;

  // TODO: Need more magic for ARM PIC.
  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(GV->getType());
  }

  // Grab index.
  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
  unsigned Id = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
                                                       ARMCP::CPValue, PCAdj);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  // Load value.
  MachineInstrBuilder MIB;
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb) {
    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx);
    if (RelocM == Reloc::PIC_)
      MIB.addImm(Id);
  } else {
    // The extra immediate is for addrmode2.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                  DestReg)
          .addConstantPoolIndex(Idx)
          .addImm(0);
  }
  AddOptionalDefs(MIB);
  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

     unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Materialize the global variable's address into a reg which can
  // then be used later to load the variable.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;

    Addr.Base.Reg = Tmp;
    return true;
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      // Integer loads/stores handle 12-bit offsets.
      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    ARMCC::CondCodes Pred = ARMCC::AL;
    unsigned PredReg = 0;

    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned BaseReg = createResultReg(RC);

    if (!isThumb)
      emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              BaseReg, Addr.Base.Reg, Addr.Offset,
                              Pred, PredReg,
                              static_cast<const ARMBaseInstrInfo&>(TII));
    else {
      assert(AFI->isThumb2Function());
      emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
                             static_cast<const ARMBaseInstrInfo&>(TII));
    }
    Addr.Offset = 0;
    Addr.Base.Reg = BaseReg;
  }
}

void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
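  // FIXME: The memory operand below is always marked MOLoad, even though
  // this helper is also reached from ARMEmitStore.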
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset),
        MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI),
        MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i16:
      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i8:
      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::f32:
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
      break;
    case MVT::f64:
      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB);
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
  unsigned StrOpc;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
                                               ARM::GPRRegisterClass);
      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
      break;
    case MVT::i16:
      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
      break;
    case MVT::i32:
      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRS;
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg, getKillRegState(true));
  AddLoadStoreOperands(VT, Addr, MIB);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
  return true;
}

static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  // TODO: Factor this out.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      MVT VT;
      const Type *Ty = CI->getOperand(0)->getType();
      if (!isTypeLegal(Ty, VT))
        return false;

      bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
      if (isFloat && !Subtarget->hasVFP2())
        return false;

      unsigned CmpOpc;
      switch (VT.SimpleTy) {
        default: return false;
        // TODO: Verify compares.
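        // Note: VCMPE also raises an Invalid Operation exception on quiet
        // NaNs; if that proves to be a problem for the unordered predicates,
        // plain VCMPS/VCMPD would be the alternative.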
        case MVT::f32:
          CmpOpc = ARM::VCMPES;
          break;
        case MVT::f64:
          CmpOpc = ARM::VCMPED;
          break;
        case MVT::i32:
          CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
          break;
      }

      // Get the compare predicate.
      ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      unsigned Arg1 = getRegForValue(CI->getOperand(0));
      if (Arg1 == 0) return false;

      unsigned Arg2 = getRegForValue(CI->getOperand(1));
      if (Arg2 == 0) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(CmpOpc))
                      .addReg(Arg1).addReg(Arg2));

      // For floating point we need to move the result to a comparison register
      // that we can then use for branches.
      if (isFloat)
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::FMSTAT)));

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // Re-set the flags just in case.
  unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CmpReg).addImm(0));

  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  const Type *Ty = CI->getOperand(0)->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned CmpOpc;
  unsigned CondReg;
  switch (VT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      CmpOpc = ARM::VCMPES;
      CondReg = ARM::FPSCR;
      break;
    case MVT::f64:
      CmpOpc = ARM::VCMPED;
      CondReg = ARM::FPSCR;
      break;
    case MVT::i32:
      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
      CondReg = ARM::CPSR;
      break;
  }

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  unsigned Arg1 = getRegForValue(CI->getOperand(0));
  if (Arg1 == 0) return false;

  unsigned Arg2 = getRegForValue(CI->getOperand(1));
  if (Arg2 == 0) return false;

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(Arg1).addReg(Arg2));

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
  TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
                                    : ARM::GPRRegisterClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero
    = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
    .addReg(ZeroReg).addImm(1)
    .addImm(ARMPred).addReg(CondReg);

  UpdateValueMap(I, DestReg);
  return true;
}

bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::DPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::SPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectSIToFP(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectFPToSI(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  const Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
  else return false;

  // f64->s32 or f32->s32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}

bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(1));
  unsigned ResultReg = createResultReg(RC);
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg)
    .addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectSDiv(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SDIV_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SDIV_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SDIV_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SDIV_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectSRem(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SREM_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SREM_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SREM_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SREM_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  const Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
                                 EVT SrcVT, unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
    default:
      llvm_unreachable("Unsupported calling convention");
    case CallingConv::Fast:
      // Ignore fastcc. Silence compiler warnings.
      (void)RetFastCC_ARM_APCS;
      (void)FastCC_ARM_APCS;
      // Fallthrough
    case CallingConv::C:
      // Use target triple & subtarget features to do actual dispatch.
      if (Subtarget->isAAPCS_ABI()) {
        if (Subtarget->hasVFP2() &&
            FloatABIType == FloatABI::Hard)
          return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
        else
          return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
      } else
        return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    case CallingConv::ARM_AAPCS_VFP:
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    case CallingConv::ARM_AAPCS:
      return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    case CallingConv::ARM_APCS:
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::ZExt: {
        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::AExt: {
        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);

        assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64) return false;

      CCValAssign &NextVA = ArgLocs[++i];

      // TODO: Only handle register args for now.
      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
    }
  }
  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&
             "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    // TODO: For now, don't try to handle cases where getLocInfo()
    // says Full but the types don't match.
    if (TLI.getValueType(RV->getType()) != VA.getValVT())
      return false;

    // Make the copy.
    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {

  // Darwin needs the r9 versions of the opcodes.
  bool isDarwin = Subtarget->isTargetDarwin();
  if (isThumb) {
    return isDarwin ? ARM::tBLr9 : ARM::tBL;

// A quick function that will emit a call for a named libcall, using the
// operands of the Instruction in I as the arguments. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged
// version of the full call infrastructure since we won't need to worry
// about things like computed function pointers or strange arguments at call
// sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to
// unify with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BL etc. on the assumption that we have v5t ops.
  if (!Subtarget->hasV5TOps()) return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    const Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call: tBLr9/BLr9 for Darwin, tBL/BL otherwise. This uses v5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(NULL);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addExternalSymbol(TLI.getLibcallName(Call));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
          .addExternalSymbol(TLI.getLibcallName(Call)));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
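
// A sketch of a typical caller (hypothetical, for illustration only):
// lowering a 32-bit srem through the runtime might look like
//
//   RTLIB::Libcall LC = RTLIB::SREM_I32;  // resolves to e.g. __modsi3
//   return ARMEmitLibcall(I, LC);
//
// where the actual libcall choice depends on the operand type.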

bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees that are direct calls.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BL etc. on the assumption that we have v5t ops.
  // TODO: Maybe?
  if (!Subtarget->hasV5TOps()) return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call: tBLr9/BLr9 for Darwin, tBL/BL otherwise. This uses v5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(GV);
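  // The two arms below differ in operand order: the Thumb BL variants list
  // their predicate operands before the callee, so the global address is
  // appended after AddDefaultPred; the ARM variants take the callee first.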
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addGlobalAddress(GV, 0, 0);
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
          .addGlobalAddress(GV, 0, 0));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    default: break;
  }
  return false;
}

namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-Darwin.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // Darwin only, and not Thumb1-only subtargets, for now.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
        !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }
}
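
// A sketch of how this factory is reached (hypothetical driver code; the
// real wiring goes through the target lowering hook and the common
// SelectionDAGISel driver, so names here are illustrative):
//
//   if (FastISel *FIS = TLI.createFastISel(FuncInfo))
//     ... // try FIS->SelectInstruction(I) before falling back to SelectionDAG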