ARMFastISel.cpp revision fe409dd67c7093684012818b6a64ab93d4b27928
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool>
DisableARMFastISel("disable-arm-fast-isel",
                   cl::desc("Turn off experimental ARM fast-isel support"),
                   cl::init(false), cl::Hidden);

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
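    // These overrides mirror the FastEmitInst_* emitters in FastISel.cpp;
    // they are reimplemented here so that every instruction built is routed
    // through AddOptionalDefs, which appends the ARM predicate and optional
    // CC operands the generic emitters know nothing about.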
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm1, uint64_t Imm2);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectSIToFP(const Instruction *I);
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
    bool SelectCall(const Instruction *I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);
    bool SelectTrunc(const Instruction *I);
    bool SelectIntExt(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(Type *Ty, MVT &VT);
    bool isLoadTypeLegal(Type *Ty, MVT &VT);
    bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(const GlobalValue *GV);

    // Call handling routines.
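    // Together these implement a simplified version of the SelectionDAG
    // call lowering: pick a CC assignment function, marshal the arguments
    // into registers or stack slots, emit the call, and copy the results
    // back out afterwards.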
  private:
    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                        unsigned &ResultReg);
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB,
                              unsigned Flags);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MCID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const MCInstrDesc &MCID = MI->getDesc();

  // If we're a Thumb2 function or this isn't a NEON instruction, it was
  // handled via isPredicable.
  if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i)
    if (MCID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, add the default predicate
// operands; if it needs default CC operands, add those as well.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for a description of why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, we know
  // we're not predicable but add the predicate anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? CPSR is set true iff the optional def
  // is CPSR; all other optional defs in ARM are the CCR register.
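  // For example, a flag-setting-capable instruction like ADDri carries an
  // optional cc_out operand: AddDefaultCC fills it with reg0 (flags left
  // untouched), while AddDefaultT1CC adds a CPSR def for the Thumb1-style
  // instructions that must set flags.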
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const MCInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    int Imm;
    unsigned Opc;
    if (is64bit) {
      Imm = ARM_AM::getFP64Imm(Val);
      Opc = ARM::FCONSTD;
    } else {
      Imm = ARM_AM::getFP32Imm(Val);
      Opc = ARM::FCONSTS;
    }
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addImm(Imm));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return false;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  // For now 32-bit only.
  if (VT != MVT::i32) return false;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), DestReg)
                    .addImm(CI->getSExtValue()));
    return DestReg;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: Need more magic for ARM PIC.
  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(GV->getType());
  }

  // Grab index.
  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
  unsigned Id = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id,
                                                              ARMCP::CPValue,
                                                              PCAdj);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  // Load value.
  MachineInstrBuilder MIB;
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb) {
    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx);
    if (RelocM == Reloc::PIC_)
      MIB.addImm(Id);
  } else {
    // The extra immediate is for addrmode2.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                  DestReg)
          .addConstantPoolIndex(Idx)
          .addImm(0);
  }
  AddOptionalDefs(MIB);

  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) {
    unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT));
    if (isThumb)
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                    TII.get(ARM::t2LDRi12), NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    else
      MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRi12),
                    NewDestReg)
            .addReg(DestReg)
            .addImm(0);
    DestReg = NewDestReg;
    AddOptionalDefs(MIB);
  }

  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return false;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
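  // An ADD of frame index + 0 is emitted here; frame index elimination
  // later rewrites the <fi#N> operand into an SP- or FP-relative address.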
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
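              // Fold the constant index into the running offset:
              // offset += index * sizeof(element).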
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Materialize the global variable's address into a reg which can
  // then be used later to load the variable.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;

    Addr.Base.Reg = Tmp;
    return true;
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      // Integer loads/stores handle 12-bit offsets.
      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a frame index and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
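  // The ADD below folds base+offset into a single register so the load or
  // store itself can be emitted with a zero offset.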
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags) {
  // addrmode5 expects the offset to already be divided by 4; the selection
  // dag addressing divides it and the encoding later multiplies it back.
  // Do the same division here.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset),
        Flags,
        MFI.getObjectSize(FI),
        MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i16:
      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i8:
      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::f32:
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
      break;
    case MVT::f64:
      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Atomic loads need special handling.
  if (cast<LoadInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
  unsigned StrOpc;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
                                               ARM::GPRRegisterClass);
      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
      break;
    case MVT::i16:
      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
      break;
    case MVT::i32:
      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRS;
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg, getKillRegState(true));
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Atomic stores need special handling.
  if (cast<StoreInst>(I)->isAtomic())
    return false;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
  return true;
}

static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.
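  // Three condition patterns are handled directly: a compare feeding the
  // branch from the same block, a trunc-to-i1 from the same block, and a
  // constant condition. Anything else falls through to testing bit 0 of
  // the materialized condition value.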

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      // Emit the compare.
      if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1)))
        return false;

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (const ConstantInt *CI =
             dyn_cast<ConstantInt>(BI->getCondition())) {
    uint64_t Imm = CI->getZExtValue();
    MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB;
    FastEmitBranch(Target, DL);
    return true;
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value) {
  MVT VT;
  Type *Ty = Src1Value->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned CmpOpc;
  switch (VT.SimpleTy) {
    // TODO: Add support for non-legal types (i.e., i1, i8, i16).
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      CmpOpc = ARM::VCMPES;
      break;
    case MVT::f64:
      CmpOpc = ARM::VCMPED;
      break;
    case MVT::i32:
      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
      break;
  }

  unsigned Src1 = getRegForValue(Src1Value);
  if (Src1 == 0) return false;

  unsigned Src2 = getRegForValue(Src2Value);
  if (Src2 == 0) return false;

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(Src1).addReg(Src2));

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (Ty->isFloatTy() || Ty->isDoubleTy())
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));
  return true;
}

bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);
  Type *Ty = CI->getOperand(0)->getType();

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  // Emit the compare.
  if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1)))
    return false;

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
  TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
                                    : ARM::GPRRegisterClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy());
  unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
    .addReg(ZeroReg).addImm(1)
    .addImm(ARMPred).addReg(CondReg);

  UpdateValueMap(I, DestReg);
  return true;
}

bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::DPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::SPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectSIToFP(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  // FIXME: Handle sign-extension where necessary.
  if (!I->getOperand(0)->getType()->isIntegerTy(32))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectFPToSI(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
  else return false;

  // f64->s32 or f32->s32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}

bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(1));
  unsigned ResultReg = createResultReg(RC);
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg)
    .addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectSDiv(const Instruction *I) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SDIV_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SDIV_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SDIV_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SDIV_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectSRem(const Instruction *I) {
  MVT VT;
  Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SREM_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SREM_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SREM_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SREM_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
                                 EVT SrcVT, unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    // Ignore fastcc. Silence compiler warnings.
    (void)RetFastCC_ARM_APCS;
    (void)FastCC_ARM_APCS;
    // Fallthrough
  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, *FuncInfo.MF, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Handle arg promotion, etc.
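    // The location info says how the value must be adjusted before it is
    // placed in its assigned location: sign-, zero-, or any-extended to the
    // location's width, or bitcast (e.g. f32 passed in a GPR).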
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::ZExt: {
        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::AExt: {
        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);

        assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64) return false;

      CCValAssign &NextVA = ArgLocs[++i];

      // TODO: Only handle register args for now.
      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
    }
  }
  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, *FuncInfo.MF, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
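      // e.g. under AAPCS without VFP argument registers an f64 comes back
      // in r0/r1 and is recombined into a D register with VMOVDRR.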
1662 EVT DestVT = RVLocs[0].getValVT(); 1663 TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); 1664 unsigned ResultReg = createResultReg(DstRC); 1665 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1666 TII.get(ARM::VMOVDRR), ResultReg) 1667 .addReg(RVLocs[0].getLocReg()) 1668 .addReg(RVLocs[1].getLocReg())); 1669 1670 UsedRegs.push_back(RVLocs[0].getLocReg()); 1671 UsedRegs.push_back(RVLocs[1].getLocReg()); 1672 1673 // Finally update the result. 1674 UpdateValueMap(I, ResultReg); 1675 } else { 1676 assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); 1677 EVT CopyVT = RVLocs[0].getValVT(); 1678 TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); 1679 1680 unsigned ResultReg = createResultReg(DstRC); 1681 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1682 ResultReg).addReg(RVLocs[0].getLocReg()); 1683 UsedRegs.push_back(RVLocs[0].getLocReg()); 1684 1685 // Finally update the result. 1686 UpdateValueMap(I, ResultReg); 1687 } 1688 } 1689 1690 return true; 1691} 1692 1693bool ARMFastISel::SelectRet(const Instruction *I) { 1694 const ReturnInst *Ret = cast<ReturnInst>(I); 1695 const Function &F = *I->getParent()->getParent(); 1696 1697 if (!FuncInfo.CanLowerReturn) 1698 return false; 1699 1700 if (F.isVarArg()) 1701 return false; 1702 1703 CallingConv::ID CC = F.getCallingConv(); 1704 if (Ret->getNumOperands() > 0) { 1705 SmallVector<ISD::OutputArg, 4> Outs; 1706 GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), 1707 Outs, TLI); 1708 1709 // Analyze operands of the call, assigning locations to each operand. 1710 SmallVector<CCValAssign, 16> ValLocs; 1711 CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, TM, ValLocs,I->getContext()); 1712 CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */)); 1713 1714 const Value *RV = Ret->getOperand(0); 1715 unsigned Reg = getRegForValue(RV); 1716 if (Reg == 0) 1717 return false; 1718 1719 // Only handle a single return value for now. 1720 if (ValLocs.size() != 1) 1721 return false; 1722 1723 CCValAssign &VA = ValLocs[0]; 1724 1725 // Don't bother handling odd stuff for now. 1726 // FIXME: Should be able to handle i1, i8, and/or i16 return types. 1727 if (VA.getLocInfo() != CCValAssign::Full) 1728 return false; 1729 // Only handle register returns for now. 1730 if (!VA.isRegLoc()) 1731 return false; 1732 // TODO: For now, don't try to handle cases where getLocInfo() 1733 // says Full but the types don't match. 1734 if (TLI.getValueType(RV->getType()) != VA.getValVT()) 1735 return false; 1736 1737 // Make the copy. 1738 unsigned SrcReg = Reg + VA.getValNo(); 1739 unsigned DstReg = VA.getLocReg(); 1740 const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); 1741 // Avoid a cross-class copy. This is very unlikely. 1742 if (!SrcRC->contains(DstReg)) 1743 return false; 1744 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1745 DstReg).addReg(SrcReg); 1746 1747 // Mark the register as live out of the function. 1748 MRI.addLiveOut(VA.getLocReg()); 1749 } 1750 1751 unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; 1752 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1753 TII.get(RetOpc))); 1754 return true; 1755} 1756 1757unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { 1758 1759 // Darwin needs the r9 versions of the opcodes. 1760 bool isDarwin = Subtarget->isTargetDarwin(); 1761 if (isThumb) { 1762 return isDarwin ? ARM::tBLr9 : ARM::tBL; 1763 } else { 1764 return isDarwin ? 
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
  // Darwin needs the r9 versions of the opcodes.
  bool isDarwin = Subtarget->isTargetDarwin();
  if (isThumb) {
    return isDarwin ? ARM::tBLr9 : ARM::tBL;
  } else {
    return isDarwin ? ARM::BLr9 : ARM::BL;
  }
}

// A quick function that will emit a call for a named libcall with the
// arguments passed in from the Instruction I. We can assume that we can
// emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(NULL);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addExternalSymbol(TLI.getLibcallName(Call));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
          .addExternalSymbol(TLI.getLibcallName(Call)));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

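// Example (illustrative, assumed mnemonics): on subtargets without a
// hardware integer divide, SelectSDiv routes 'sdiv i32 %a, %b' through
// ARMEmitLibcall as RTLIB::SDIV_I32, which marshals the operands via
// ProcessCallArgs and emits roughly 'bl __divsi3', with the result
// recovered by FinishCall.
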
bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV)
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);

    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    // FIXME: Should be able to handle i1, i8, and/or i16 parameters.
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(GV);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addGlobalAddress(GV, 0, 0);
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
          .addGlobalAddress(GV, 0, 0));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

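  // FinishCall issues the CALLSEQ_END bookkeeping and copies the call
  // result, if any, out of its physical register into a fresh virtual
  // register (e.g. an i32 result would typically come back in r0).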
  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::SelectTrunc(const Instruction *I) {
  // The high bits for a type smaller than the register size are assumed to be
  // undefined.
  Value *Op = I->getOperand(0);

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(Op->getType(), true);
  DestVT = TLI.getValueType(I->getType(), true);

  if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8)
    return false;
  if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1)
    return false;

  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  // Because the high bits are undefined, a truncate doesn't generate
  // any code.
  UpdateValueMap(I, SrcReg);
  return true;
}

bool ARMFastISel::SelectIntExt(const Instruction *I) {
  // On ARM, in general, integer casts don't involve legal types; this code
  // handles promotable integers. The high bits for a type smaller than
  // the register size are assumed to be undefined.
  Type *DestTy = I->getType();
  Value *Op = I->getOperand(0);
  Type *SrcTy = Op->getType();

  EVT SrcVT, DestVT;
  SrcVT = TLI.getValueType(SrcTy, true);
  DestVT = TLI.getValueType(DestTy, true);

  if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8)
    return false;

  unsigned Opc;
  bool isZext = isa<ZExtInst>(I);
  bool isBoolZext = false;
  if (!SrcVT.isSimple())
    return false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default: return false;
  case MVT::i16:
    if (!Subtarget->hasV6Ops()) return false;
    if (isZext)
      Opc = isThumb ? ARM::t2UXTH : ARM::UXTH;
    else
      Opc = isThumb ? ARM::t2SXTH : ARM::SXTH;
    break;
  case MVT::i8:
    if (!Subtarget->hasV6Ops()) return false;
    if (isZext)
      Opc = isThumb ? ARM::t2UXTB : ARM::UXTB;
    else
      Opc = isThumb ? ARM::t2SXTB : ARM::SXTB;
    break;
  case MVT::i1:
    if (isZext) {
      Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      isBoolZext = true;
      break;
    }
    return false;
  }

  // FIXME: We could save an instruction in many cases by special-casing
  // load instructions.
  unsigned SrcReg = getRegForValue(Op);
  if (!SrcReg) return false;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32));
  MachineInstrBuilder MIB;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
        .addReg(SrcReg);
  if (isBoolZext)
    MIB.addImm(1);  // AND mask for the i1 zero-extension.
  else
    MIB.addImm(0);  // Rotation amount for the sxt/uxt forms.
  AddOptionalDefs(MIB);
  UpdateValueMap(I, DestReg);
  return true;
}

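// Examples of what SelectIntExt produces (illustrative, ARM-mode mnemonics):
//   zext i8  %c to i32  ->  uxtb rD, rS
//   sext i16 %s to i32  ->  sxth rD, rS
//   zext i1  %b to i32  ->  and  rD, rS, #1
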
// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    case Instruction::Trunc:
      return SelectTrunc(I);
    case Instruction::ZExt:
    case Instruction::SExt:
      return SelectIntExt(I);
    default: break;
  }
  return false;
}

namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-darwin.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // Darwin targets only for now, and not Thumb1-only subtargets.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
        !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }
}