ARMFastISel.cpp revision 0c720761903394c4dd232b9fe49da7d5fb40172b
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool>
DisableARMFastISel("disable-arm-fast-isel",
                   cl::desc("Turn off experimental ARM fast-isel support"),
                   cl::init(false), cl::Hidden);

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;

    // Innocuous defaults for our address.
    Address()
     : BaseType(RegBase), Offset(0) {
       Base.Reg = 0;
     }
  } Address;

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
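    // These overrides mirror the generic emitters in FastISel.cpp; the one
    // ARM-specific twist is that every instruction built here is routed
    // through AddOptionalDefs (below), so the predicate and optional CC-def
    // operands are always appended.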
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_ii(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm1, uint64_t Imm2);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

    #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectSIToFP(const Instruction *I);
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
    bool SelectCall(const Instruction *I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, MVT &VT);
    bool isLoadTypeLegal(const Type *Ty, MVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(const GlobalValue *GV);

    // Call handling routines.
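    // The flow is the usual one: ProcessCallArgs assigns argument locations
    // via the calling convention and copies or stores each argument into
    // place, the caller emits the actual branch-and-link, and FinishCall
    // retrieves the results and tears down the call frame.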
  private:
    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                        unsigned &ResultReg);
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const TargetInstrDesc &TID = MI->getDesc();

  // Thumb2 functions and non-NEON instructions are already handled via
  // isPredicable.
  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
    if (TID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, add the predicate operands; if
// it needs default CC operands, add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? Or are we NEON in ARM mode with a predicate
  // operand? If so, we know we're not predicable, but add the operand anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
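  // For reference (a sketch of the helpers used below, as declared in
  // ARMBaseInstrInfo.h): AddDefaultPred appends an "always" predicate
  // (ARMCC::AL plus a zero predicate register), AddDefaultCC appends an
  // unset optional CCR operand (reg0), and AddDefaultT1CC appends a CPSR
  // def for the Thumb1-style s_cc_out operand.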
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ii(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      uint64_t Imm1, uint64_t Imm2) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm1).addImm(Imm2));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm1).addImm(Imm2));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY),
                            ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
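  // A COPY whose source register operand carries a subregister index is the
  // standard way to express a subregister extract here; the register
  // allocator resolves the actual subreg access once Op0 gets a physical
  // register.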
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addFPImm(CFP));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), DestReg)
                    .addImm(CI->getSExtValue()));
    return DestReg;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: No external globals for now.
  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;

  // TODO: Need more magic for ARM PIC.
  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(GV->getType());
  }

  // Grab index.
  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
  unsigned Id = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
                                                       ARMCP::CPValue, PCAdj);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  // Load value.
  MachineInstrBuilder MIB;
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb) {
    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx);
    if (RelocM == Reloc::PIC_)
      MIB.addImm(Id);
  } else {
    // The extra immediate is for addrmode2.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                  DestReg)
          .addConstantPoolIndex(Idx)
          .addImm(0);
  }
  AddOptionalDefs(MIB);
  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
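  // An EVT can describe any IR type, including illegal and extended ones;
  // only the "simple" machine value types have an MVT, which is why
  // everything isSimple() can't classify is rejected below.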
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
      break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                   == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
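      // (At this point every constant index has been folded into TmpOffset;
      // e.g. a single constant index of 2 over i32 elements contributed
      // 2 * 4 = 8 bytes above.)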
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

    unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Materialize the global variable's address into a reg which can
  // then be used later to load the variable.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;

    Addr.Base.Reg = Tmp;
    return true;
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      // Integer loads/stores handle 12-bit offsets.
      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a stack pointer and the offset needs to be simplified then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    Addr.Base.Reg = FastEmit_ri_(MVT::i32, ISD::ADD, Addr.Base.Reg,
                                 /*Op0IsKill*/false, Addr.Offset, MVT::i32);
    Addr.Offset = 0;
  }
}

void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB) {
  // addrmode5 output depends on the selection dag addressing dividing the
  // offset by 4 that it then later multiplies. Do this here as well.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    // Note: the memory operand is always flagged MOLoad here, even though
    // this helper also serves stores.
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
        MachinePointerInfo::getFixedStack(FI, Offset),
        MachineMemOperand::MOLoad,
        MFI.getObjectSize(FI),
        MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i16:
      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i8:
      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::f32:
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
      break;
    case MVT::f64:
      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB);
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
  unsigned StrOpc;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
                                               ARM::GPRRegisterClass);
      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
      break;
    case MVT::i16:
      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
      break;
    case MVT::i32:
      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRS;
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg, getKillRegState(true));
  AddLoadStoreOperands(VT, Addr, MIB);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
  return true;
}

static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  // TODO: Factor this out.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    MVT SourceVT;
    const Type *Ty = CI->getOperand(0)->getType();
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())
        && isTypeLegal(Ty, SourceVT)) {
      bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
      if (isFloat && !Subtarget->hasVFP2())
        return false;

      unsigned CmpOpc;
      switch (SourceVT.SimpleTy) {
        default: return false;
        // TODO: Verify compares.
        case MVT::f32:
          CmpOpc = ARM::VCMPES;
          break;
        case MVT::f64:
          CmpOpc = ARM::VCMPED;
          break;
        case MVT::i32:
          CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
          break;
      }

      // Get the compare predicate.
      // Try to take advantage of fallthrough opportunities.
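      // If the true block is the layout successor, invert the test and branch
      // to the false block instead, so the common path falls through; e.g.
      // "br i1 %c, label %next, label %other" with %next laid out immediately
      // after this block becomes one conditional branch to %other.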
      CmpInst::Predicate Predicate = CI->getPredicate();
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        Predicate = CmpInst::getInversePredicate(Predicate);
      }

      ARMCC::CondCodes ARMPred = getComparePred(Predicate);

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      unsigned Arg1 = getRegForValue(CI->getOperand(0));
      if (Arg1 == 0) return false;

      unsigned Arg2 = getRegForValue(CI->getOperand(1));
      if (Arg2 == 0) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(CmpOpc))
                      .addReg(Arg1).addReg(Arg2));

      // For floating point we need to move the result to a comparison register
      // that we can then use for branches.
      if (isFloat)
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::FMSTAT)));

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) {
    MVT SourceVT;
    if (TI->hasOneUse() && TI->getParent() == I->getParent() &&
        (isTypeLegal(TI->getOperand(0)->getType(), SourceVT))) {
      unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
      unsigned OpReg = getRegForValue(TI->getOperand(0));
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(TstOpc))
                      .addReg(OpReg).addImm(1));

      unsigned CCMode = ARMCC::NE;
      if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
        std::swap(TBB, FBB);
        CCMode = ARMCC::EQ;
      }

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);

      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }
  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  const Type *Ty = CI->getOperand(0)->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned CmpOpc;
  unsigned CondReg;
  switch (VT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      CmpOpc = ARM::VCMPES;
      CondReg = ARM::FPSCR;
      break;
    case MVT::f64:
      CmpOpc = ARM::VCMPED;
      CondReg = ARM::FPSCR;
      break;
    case MVT::i32:
      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
      CondReg = ARM::CPSR;
      break;
  }

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
  if (ARMPred == ARMCC::AL) return false;

  unsigned Arg1 = getRegForValue(CI->getOperand(0));
  if (Arg1 == 0) return false;

  unsigned Arg2 = getRegForValue(CI->getOperand(1));
  if (Arg2 == 0) return false;

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(Arg1).addReg(Arg2));

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
  TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
                                    : ARM::GPRRegisterClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
    .addReg(ZeroReg).addImm(1)
    .addImm(ARMPred).addReg(CondReg);

  UpdateValueMap(I, DestReg);
  return true;
}

bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::DPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::SPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectSIToFP(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer; move it to the FP registers if possible.
  unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectFPToSI(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  const Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
  else return false;

  // f64->s32 or f32->s32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}

bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(1));
  unsigned ResultReg = createResultReg(RC);
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg)
    .addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectSDiv(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SDIV_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SDIV_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SDIV_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SDIV_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectSRem(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SREM_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SREM_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SREM_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SREM_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here in the case when we want to use NEON for our fp
  // operations, but can't figure out how to. Just use the vfp instructions
  // if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  const Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
                                 EVT SrcVT, unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
    default:
      llvm_unreachable("Unsupported calling convention");
    case CallingConv::Fast:
      // Ignore fastcc. Silence compiler warnings.
      (void)RetFastCC_ARM_APCS;
      (void)FastCC_ARM_APCS;
      // Fallthrough
    case CallingConv::C:
      // Use target triple & subtarget features to do actual dispatch.
      if (Subtarget->isAAPCS_ABI()) {
        if (Subtarget->hasVFP2() &&
            FloatABIType == FloatABI::Hard)
          return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
        else
          return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
      } else
        return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
    case CallingConv::ARM_AAPCS_VFP:
      return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
    case CallingConv::ARM_AAPCS:
      return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    case CallingConv::ARM_APCS:
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Handle arg promotion, etc.
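    // The location info records how the value must be adjusted to fit the
    // register or stack slot it was assigned: sign-/zero-extend for
    // SExt/ZExt, either extension for AExt (the extra bits are don't-care),
    // or a register-class-changing bitcast for BCvt.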
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::ZExt: {
        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
        Emitted = true;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::AExt: {
        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);

        assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64) return false;

      CCValAssign &NextVA = ArgLocs[++i];

      // TODO: Only handle register args for now.
      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
    }
  }
  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
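      // Under the soft-float ABIs an f64 result comes back split across two
      // core registers (r0/r1 in practice), so it has to be reassembled into
      // a single D register with VMOVDRR.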
      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    // TODO: For now, don't try to handle cases where getLocInfo()
    // says Full but the types don't match.
    if (TLI.getValueType(RV->getType()) != VA.getValVT())
      return false;

    // Make the copy.
    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {

  // Darwin needs the r9 versions of the opcodes.
  bool isDarwin = Subtarget->isTargetDarwin();
  if (isThumb) {
    return isDarwin ? ARM::tBLr9 : ARM::tBL;
  } else {
unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {
  // Darwin needs the r9 versions of the opcodes.
  bool isDarwin = Subtarget->isTargetDarwin();
  if (isThumb) {
    return isDarwin ? ARM::tBLr9 : ARM::tBL;
  } else {
    return isDarwin ? ARM::BLr9 : ARM::BL;
  }
}

// Emit a call to the libcall Call, passing the operands of the Instruction I
// as arguments. We can assume we are able to emit a call for any libcall we
// can produce here. This is an abridged version of the full call
// infrastructure, since we won't need to worry about things like computed
// function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    const Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call: BLr9 for Darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(NULL);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addExternalSymbol(TLI.getLibcallName(Call));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
                         .addExternalSymbol(TLI.getLibcallName(Call)));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call, including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}
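// Rough illustration of the libcall path above: SelectSDiv can lower
//   %quot = sdiv i32 %a, %b
// on cores without a hardware divider by invoking ARMEmitLibcall with
// RTLIB::SDIV_I32, producing the usual argument setup followed by something
// like "bl __divsi3" (the symbol actually used comes from getLibcallName).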
bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV)
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);

    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call: BLr9 for Darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(GV);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addGlobalAddress(GV, 0, 0);
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
                         .addGlobalAddress(GV, 0, 0));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);
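  // Sketch of the effect of the loop above: if the arguments landed in r0
  // and r1, the BL gains implicit uses of r0 and r1 so later passes keep
  // the argument copies alive up to the call.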
  // Finish off the call, including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    default: break;
  }
  // Returning false hands the instruction back to the generic FastISel and
  // SelectionDAG path.
  return false;
}

namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-darwin.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // Darwin only for now; Thumb1 is not supported.
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
        !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }
}
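// Usage sketch (assumed, based on the generic FastISel plumbing rather than
// anything in this file): when fast instruction selection is enabled, e.g.
// at -O0, SelectionDAGISel asks the target for a FastISel instance roughly
// like
//   FastISel *FastIS = TLI.createFastISel(FuncInfo);
// and the ARM target lowering forwards that to ARM::createFastISel above.
// A null return means the ARM fast path is unavailable and everything is
// selected through SelectionDAG instead.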