ARMFastISel.cpp revision 6344a5f1464dade7735ac440d7a3d24bf009f298
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/Operator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool>
DisableARMFastISel("disable-arm-fast-isel",
                   cl::desc("Turn off experimental ARM fast-isel support"),
                   cl::init(false), cl::Hidden);

extern cl::opt<bool> EnableARMLongCalls;

namespace {

  // All possible address modes, plus some.
  typedef struct Address {
    enum {
      RegBase,
      FrameIndexBase
    } BaseType;

    union {
      unsigned Reg;
      int FI;
    } Base;

    int Offset;
    unsigned Scale;
    unsigned PlusReg;

    // Innocuous defaults for our address.
    Address()
      : BaseType(RegBase), Offset(0), Scale(0), PlusReg(0) {
      Base.Reg = 0;
    }
  } Address;

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
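    // These overrides parallel the generic FastEmitInst_* helpers in
    // FastISel.cpp; the suffix names the extra operands (r = register,
    // i = integer immediate, f = FP immediate). Each one funnels the built
    // instruction through AddOptionalDefs so the ARM predicate and
    // optional-CC operands get filled in.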
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_rrr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      unsigned Op2, bool Op2IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);

    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

  #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    bool SelectLoad(const Instruction *I);
    bool SelectStore(const Instruction *I);
    bool SelectBranch(const Instruction *I);
    bool SelectCmp(const Instruction *I);
    bool SelectFPExt(const Instruction *I);
    bool SelectFPTrunc(const Instruction *I);
    bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    bool SelectSIToFP(const Instruction *I);
    bool SelectFPToSI(const Instruction *I);
    bool SelectSDiv(const Instruction *I);
    bool SelectSRem(const Instruction *I);
    bool SelectCall(const Instruction *I);
    bool SelectSelect(const Instruction *I);
    bool SelectRet(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, MVT &VT);
    bool isLoadTypeLegal(const Type *Ty, MVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr);
    bool ARMComputeAddress(const Value *Obj, Address &Addr);
    void ARMSimplifyAddress(Address &Addr, EVT VT);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);
    unsigned ARMSelectCallOp(const GlobalValue *GV);

    // Call handling routines.
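    // These follow the shape of the SDISel call lowering: classify each
    // argument with the calling convention, marshal it into a register or
    // stack slot, emit the call, then copy results back out of physregs.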
  private:
    bool FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT,
                        unsigned &ResultReg);
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<MVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool isARMNEONPred(const MachineInstr *MI);
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
    void AddLoadStoreOperands(EVT VT, Address &Addr,
                              const MachineInstrBuilder &MIB, unsigned Flags);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) {
  const TargetInstrDesc &TID = MI->getDesc();

  // If this is a Thumb2 function, or the instruction isn't in the NEON
  // domain, it was already handled via isPredicable.
  if ((TID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON ||
      AFI->isThumb2Function())
    return false;

  for (unsigned i = 0, e = TID.getNumOperands(); i != e; ++i)
    if (TID.OpInfo[i].isPredicate())
      return true;

  return false;
}

// If the machine instruction is predicable, go ahead and add the predicate
// operands; if it needs default CC operands, add those.
// TODO: If we want to support thumb1 then we'll need to deal with optional
// CPSR defs that need to be added before the remaining operands. See s_cc_out
// for descriptions why.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate? or...
  // Are we NEON in ARM mode and have a predicate operand? If so, I know
  // we're not predicable but add it anyway.
  if (TII.isPredicable(MI) || isARMNEONPred(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate? DefinesOptionalPredicate reports CPSR
  // iff the optional def is CPSR; all other OptionalDefs in ARM are the CCR
  // register.
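  // For example, ARM::ADDri carries an optional cc_out operand;
  // AddDefaultCC fills it with reg0 so the flags stay unwritten, while the
  // Thumb1 encodings would instead need a CPSR def added up front.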
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  // If the opcode names an explicit def, emit straight into ResultReg;
  // otherwise emit the instruction and copy its first implicit def out.
  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       unsigned Op2, bool Op2IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addReg(Op2, Op2IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
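// VMOVSR moves a core register into a single-precision VFP register and
// VMOVRS goes the other way; an f64 transfer would need a VMOVDRR/VMOVRRD
// pair, hence the 64-bit bail-outs below.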
unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::f64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVSR), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) {
  if (VT == MVT::i64) return 0;

  unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VMOVRS), MoveReg)
                  .addReg(SrcReg));
  return MoveReg;
}

// For double width floating point we need to materialize two constants
// (the high and the low) into integer registers then use a move to get
// the combined constant into an FP reg.
unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) {
  const APFloat Val = CFP->getValueAPF();
  bool is64bit = VT == MVT::f64;

  // This checks to see if we can use VFP3 instructions to materialize
  // a constant, otherwise we have to go through the constant pool.
  if (TLI.isFPImmLegal(Val, VT)) {
    unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS;
    unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                            DestReg)
                    .addFPImm(CFP));
    return DestReg;
  }

  // Require VFP2 for loading fp constants.
  if (!Subtarget->hasVFP2()) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(CFP->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(CFP->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS;

  // The extra reg is for addrmode5.
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          DestReg)
                  .addConstantPoolIndex(Idx)
                  .addReg(0));
  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) {

  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));

  // If we can do this in a single instruction without a constant pool entry
  // do so now.
  const ConstantInt *CI = cast<ConstantInt>(C);
  if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) {
    unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), DestReg)
                    .addImm(CI->getSExtValue()));
    return DestReg;
  }

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(C->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(C->getType());
  }
  unsigned Idx = MCP.getConstantPoolIndex(C, Align);

  if (isThumb)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::t2LDRpci), DestReg)
                    .addConstantPoolIndex(Idx));
  else
    // The extra immediate is for addrmode2.
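    // (addrmode2 carries an offset operand; a plain constant-pool load
    // wants it to be zero.)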
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::LDRcp), DestReg)
                    .addConstantPoolIndex(Idx)
                    .addImm(0));

  return DestReg;
}

unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) {
  // For now 32-bit only.
  if (VT != MVT::i32) return 0;

  Reloc::Model RelocM = TM.getRelocationModel();

  // TODO: No external globals for now.
  if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0;

  // TODO: Need more magic for ARM PIC.
  if (!isThumb && (RelocM == Reloc::PIC_)) return 0;

  // MachineConstantPool wants an explicit alignment.
  unsigned Align = TD.getPrefTypeAlignment(GV->getType());
  if (Align == 0) {
    // TODO: Figure out if this is correct.
    Align = TD.getTypeAllocSize(GV->getType());
  }

  // Grab index.
  unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8);
  unsigned Id = AFI->createPICLabelUId();
  ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id,
                                                       ARMCP::CPValue, PCAdj);
  unsigned Idx = MCP.getConstantPoolIndex(CPV, Align);

  // Load value.
  MachineInstrBuilder MIB;
  unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
  if (isThumb) {
    unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic;
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg)
          .addConstantPoolIndex(Idx);
    if (RelocM == Reloc::PIC_)
      MIB.addImm(Id);
  } else {
    // The extra immediate is for addrmode2.
    MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp),
                  DestReg)
          .addConstantPoolIndex(Idx)
          .addImm(0);
  }
  AddOptionalDefs(MIB);
  return DestReg;
}

unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) {
  EVT VT = TLI.getValueType(C->getType(), true);

  // Only handle simple types.
  if (!VT.isSimple()) return 0;

  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
    return ARMMaterializeFP(CFP, VT);
  else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
    return ARMMaterializeGV(GV, VT);
  else if (isa<ConstantInt>(C))
    return ARMMaterializeInt(C, VT);

  return 0;
}

unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) {
  // Don't handle dynamic allocas.
  if (!FuncInfo.StaticAllocaMap.count(AI)) return 0;

  MVT VT;
  if (!isLoadTypeLegal(AI->getType(), VT)) return 0;

  DenseMap<const AllocaInst*, int>::iterator SI =
    FuncInfo.StaticAllocaMap.find(AI);

  // This will get lowered later into the correct offsets and registers
  // via rewriteXFrameIndex.
  if (SI != FuncInfo.StaticAllocaMap.end()) {
    TargetRegisterClass* RC = TLI.getRegClassFor(VT);
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(SI->second)
                    .addImm(0));
    return ResultReg;
  }

  return 0;
}

bool ARMFastISel::isTypeLegal(const Type *Ty, MVT &VT) {
  EVT evt = TLI.getValueType(Ty, true);

  // Only handle simple types.
  if (evt == MVT::Other || !evt.isSimple()) return false;
  VT = evt.getSimpleVT();

  // Handle all legal types, i.e. a register that will directly hold this
  // value.
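  // (e.g. i32 is always legal here, while i8 and i16 only get through via
  // isLoadTypeLegal below, which allows extending loads.)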
  return TLI.isTypeLegal(VT);
}

bool ARMFastISel::isLoadTypeLegal(const Type *Ty, MVT &VT) {
  if (isTypeLegal(Ty, VT)) return true;

  // If this is a type that can be sign or zero-extended to a basic operation
  // go ahead and accept it now.
  if (VT == MVT::i8 || VT == MVT::i16)
    return true;

  return false;
}

// Computes the address to get to an object.
bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
  // Some boilerplate from the X86 FastISel.
  const User *U = NULL;
  unsigned Opcode = Instruction::UserOp1;
  if (const Instruction *I = dyn_cast<Instruction>(Obj)) {
    // Don't walk into other basic blocks unless the object is an alloca from
    // another block, otherwise it may not have a virtual register assigned.
    if (FuncInfo.StaticAllocaMap.count(static_cast<const AllocaInst *>(Obj)) ||
        FuncInfo.MBBMap[I->getParent()] == FuncInfo.MBB) {
      Opcode = I->getOpcode();
      U = I;
    }
  } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) {
    Opcode = C->getOpcode();
    U = C;
  }

  if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType()))
    if (Ty->getAddressSpace() > 255)
      // Fast instruction selection doesn't support the special
      // address spaces.
      return false;

  switch (Opcode) {
    default:
    break;
    case Instruction::BitCast: {
      // Look through bitcasts.
      return ARMComputeAddress(U->getOperand(0), Addr);
    }
    case Instruction::IntToPtr: {
      // Look past no-op inttoptrs.
      if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::PtrToInt: {
      // Look past no-op ptrtoints.
      if (TLI.getValueType(U->getType()) == TLI.getPointerTy())
        return ARMComputeAddress(U->getOperand(0), Addr);
      break;
    }
    case Instruction::GetElementPtr: {
      Address SavedAddr = Addr;
      int TmpOffset = Addr.Offset;

      // Iterate through the GEP folding the constants into offsets where
      // we can.
      gep_type_iterator GTI = gep_type_begin(U);
      for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
           i != e; ++i, ++GTI) {
        const Value *Op = *i;
        if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
          const StructLayout *SL = TD.getStructLayout(STy);
          unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
          TmpOffset += SL->getElementOffset(Idx);
        } else {
          uint64_t S = TD.getTypeAllocSize(GTI.getIndexedType());
          for (;;) {
            if (const ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
              // Constant-offset addressing.
              TmpOffset += CI->getSExtValue() * S;
              break;
            }
            if (isa<AddOperator>(Op) &&
                (!isa<Instruction>(Op) ||
                 FuncInfo.MBBMap[cast<Instruction>(Op)->getParent()]
                 == FuncInfo.MBB) &&
                isa<ConstantInt>(cast<AddOperator>(Op)->getOperand(1))) {
              // An add (in the same block) with a constant operand. Fold the
              // constant.
              ConstantInt *CI =
                cast<ConstantInt>(cast<AddOperator>(Op)->getOperand(1));
              TmpOffset += CI->getSExtValue() * S;
              // Iterate on the other operand.
              Op = cast<AddOperator>(Op)->getOperand(0);
              continue;
            }
            // Unsupported
            goto unsupported_gep;
          }
        }
      }

      // Try to grab the base operand now.
      Addr.Offset = TmpOffset;
      if (ARMComputeAddress(U->getOperand(0), Addr)) return true;

      // We failed, restore everything and try the other options.
      Addr = SavedAddr;

      unsupported_gep:
      break;
    }
    case Instruction::Alloca: {
      const AllocaInst *AI = cast<AllocaInst>(Obj);
      DenseMap<const AllocaInst*, int>::iterator SI =
        FuncInfo.StaticAllocaMap.find(AI);
      if (SI != FuncInfo.StaticAllocaMap.end()) {
        Addr.BaseType = Address::FrameIndexBase;
        Addr.Base.FI = SI->second;
        return true;
      }
      break;
    }
  }

  // Materialize the global variable's address into a reg which can
  // then be used later to load the variable.
  if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) {
    unsigned Tmp = ARMMaterializeGV(GV, TLI.getValueType(Obj->getType()));
    if (Tmp == 0) return false;

    Addr.Base.Reg = Tmp;
    return true;
  }

  // Try to get this in a register if nothing else has worked.
  if (Addr.Base.Reg == 0) Addr.Base.Reg = getRegForValue(Obj);
  return Addr.Base.Reg != 0;
}

void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");

  bool needsLowering = false;
  switch (VT.getSimpleVT().SimpleTy) {
    default:
      assert(false && "Unhandled load/store type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      // Integer loads/stores handle 12-bit offsets.
      needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset);
      break;
    case MVT::f32:
    case MVT::f64:
      // Floating point operands handle 8-bit offsets.
      needsLowering = ((Addr.Offset & 0xff) != Addr.Offset);
      break;
  }

  // If this is a frame index and the offset needs to be simplified, then
  // put the alloca address into a register, set the base type back to
  // register and continue. This should almost never happen.
  if (needsLowering && Addr.BaseType == Address::FrameIndexBase) {
    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned ResultReg = createResultReg(RC);
    unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(Opc), ResultReg)
                    .addFrameIndex(Addr.Base.FI)
                    .addImm(0));
    Addr.Base.Reg = ResultReg;
    Addr.BaseType = Address::RegBase;
  }

  // Since the offset is too large for the load/store instruction
  // get the reg+offset into a register.
  if (needsLowering) {
    ARMCC::CondCodes Pred = ARMCC::AL;
    unsigned PredReg = 0;

    TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass :
                                        ARM::GPRRegisterClass;
    unsigned BaseReg = createResultReg(RC);

    if (!isThumb)
      emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              BaseReg, Addr.Base.Reg, Addr.Offset,
                              Pred, PredReg,
                              static_cast<const ARMBaseInstrInfo&>(TII));
    else {
      assert(AFI->isThumb2Function());
      emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                             BaseReg, Addr.Base.Reg, Addr.Offset, Pred, PredReg,
                             static_cast<const ARMBaseInstrInfo&>(TII));
    }
    Addr.Offset = 0;
    Addr.Base.Reg = BaseReg;
  }
}

void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr,
                                       const MachineInstrBuilder &MIB,
                                       unsigned Flags) {
  // addrmode5 encodes the offset in words: the selection DAG addressing
  // divides the byte offset by 4 and the encoding multiplies it back.
  // Do the same here.
  if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
      VT.getSimpleVT().SimpleTy == MVT::f64)
    Addr.Offset /= 4;

  // Frame base works a bit differently. Handle it separately.
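  // The MachineMemOperand keeps the size and alignment of the stack access
  // visible to later passes for alias and scheduling decisions.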
  if (Addr.BaseType == Address::FrameIndexBase) {
    int FI = Addr.Base.FI;
    int Offset = Addr.Offset;
    MachineMemOperand *MMO =
      FuncInfo.MF->getMachineMemOperand(
                              MachinePointerInfo::getFixedStack(FI, Offset),
                              Flags,
                              MFI.getObjectSize(FI),
                              MFI.getObjectAlignment(FI));
    // Now add the rest of the operands.
    MIB.addFrameIndex(FI);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
    MIB.addMemOperand(MMO);
  } else {
    // Now add the rest of the operands.
    MIB.addReg(Addr.Base.Reg);

    // ARM halfword load/stores need an additional operand.
    if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0);

    MIB.addImm(Addr.Offset);
  }
  AddOptionalDefs(MIB);
}

bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) {

  assert(VT.isSimple() && "Non-simple types are invalid here!");
  unsigned Opc;
  TargetRegisterClass *RC;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i16:
      Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i8:
      Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::i32:
      Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12;
      RC = ARM::GPRRegisterClass;
      break;
    case MVT::f32:
      Opc = ARM::VLDRS;
      RC = TLI.getRegClassFor(VT);
      break;
    case MVT::f64:
      Opc = ARM::VLDRD;
      RC = TLI.getRegClassFor(VT);
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  ResultReg = createResultReg(RC);
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(Opc), ResultReg);
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad);
  return true;
}

bool ARMFastISel::SelectLoad(const Instruction *I) {
  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getType(), VT))
    return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(0), Addr)) return false;

  unsigned ResultReg;
  if (!ARMEmitLoad(VT, ResultReg, Addr)) return false;
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) {
  unsigned StrOpc;
  switch (VT.getSimpleVT().SimpleTy) {
    // This is mostly going to be Neon/vector support.
    default: return false;
    case MVT::i1: {
      // Mask the value down to a single bit, then store it as a byte.
      unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass :
                                               ARM::GPRRegisterClass);
      unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri;
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(Opc), Res)
                      .addReg(SrcReg).addImm(1));
      SrcReg = Res;
    } // Fallthrough here.
    case MVT::i8:
      StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12;
      break;
    case MVT::i16:
      StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH;
      break;
    case MVT::i32:
      StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12;
      break;
    case MVT::f32:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRS;
      break;
    case MVT::f64:
      if (!Subtarget->hasVFP2()) return false;
      StrOpc = ARM::VSTRD;
      break;
  }
  // Simplify this down to something we can handle.
  ARMSimplifyAddress(Addr, VT);

  // Create the base instruction, then add the operands.
  MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                    TII.get(StrOpc))
                            .addReg(SrcReg, getKillRegState(true));
  AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore);
  return true;
}

bool ARMFastISel::SelectStore(const Instruction *I) {
  Value *Op0 = I->getOperand(0);
  unsigned SrcReg = 0;

  // Verify we have a legal type before going any further.
  MVT VT;
  if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT))
    return false;

  // Get the value to be stored into a register.
  SrcReg = getRegForValue(Op0);
  if (SrcReg == 0) return false;

  // See if we can handle this address.
  Address Addr;
  if (!ARMComputeAddress(I->getOperand(1), Addr))
    return false;

  if (!ARMEmitStore(VT, SrcReg, Addr)) return false;
  return true;
}

static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) {
  switch (Pred) {
    // Needs two compares...
    case CmpInst::FCMP_ONE:
    case CmpInst::FCMP_UEQ:
    default:
      // AL is our "false" for now. The other two need more compares.
      return ARMCC::AL;
    case CmpInst::ICMP_EQ:
    case CmpInst::FCMP_OEQ:
      return ARMCC::EQ;
    case CmpInst::ICMP_SGT:
    case CmpInst::FCMP_OGT:
      return ARMCC::GT;
    case CmpInst::ICMP_SGE:
    case CmpInst::FCMP_OGE:
      return ARMCC::GE;
    case CmpInst::ICMP_UGT:
    case CmpInst::FCMP_UGT:
      return ARMCC::HI;
    case CmpInst::FCMP_OLT:
      return ARMCC::MI;
    case CmpInst::ICMP_ULE:
    case CmpInst::FCMP_OLE:
      return ARMCC::LS;
    case CmpInst::FCMP_ORD:
      return ARMCC::VC;
    case CmpInst::FCMP_UNO:
      return ARMCC::VS;
    case CmpInst::FCMP_UGE:
      return ARMCC::PL;
    case CmpInst::ICMP_SLT:
    case CmpInst::FCMP_ULT:
      return ARMCC::LT;
    case CmpInst::ICMP_SLE:
    case CmpInst::FCMP_ULE:
      return ARMCC::LE;
    case CmpInst::FCMP_UNE:
    case CmpInst::ICMP_NE:
      return ARMCC::NE;
    case CmpInst::ICMP_UGE:
      return ARMCC::HS;
    case CmpInst::ICMP_ULT:
      return ARMCC::LO;
  }
}

bool ARMFastISel::SelectBranch(const Instruction *I) {
  const BranchInst *BI = cast<BranchInst>(I);
  MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
  MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];

  // Simple branch support.

  // If we can, avoid recomputing the compare - redoing it could lead to wonky
  // behavior.
  // TODO: Factor this out.
  if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
    if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
      MVT VT;
      const Type *Ty = CI->getOperand(0)->getType();
      if (!isTypeLegal(Ty, VT))
        return false;

      bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
      if (isFloat && !Subtarget->hasVFP2())
        return false;

      unsigned CmpOpc;
      switch (VT.SimpleTy) {
        default: return false;
        // TODO: Verify compares.
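        // (VCMPE is the "signaling" variant: it raises Invalid Operation on
        // quiet NaN inputs as well.)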
        case MVT::f32:
          CmpOpc = ARM::VCMPES;
          break;
        case MVT::f64:
          CmpOpc = ARM::VCMPED;
          break;
        case MVT::i32:
          CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
          break;
      }

      // Get the compare predicate.
      ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

      // We may not handle every CC for now.
      if (ARMPred == ARMCC::AL) return false;

      unsigned Arg1 = getRegForValue(CI->getOperand(0));
      if (Arg1 == 0) return false;

      unsigned Arg2 = getRegForValue(CI->getOperand(1));
      if (Arg2 == 0) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(CmpOpc))
                      .addReg(Arg1).addReg(Arg2));

      // For floating point we need to move the result to a comparison register
      // that we can then use for branches.
      if (isFloat)
        AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                TII.get(ARM::FMSTAT)));

      unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
        .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR);
      FastEmitBranch(FBB, DL);
      FuncInfo.MBB->addSuccessor(TBB);
      return true;
    }
  }

  unsigned CmpReg = getRegForValue(BI->getCondition());
  if (CmpReg == 0) return false;

  // We've been divorced from our compare! Our block was split, and
  // now our compare lives in a predecessor block. We mustn't
  // re-compare here, as the children of the compare aren't guaranteed
  // live across the block boundary (we *could* check for this).
  // Regardless, the compare has been done in the predecessor block,
  // and it left a value for us in a virtual register. Ergo, we test
  // the one-bit value left in the virtual register.
  unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc))
                  .addReg(CmpReg).addImm(1));

  unsigned CCMode = ARMCC::NE;
  if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
    std::swap(TBB, FBB);
    CCMode = ARMCC::EQ;
  }

  unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc))
    .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR);
  FastEmitBranch(FBB, DL);
  FuncInfo.MBB->addSuccessor(TBB);
  return true;
}

bool ARMFastISel::SelectCmp(const Instruction *I) {
  const CmpInst *CI = cast<CmpInst>(I);

  MVT VT;
  const Type *Ty = CI->getOperand(0)->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned CmpOpc;
  unsigned CondReg;
  switch (VT.SimpleTy) {
    default: return false;
    // TODO: Verify compares.
    case MVT::f32:
      CmpOpc = ARM::VCMPES;
      CondReg = ARM::FPSCR;
      break;
    case MVT::f64:
      CmpOpc = ARM::VCMPED;
      CondReg = ARM::FPSCR;
      break;
    case MVT::i32:
      CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr;
      CondReg = ARM::CPSR;
      break;
  }

  // Get the compare predicate.
  ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate());

  // We may not handle every CC for now.
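  // (getComparePred returns AL for FCMP_ONE and FCMP_UEQ, which would each
  // take two compares to get right.)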
  if (ARMPred == ARMCC::AL) return false;

  unsigned Arg1 = getRegForValue(CI->getOperand(0));
  if (Arg1 == 0) return false;

  unsigned Arg2 = getRegForValue(CI->getOperand(1));
  if (Arg2 == 0) return false;

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(Arg1).addReg(Arg2));

  // For floating point we need to move the result to a comparison register
  // that we can then use for branches.
  if (isFloat)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(ARM::FMSTAT)));

  // Now set a register based on the comparison. Explicitly set the predicates
  // here.
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi;
  TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass
                                    : ARM::GPRRegisterClass;
  unsigned DestReg = createResultReg(RC);
  Constant *Zero
    = ConstantInt::get(Type::getInt32Ty(*Context), 0);
  unsigned ZeroReg = TargetMaterializeConstant(Zero);
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg)
    .addReg(ZeroReg).addImm(1)
    .addImm(ARMPred).addReg(CondReg);

  UpdateValueMap(I, DestReg);
  return true;
}

bool ARMFastISel::SelectFPExt(const Instruction *I) {
  // Make sure we have VFP and that we're extending float to double.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!I->getType()->isDoubleTy() ||
      !V->getType()->isFloatTy()) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::DPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTDS), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectFPTrunc(const Instruction *I) {
  // Make sure we have VFP and that we're truncating double to float.
  if (!Subtarget->hasVFP2()) return false;

  Value *V = I->getOperand(0);
  if (!(I->getType()->isFloatTy() &&
        V->getType()->isDoubleTy())) return false;

  unsigned Op = getRegForValue(V);
  if (Op == 0) return false;

  unsigned Result = createResultReg(ARM::SPRRegisterClass);
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(ARM::VCVTSD), Result)
                  .addReg(Op));
  UpdateValueMap(I, Result);
  return true;
}

bool ARMFastISel::SelectSIToFP(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  // The conversion routine works on fp-reg to fp-reg and the operand above
  // was an integer, move it to the fp registers if possible.
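  // (The i32 bit pattern is transferred into an S register unchanged;
  // VSITOS/VSITOD then reinterpret it as a signed integer.)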
  unsigned FP = ARMMoveToFPReg(MVT::f32, Op);
  if (FP == 0) return false;

  unsigned Opc;
  if (Ty->isFloatTy()) Opc = ARM::VSITOS;
  else if (Ty->isDoubleTy()) Opc = ARM::VSITOD;
  else return false;

  unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(FP));
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectFPToSI(const Instruction *I) {
  // Make sure we have VFP.
  if (!Subtarget->hasVFP2()) return false;

  MVT DstVT;
  const Type *RetTy = I->getType();
  if (!isTypeLegal(RetTy, DstVT))
    return false;

  unsigned Op = getRegForValue(I->getOperand(0));
  if (Op == 0) return false;

  unsigned Opc;
  const Type *OpTy = I->getOperand(0)->getType();
  if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS;
  else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD;
  else return false;

  // f64->s32 or f32->s32 both need an intermediate f32 reg.
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc),
                          ResultReg)
                  .addReg(Op));

  // This result needs to be in an integer register, but the conversion only
  // takes place in fp-regs.
  unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg);
  if (IntReg == 0) return false;

  UpdateValueMap(I, IntReg);
  return true;
}

bool ARMFastISel::SelectSelect(const Instruction *I) {
  MVT VT;
  if (!isTypeLegal(I->getType(), VT))
    return false;

  // Things need to be register sized for register moves.
  if (VT != MVT::i32) return false;
  const TargetRegisterClass *RC = TLI.getRegClassFor(VT);

  unsigned CondReg = getRegForValue(I->getOperand(0));
  if (CondReg == 0) return false;
  unsigned Op1Reg = getRegForValue(I->getOperand(1));
  if (Op1Reg == 0) return false;
  unsigned Op2Reg = getRegForValue(I->getOperand(2));
  if (Op2Reg == 0) return false;

  unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc))
                  .addReg(CondReg).addImm(1));
  unsigned ResultReg = createResultReg(RC);
  unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr;
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg)
    .addReg(Op1Reg).addReg(Op2Reg)
    .addImm(ARMCC::EQ).addReg(ARM::CPSR);
  UpdateValueMap(I, ResultReg);
  return true;
}

bool ARMFastISel::SelectSDiv(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  // If we have integer div support we should have selected this automagically.
  // In case we have a real miss go ahead and return false and we'll pick
  // it up later.
  if (Subtarget->hasDivide()) return false;

  // Otherwise emit a libcall.
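  // (TLI.getLibcallName picks the actual symbol, e.g. __divsi3, or the
  // __aeabi_idiv family on AEABI targets.)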
  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SDIV_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SDIV_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SDIV_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SDIV_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SDIV_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectSRem(const Instruction *I) {
  MVT VT;
  const Type *Ty = I->getType();
  if (!isTypeLegal(Ty, VT))
    return false;

  RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
  if (VT == MVT::i8)
    LC = RTLIB::SREM_I8;
  else if (VT == MVT::i16)
    LC = RTLIB::SREM_I16;
  else if (VT == MVT::i32)
    LC = RTLIB::SREM_I32;
  else if (VT == MVT::i64)
    LC = RTLIB::SREM_I64;
  else if (VT == MVT::i128)
    LC = RTLIB::SREM_I128;
  assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");

  return ARMEmitLibcall(I, LC);
}

bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) {
  EVT VT = TLI.getValueType(I->getType(), true);

  // We can get here when we want NEON for our fp operations but can't figure
  // out how to emit it; just use the VFP instructions if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  const Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  unsigned Opc;
  bool is64bit = VT == MVT::f64 || VT == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

bool ARMFastISel::FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src,
                                 EVT SrcVT, unsigned &ResultReg) {
  unsigned RR = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), Opc,
                           Src, /*TODO: Kill=*/false);

  if (RR != 0) {
    ResultReg = RR;
    return true;
  } else
    return false;
}

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    // Ignore fastcc. Silence compiler warnings.
    (void)RetFastCC_ARM_APCS;
    (void)FastCC_ARM_APCS;
    // Fallthrough
  case CallingConv::C:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard)
        return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<MVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackDown))
                  .addImm(NumBytes));

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    MVT ArgVT = ArgVTs[VA.getValNo()];

    // We don't handle NEON/vector parameters yet.
    if (ArgVT.isVector() || ArgVT.getSizeInBits() > 64)
      return false;

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      case CCValAssign::SExt: {
        bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a sext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::ZExt: {
        bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        assert(Emitted && "Failed to emit a zext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::AExt: {
        // Any extend will do: prefer it, then fall back to zext or sext.
        bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(),
                                      Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);
        if (!Emitted)
          Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(),
                                   Arg, ArgVT, Arg);

        assert(Emitted && "Failed to emit an aext!"); (void)Emitted;
        ArgVT = VA.getLocVT();
        break;
      }
      case CCValAssign::BCvt: {
        unsigned BC = FastEmit_r(ArgVT, VA.getLocVT(), ISD::BITCAST, Arg,
                                 /*TODO: Kill=*/false);
        assert(BC != 0 && "Failed to emit a bitcast!");
        Arg = BC;
        ArgVT = VA.getLocVT();
        break;
      }
      default: llvm_unreachable("Unknown arg promotion!");
    }

    // Now copy/store arg to correct locations.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
        .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else if (VA.needsCustom()) {
      // TODO: We need custom lowering for vector (v2f64) args.
      if (VA.getLocVT() != MVT::f64) return false;

      CCValAssign &NextVA = ArgLocs[++i];

      // TODO: Only handle register args for now.
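      // An f64 in core registers is split across a GPR pair with VMOVRRD
      // below; both halves have to land in registers for that to work.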
      if (!(VA.isRegLoc() && NextVA.isRegLoc())) return false;

      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVRRD), VA.getLocReg())
                      .addReg(NextVA.getLocReg(), RegState::Define)
                      .addReg(Arg));
      RegArgs.push_back(VA.getLocReg());
      RegArgs.push_back(NextVA.getLocReg());
    } else {
      assert(VA.isMemLoc());
      // Need to store on the stack.
      Address Addr;
      Addr.BaseType = Address::RegBase;
      Addr.Base.Reg = ARM::SP;
      Addr.Offset = VA.getLocMemOffset();

      if (!ARMEmitStore(ArgVT, Arg, Addr)) return false;
    }
  }
  return true;
}

bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(AdjStackUp))
                  .addImm(NumBytes).addImm(0));

  // Now the return value.
  if (RetVT != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want.
      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(RVLocs[0].getLocReg())
                      .addReg(RVLocs[1].getLocReg()));

      UsedRegs.push_back(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      assert(RVLocs.size() == 1 &&
             "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

bool ARMFastISel::SelectRet(const Instruction *I) {
  const ReturnInst *Ret = cast<ReturnInst>(I);
  const Function &F = *I->getParent()->getParent();

  if (!FuncInfo.CanLowerReturn)
    return false;

  if (F.isVarArg())
    return false;

  CallingConv::ID CC = F.getCallingConv();
  if (Ret->getNumOperands() > 0) {
    SmallVector<ISD::OutputArg, 4> Outs;
    GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
                  Outs, TLI);

    // Analyze operands of the call, assigning locations to each operand.
    SmallVector<CCValAssign, 16> ValLocs;
    CCState CCInfo(CC, F.isVarArg(), TM, ValLocs, I->getContext());
    CCInfo.AnalyzeReturn(Outs, CCAssignFnForCall(CC, true /* is Ret */));

    const Value *RV = Ret->getOperand(0);
    unsigned Reg = getRegForValue(RV);
    if (Reg == 0)
      return false;

    // Only handle a single return value for now.
    if (ValLocs.size() != 1)
      return false;

    CCValAssign &VA = ValLocs[0];

    // Don't bother handling odd stuff for now.
    if (VA.getLocInfo() != CCValAssign::Full)
      return false;
    // Only handle register returns for now.
    if (!VA.isRegLoc())
      return false;
    // TODO: For now, don't try to handle cases where getLocInfo()
    // says Full but the types don't match.
    if (TLI.getValueType(RV->getType()) != VA.getValVT())
      return false;

    // Make the copy.
    unsigned SrcReg = Reg + VA.getValNo();
    unsigned DstReg = VA.getLocReg();
    const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg);
    // Avoid a cross-class copy. This is very unlikely.
    if (!SrcRC->contains(DstReg))
      return false;
    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
            DstReg).addReg(SrcReg);

    // Mark the register as live out of the function.
    MRI.addLiveOut(VA.getLocReg());
  }

  unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET;
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(RetOpc)));
  return true;
}

unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) {

  // Darwin needs the r9 versions of the opcodes.
  bool isDarwin = Subtarget->isTargetDarwin();
  if (isThumb) {
    return isDarwin ? ARM::tBLr9 : ARM::tBL;
  } else {
    return isDarwin ? ARM::BLr9 : ARM::BL;
  }
}

// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now if we have long calls specified we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    const Type *ArgTy = Op->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for Darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(NULL);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addExternalSymbol(TLI.getLibcallName(Call));
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
                         .addExternalSymbol(TLI.getLibcallName(Call)));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call, including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees that are direct calls.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();

  // TODO: Avoid some calling conventions?

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  MVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // TODO: For now, if we have long calls specified, we don't handle the call.
  if (EnableARMLongCalls) return false;

  // Set up the argument vectors.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<MVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);
    if (Arg == 0)
      return false;

    ISD::ArgFlagsTy Flags;
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
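    // Attributes that change how the argument is passed (e.g. sret and
    // byval pass memory through an extra pointer, nest uses a dedicated
    // register) aren't modeled on this path, so reject any call that
    // uses them.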
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    MVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call, BLr9 for Darwin, BL otherwise.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc = ARMSelectCallOp(GV);
  if (isThumb)
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc)))
          .addGlobalAddress(GV, 0, 0);
  else
    // Explicitly adding the predicate here.
    MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                                 TII.get(CallOpc))
                         .addGlobalAddress(GV, 0, 0));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call, including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

// TODO: SoftFP support.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    case Instruction::Ret:
      return SelectRet(I);
    default: break;
  }
  return false;
}

namespace llvm {
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    // Completely untested on non-Darwin.
    const TargetMachine &TM = funcInfo.MF->getTarget();

    // Darwin only for now, and not Thumb1.
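    // (Thumb1 can't encode much of what this file emits, e.g. the VFP
    // moves used for f64 arguments and returns, so it stays on the
    // SelectionDAG path.)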
    const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>();
    if (Subtarget->isTargetDarwin() && !Subtarget->isThumb1Only() &&
        !DisableARMFastISel)
      return new ARMFastISel(funcInfo);
    return 0;
  }
}
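
// For reference, this factory is reached through the target lowering's
// FastISel hook. A minimal sketch of that hook as it would appear in
// ARMISelLowering.cpp (assuming a tree that matches this revision):
//
//   FastISel *
//   ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
//     return ARM::createFastISel(funcInfo);
//   }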