// ARMFastISel.cpp revision 558cf007b5ed92324156b29861a0acbf95442278
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// Experimental: fast-isel for ARM is off by default until it matures.
static cl::opt<bool>
EnableARMFastISel("arm-fast-isel",
                  cl::desc("Turn on experimental ARM fast-isel support"),
                  cl::init(false), cl::Hidden);

namespace {

class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  bool isThumb;
  LLVMContext *Context;

  public:
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
    // These overrides exist so every emitted instruction is routed through
    // AddOptionalDefs, which appends ARM predicate / optional-def operands.
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

    #include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    virtual bool SelectLoad(const Instruction *I);
    virtual bool SelectStore(const Instruction *I);
    virtual bool SelectBranch(const Instruction *I);
    virtual bool SelectCmp(const Instruction *I);
    virtual bool SelectFPExt(const Instruction *I);
    virtual bool SelectFPTrunc(const Instruction *I);
    virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    virtual bool SelectSIToFP(const Instruction *I);
    virtual bool SelectFPToSI(const Instruction *I);
    virtual bool SelectSDiv(const Instruction *I);
    virtual bool SelectSRem(const Instruction *I);
    virtual bool SelectCall(const Instruction *I);
    virtual bool SelectSelect(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, EVT &VT);
    bool isLoadTypeLegal(const Type *Ty, EVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
    bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<EVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  // Note: *CPSR is only ever set to true here, never cleared; the caller is
  // expected to initialize it to false.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate?
  if (TII.isPredicable(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  CPSR is true iff the optional def
  // is CPSR; all other OptionalDefs in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    // No explicit def: emit the instruction, then copy the value out of its
    // first implicit def into ResultReg.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    // No explicit def: copy the result out of the first implicit def.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    // No explicit def: copy the result out of the first implicit def.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    // No explicit def: copy the result out of the first implicit def.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    // No explicit def: copy the result out of the first implicit def.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    // No explicit def: copy the result out of the first implicit def.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
364unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { 365 if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0; 366 367 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 368 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 369 TII.get(ARM::VMOVRS), MoveReg) 370 .addReg(SrcReg)); 371 return MoveReg; 372} 373 374unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { 375 if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0; 376 377 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 378 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 379 TII.get(ARM::VMOVSR), MoveReg) 380 .addReg(SrcReg)); 381 return MoveReg; 382} 383 384// For double width floating point we need to materialize two constants 385// (the high and the low) into integer registers then use a move to get 386// the combined constant into an FP reg. 387unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { 388 const APFloat Val = CFP->getValueAPF(); 389 bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64; 390 391 // This checks to see if we can use VFP3 instructions to materialize 392 // a constant, otherwise we have to go through the constant pool. 393 if (TLI.isFPImmLegal(Val, VT)) { 394 unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; 395 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 396 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 397 DestReg) 398 .addFPImm(CFP)); 399 return DestReg; 400 } 401 402 // Require VFP2 for loading fp constants. 403 if (!Subtarget->hasVFP2()) return false; 404 405 // MachineConstantPool wants an explicit alignment. 406 unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); 407 if (Align == 0) { 408 // TODO: Figure out if this is correct. 
409 Align = TD.getTypeAllocSize(CFP->getType()); 410 } 411 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 412 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 413 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS; 414 415 // The extra reg is for addrmode5. 416 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 417 DestReg) 418 .addConstantPoolIndex(Idx) 419 .addReg(0)); 420 return DestReg; 421} 422 423unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { 424 425 // For now 32-bit only. 426 if (VT.getSimpleVT().SimpleTy != MVT::i32) return false; 427 428 // MachineConstantPool wants an explicit alignment. 429 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 430 if (Align == 0) { 431 // TODO: Figure out if this is correct. 432 Align = TD.getTypeAllocSize(C->getType()); 433 } 434 unsigned Idx = MCP.getConstantPoolIndex(C, Align); 435 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 436 437 if (isThumb) 438 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 439 TII.get(ARM::t2LDRpci), DestReg) 440 .addConstantPoolIndex(Idx)); 441 else 442 // The extra reg and immediate are for addrmode2. 443 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 444 TII.get(ARM::LDRcp), DestReg) 445 .addConstantPoolIndex(Idx) 446 .addReg(0).addImm(0)); 447 448 return DestReg; 449} 450 451unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { 452 // For now 32-bit only. 453 if (VT.getSimpleVT().SimpleTy != MVT::i32) return 0; 454 455 Reloc::Model RelocM = TM.getRelocationModel(); 456 457 // TODO: No external globals for now. 458 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0; 459 460 // TODO: Need more magic for ARM PIC. 461 if (!isThumb && (RelocM == Reloc::PIC_)) return 0; 462 463 // MachineConstantPool wants an explicit alignment. 
464 unsigned Align = TD.getPrefTypeAlignment(GV->getType()); 465 if (Align == 0) { 466 // TODO: Figure out if this is correct. 467 Align = TD.getTypeAllocSize(GV->getType()); 468 } 469 470 // Grab index. 471 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); 472 unsigned Id = AFI->createConstPoolEntryUId(); 473 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, 474 ARMCP::CPValue, PCAdj); 475 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 476 477 // Load value. 478 MachineInstrBuilder MIB; 479 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 480 if (isThumb) { 481 unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; 482 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) 483 .addConstantPoolIndex(Idx); 484 if (RelocM == Reloc::PIC_) 485 MIB.addImm(Id); 486 } else { 487 // The extra reg and immediate are for addrmode2. 488 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), 489 DestReg) 490 .addConstantPoolIndex(Idx) 491 .addReg(0).addImm(0); 492 } 493 AddOptionalDefs(MIB); 494 return DestReg; 495} 496 497unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { 498 EVT VT = TLI.getValueType(C->getType(), true); 499 500 // Only handle simple types. 501 if (!VT.isSimple()) return 0; 502 503 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 504 return ARMMaterializeFP(CFP, VT); 505 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 506 return ARMMaterializeGV(GV, VT); 507 else if (isa<ConstantInt>(C)) 508 return ARMMaterializeInt(C, VT); 509 510 return 0; 511} 512 513unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { 514 // Don't handle dynamic allocas. 
515 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; 516 517 EVT VT; 518 if (!isTypeLegal(AI->getType(), VT)) return false; 519 520 DenseMap<const AllocaInst*, int>::iterator SI = 521 FuncInfo.StaticAllocaMap.find(AI); 522 523 // This will get lowered later into the correct offsets and registers 524 // via rewriteXFrameIndex. 525 if (SI != FuncInfo.StaticAllocaMap.end()) { 526 TargetRegisterClass* RC = TLI.getRegClassFor(VT); 527 unsigned ResultReg = createResultReg(RC); 528 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; 529 AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, 530 TII.get(Opc), ResultReg) 531 .addFrameIndex(SI->second) 532 .addImm(0)); 533 return ResultReg; 534 } 535 536 return 0; 537} 538 539bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { 540 VT = TLI.getValueType(Ty, true); 541 542 // Only handle simple types. 543 if (VT == MVT::Other || !VT.isSimple()) return false; 544 545 // Handle all legal types, i.e. a register that will directly hold this 546 // value. 547 return TLI.isTypeLegal(VT); 548} 549 550bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { 551 if (isTypeLegal(Ty, VT)) return true; 552 553 // If this is a type than can be sign or zero-extended to a basic operation 554 // go ahead and accept it now. 555 if (VT == MVT::i8 || VT == MVT::i16) 556 return true; 557 558 return false; 559} 560 561// Computes the Reg+Offset to get to an object. 562bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, 563 int &Offset) { 564 // Some boilerplate from the X86 FastISel. 565 const User *U = NULL; 566 unsigned Opcode = Instruction::UserOp1; 567 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 568 // Don't walk into other basic blocks; it's possible we haven't 569 // visited them yet, so the instructions may not yet be assigned 570 // virtual registers. 
571 if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) 572 return false; 573 Opcode = I->getOpcode(); 574 U = I; 575 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 576 Opcode = C->getOpcode(); 577 U = C; 578 } 579 580 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 581 if (Ty->getAddressSpace() > 255) 582 // Fast instruction selection doesn't support the special 583 // address spaces. 584 return false; 585 586 switch (Opcode) { 587 default: 588 break; 589 case Instruction::BitCast: { 590 // Look through bitcasts. 591 return ARMComputeRegOffset(U->getOperand(0), Reg, Offset); 592 } 593 case Instruction::IntToPtr: { 594 // Look past no-op inttoptrs. 595 if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) 596 return ARMComputeRegOffset(U->getOperand(0), Reg, Offset); 597 break; 598 } 599 case Instruction::PtrToInt: { 600 // Look past no-op ptrtoints. 601 if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) 602 return ARMComputeRegOffset(U->getOperand(0), Reg, Offset); 603 break; 604 } 605 case Instruction::Alloca: { 606 const AllocaInst *AI = cast<AllocaInst>(Obj); 607 DenseMap<const AllocaInst*, int>::iterator SI = 608 FuncInfo.StaticAllocaMap.find(AI); 609 if (SI != FuncInfo.StaticAllocaMap.end()) { 610 Reg = ARM::SP; 611 Offset = SI->second; 612 return true; 613 } 614 // Don't handle dynamic allocas. 615 assert(!FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Obj)) && 616 "Alloca should have been handled earlier!"); 617 return false; 618 } 619 } 620 621 // FIXME: Handle global variables. 622 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { 623 (void)GV; 624 return false; 625 } 626 627 // Try to get this in a register if nothing else has worked. 628 Reg = getRegForValue(Obj); 629 if (Reg == 0) return false; 630 631 // Since the offset may be too large for the load instruction 632 // get the reg+offset into a register. 
633 // TODO: Verify the additions work, otherwise we'll need to add the 634 // offset instead of 0 to the instructions and do all sorts of operand 635 // munging. 636 // TODO: Optimize this somewhat. 637 if (Offset != 0) { 638 ARMCC::CondCodes Pred = ARMCC::AL; 639 unsigned PredReg = 0; 640 641 if (!isThumb) 642 emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 643 Reg, Reg, Offset, Pred, PredReg, 644 static_cast<const ARMBaseInstrInfo&>(TII)); 645 else { 646 assert(AFI->isThumb2Function()); 647 emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 648 Reg, Reg, Offset, Pred, PredReg, 649 static_cast<const ARMBaseInstrInfo&>(TII)); 650 } 651 } 652 return true; 653} 654 655bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, 656 unsigned Reg, int Offset) { 657 658 assert(VT.isSimple() && "Non-simple types are invalid here!"); 659 unsigned Opc; 660 TargetRegisterClass *RC; 661 bool isFloat = false; 662 switch (VT.getSimpleVT().SimpleTy) { 663 default: 664 // This is mostly going to be Neon/vector support. 665 return false; 666 case MVT::i16: 667 Opc = isThumb ? ARM::t2LDRHi8 : ARM::LDRH; 668 RC = ARM::GPRRegisterClass; 669 VT = MVT::i32; 670 break; 671 case MVT::i8: 672 Opc = isThumb ? ARM::t2LDRBi8 : ARM::LDRB; 673 RC = ARM::GPRRegisterClass; 674 VT = MVT::i32; 675 break; 676 case MVT::i32: 677 Opc = isThumb ? ARM::t2LDRi8 : ARM::LDR; 678 RC = ARM::GPRRegisterClass; 679 break; 680 case MVT::f32: 681 Opc = ARM::VLDRS; 682 RC = TLI.getRegClassFor(VT); 683 isFloat = true; 684 break; 685 case MVT::f64: 686 Opc = ARM::VLDRD; 687 RC = TLI.getRegClassFor(VT); 688 isFloat = true; 689 break; 690 } 691 692 ResultReg = createResultReg(RC); 693 694 // For now with the additions above the offset should be zero - thus we 695 // can always fit into an i8. 
696 assert((Reg == ARM::SP || Offset == 0) && 697 "Offset not zero and not a stack load!"); 698 699 if (Reg == ARM::SP) 700 TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, 701 ResultReg, Offset, RC, 702 TM.getRegisterInfo()); 703 // The thumb and floating point instructions both take 2 operands, ARM takes 704 // another register. 705 else if (isFloat || isThumb) 706 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 707 TII.get(Opc), ResultReg) 708 .addReg(Reg).addImm(Offset)); 709 else 710 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 711 TII.get(Opc), ResultReg) 712 .addReg(Reg).addReg(0).addImm(Offset)); 713 return true; 714} 715 716bool ARMFastISel::SelectLoad(const Instruction *I) { 717 // Verify we have a legal type before going any further. 718 EVT VT; 719 if (!isLoadTypeLegal(I->getType(), VT)) 720 return false; 721 722 // Our register and offset with innocuous defaults. 723 unsigned Reg = 0; 724 int Offset = 0; 725 726 // See if we can handle this as Reg + Offset 727 if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset)) 728 return false; 729 730 unsigned ResultReg; 731 if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false; 732 733 UpdateValueMap(I, ResultReg); 734 return true; 735} 736 737bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, 738 unsigned DstReg, int Offset) { 739 unsigned StrOpc; 740 bool isFloat = false; 741 // VT is set here only for use in the alloca stores below - those are promoted 742 // to reg size always. 743 switch (VT.getSimpleVT().SimpleTy) { 744 default: return false; 745 case MVT::i1: 746 case MVT::i8: 747 VT = MVT::i32; 748 StrOpc = isThumb ? ARM::t2STRBi8 : ARM::STRB; 749 break; 750 case MVT::i16: 751 VT = MVT::i32; 752 StrOpc = isThumb ? ARM::t2STRHi8 : ARM::STRH; 753 break; 754 case MVT::i32: StrOpc = isThumb ? 
ARM::t2STRi8 : ARM::STR; break; 755 case MVT::f32: 756 if (!Subtarget->hasVFP2()) return false; 757 StrOpc = ARM::VSTRS; 758 isFloat = true; 759 break; 760 case MVT::f64: 761 if (!Subtarget->hasVFP2()) return false; 762 StrOpc = ARM::VSTRD; 763 isFloat = true; 764 break; 765 } 766 767 if (DstReg == ARM::SP) 768 TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, 769 SrcReg, true /*isKill*/, Offset, 770 TLI.getRegClassFor(VT), TM.getRegisterInfo()); 771 // The thumb addressing mode has operands swapped from the arm addressing 772 // mode, the floating point one only has two operands. 773 if (isFloat || isThumb) 774 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 775 TII.get(StrOpc)) 776 .addReg(SrcReg).addReg(DstReg).addImm(Offset)); 777 else 778 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 779 TII.get(StrOpc)) 780 .addReg(SrcReg).addReg(DstReg).addReg(0).addImm(Offset)); 781 782 return true; 783} 784 785bool ARMFastISel::SelectStore(const Instruction *I) { 786 Value *Op0 = I->getOperand(0); 787 unsigned SrcReg = 0; 788 789 // Yay type legalization 790 EVT VT; 791 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) 792 return false; 793 794 // Get the value to be stored into a register. 795 SrcReg = getRegForValue(Op0); 796 if (SrcReg == 0) 797 return false; 798 799 // Our register and offset with innocuous defaults. 800 unsigned Reg = 0; 801 int Offset = 0; 802 803 // See if we can handle this as Reg + Offset 804 if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset)) 805 return false; 806 807 if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false; 808 809 return true; 810} 811 812static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { 813 switch (Pred) { 814 // Needs two compares... 
815 case CmpInst::FCMP_ONE: 816 case CmpInst::FCMP_UEQ: 817 default: 818 assert(false && "Unhandled CmpInst::Predicate!"); 819 return ARMCC::AL; 820 case CmpInst::ICMP_EQ: 821 case CmpInst::FCMP_OEQ: 822 return ARMCC::EQ; 823 case CmpInst::ICMP_SGT: 824 case CmpInst::FCMP_OGT: 825 return ARMCC::GT; 826 case CmpInst::ICMP_SGE: 827 case CmpInst::FCMP_OGE: 828 return ARMCC::GE; 829 case CmpInst::ICMP_UGT: 830 case CmpInst::FCMP_UGT: 831 return ARMCC::HI; 832 case CmpInst::FCMP_OLT: 833 return ARMCC::MI; 834 case CmpInst::ICMP_ULE: 835 case CmpInst::FCMP_OLE: 836 return ARMCC::LS; 837 case CmpInst::FCMP_ORD: 838 return ARMCC::VC; 839 case CmpInst::FCMP_UNO: 840 return ARMCC::VS; 841 case CmpInst::FCMP_UGE: 842 return ARMCC::PL; 843 case CmpInst::ICMP_SLT: 844 case CmpInst::FCMP_ULT: 845 return ARMCC::LT; 846 case CmpInst::ICMP_SLE: 847 case CmpInst::FCMP_ULE: 848 return ARMCC::LE; 849 case CmpInst::FCMP_UNE: 850 case CmpInst::ICMP_NE: 851 return ARMCC::NE; 852 case CmpInst::ICMP_UGE: 853 return ARMCC::HS; 854 case CmpInst::ICMP_ULT: 855 return ARMCC::LO; 856 } 857} 858 859bool ARMFastISel::SelectBranch(const Instruction *I) { 860 const BranchInst *BI = cast<BranchInst>(I); 861 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 862 MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; 863 864 // Simple branch support. 865 // TODO: Try to avoid the re-computation in some places. 866 unsigned CondReg = getRegForValue(BI->getCondition()); 867 if (CondReg == 0) return false; 868 869 // Re-set the flags just in case. 870 unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; 871 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 872 .addReg(CondReg).addImm(1)); 873 874 unsigned BrOpc = isThumb ? 
ARM::t2Bcc : ARM::Bcc; 875 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 876 .addMBB(TBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 877 FastEmitBranch(FBB, DL); 878 FuncInfo.MBB->addSuccessor(TBB); 879 return true; 880} 881 882bool ARMFastISel::SelectCmp(const Instruction *I) { 883 const CmpInst *CI = cast<CmpInst>(I); 884 885 EVT VT; 886 const Type *Ty = CI->getOperand(0)->getType(); 887 if (!isTypeLegal(Ty, VT)) 888 return false; 889 890 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 891 if (isFloat && !Subtarget->hasVFP2()) 892 return false; 893 894 unsigned CmpOpc; 895 unsigned CondReg; 896 switch (VT.getSimpleVT().SimpleTy) { 897 default: return false; 898 // TODO: Verify compares. 899 case MVT::f32: 900 CmpOpc = ARM::VCMPES; 901 CondReg = ARM::FPSCR; 902 break; 903 case MVT::f64: 904 CmpOpc = ARM::VCMPED; 905 CondReg = ARM::FPSCR; 906 break; 907 case MVT::i32: 908 CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; 909 CondReg = ARM::CPSR; 910 break; 911 } 912 913 // Get the compare predicate. 914 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 915 916 // We may not handle every CC for now. 917 if (ARMPred == ARMCC::AL) return false; 918 919 unsigned Arg1 = getRegForValue(CI->getOperand(0)); 920 if (Arg1 == 0) return false; 921 922 unsigned Arg2 = getRegForValue(CI->getOperand(1)); 923 if (Arg2 == 0) return false; 924 925 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 926 .addReg(Arg1).addReg(Arg2)); 927 928 // For floating point we need to move the result to a comparison register 929 // that we can then use for branches. 930 if (isFloat) 931 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 932 TII.get(ARM::FMSTAT))); 933 934 // Now set a register based on the comparison. Explicitly set the predicates 935 // here. 936 unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; 937 TargetRegisterClass *RC = isThumb ? 
ARM::rGPRRegisterClass 938 : ARM::GPRRegisterClass; 939 unsigned DestReg = createResultReg(RC); 940 Constant *Zero 941 = ConstantInt::get(Type::getInt32Ty(*Context), 0); 942 unsigned ZeroReg = TargetMaterializeConstant(Zero); 943 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) 944 .addReg(ZeroReg).addImm(1) 945 .addImm(ARMPred).addReg(CondReg); 946 947 UpdateValueMap(I, DestReg); 948 return true; 949} 950 951bool ARMFastISel::SelectFPExt(const Instruction *I) { 952 // Make sure we have VFP and that we're extending float to double. 953 if (!Subtarget->hasVFP2()) return false; 954 955 Value *V = I->getOperand(0); 956 if (!I->getType()->isDoubleTy() || 957 !V->getType()->isFloatTy()) return false; 958 959 unsigned Op = getRegForValue(V); 960 if (Op == 0) return false; 961 962 unsigned Result = createResultReg(ARM::DPRRegisterClass); 963 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 964 TII.get(ARM::VCVTDS), Result) 965 .addReg(Op)); 966 UpdateValueMap(I, Result); 967 return true; 968} 969 970bool ARMFastISel::SelectFPTrunc(const Instruction *I) { 971 // Make sure we have VFP and that we're truncating double to float. 972 if (!Subtarget->hasVFP2()) return false; 973 974 Value *V = I->getOperand(0); 975 if (!(I->getType()->isFloatTy() && 976 V->getType()->isDoubleTy())) return false; 977 978 unsigned Op = getRegForValue(V); 979 if (Op == 0) return false; 980 981 unsigned Result = createResultReg(ARM::SPRRegisterClass); 982 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 983 TII.get(ARM::VCVTSD), Result) 984 .addReg(Op)); 985 UpdateValueMap(I, Result); 986 return true; 987} 988 989bool ARMFastISel::SelectSIToFP(const Instruction *I) { 990 // Make sure we have VFP. 
991 if (!Subtarget->hasVFP2()) return false; 992 993 EVT DstVT; 994 const Type *Ty = I->getType(); 995 if (!isTypeLegal(Ty, DstVT)) 996 return false; 997 998 unsigned Op = getRegForValue(I->getOperand(0)); 999 if (Op == 0) return false; 1000 1001 // The conversion routine works on fp-reg to fp-reg and the operand above 1002 // was an integer, move it to the fp registers if possible. 1003 unsigned FP = ARMMoveToFPReg(MVT::f32, Op); 1004 if (FP == 0) return false; 1005 1006 unsigned Opc; 1007 if (Ty->isFloatTy()) Opc = ARM::VSITOS; 1008 else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; 1009 else return 0; 1010 1011 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); 1012 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1013 ResultReg) 1014 .addReg(FP)); 1015 UpdateValueMap(I, ResultReg); 1016 return true; 1017} 1018 1019bool ARMFastISel::SelectFPToSI(const Instruction *I) { 1020 // Make sure we have VFP. 1021 if (!Subtarget->hasVFP2()) return false; 1022 1023 EVT DstVT; 1024 const Type *RetTy = I->getType(); 1025 if (!isTypeLegal(RetTy, DstVT)) 1026 return false; 1027 1028 unsigned Op = getRegForValue(I->getOperand(0)); 1029 if (Op == 0) return false; 1030 1031 unsigned Opc; 1032 const Type *OpTy = I->getOperand(0)->getType(); 1033 if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; 1034 else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; 1035 else return 0; 1036 1037 // f64->s32 or f32->s32 both need an intermediate f32 reg. 1038 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1039 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1040 ResultReg) 1041 .addReg(Op)); 1042 1043 // This result needs to be in an integer register, but the conversion only 1044 // takes place in fp-regs. 
1045 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); 1046 if (IntReg == 0) return false; 1047 1048 UpdateValueMap(I, IntReg); 1049 return true; 1050} 1051 1052bool ARMFastISel::SelectSelect(const Instruction *I) { 1053 EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); 1054 if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) 1055 return false; 1056 1057 // Things need to be register sized for register moves. 1058 if (VT.getSimpleVT().SimpleTy != MVT::i32) return false; 1059 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 1060 1061 unsigned CondReg = getRegForValue(I->getOperand(0)); 1062 if (CondReg == 0) return false; 1063 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1064 if (Op1Reg == 0) return false; 1065 unsigned Op2Reg = getRegForValue(I->getOperand(2)); 1066 if (Op2Reg == 0) return false; 1067 1068 unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; 1069 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1070 .addReg(CondReg).addImm(1)); 1071 unsigned ResultReg = createResultReg(RC); 1072 unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; 1073 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1074 .addReg(Op1Reg).addReg(Op2Reg) 1075 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 1076 UpdateValueMap(I, ResultReg); 1077 return true; 1078} 1079 1080bool ARMFastISel::SelectSDiv(const Instruction *I) { 1081 EVT VT; 1082 const Type *Ty = I->getType(); 1083 if (!isTypeLegal(Ty, VT)) 1084 return false; 1085 1086 // If we have integer div support we should have selected this automagically. 1087 // In case we have a real miss go ahead and return false and we'll pick 1088 // it up later. 1089 if (Subtarget->hasDivide()) return false; 1090 1091 // Otherwise emit a libcall. 
1092 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1093 if (VT == MVT::i8) 1094 LC = RTLIB::SDIV_I8; 1095 else if (VT == MVT::i16) 1096 LC = RTLIB::SDIV_I16; 1097 else if (VT == MVT::i32) 1098 LC = RTLIB::SDIV_I32; 1099 else if (VT == MVT::i64) 1100 LC = RTLIB::SDIV_I64; 1101 else if (VT == MVT::i128) 1102 LC = RTLIB::SDIV_I128; 1103 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1104 1105 return ARMEmitLibcall(I, LC); 1106} 1107 1108bool ARMFastISel::SelectSRem(const Instruction *I) { 1109 EVT VT; 1110 const Type *Ty = I->getType(); 1111 if (!isTypeLegal(Ty, VT)) 1112 return false; 1113 1114 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1115 if (VT == MVT::i8) 1116 LC = RTLIB::SREM_I8; 1117 else if (VT == MVT::i16) 1118 LC = RTLIB::SREM_I16; 1119 else if (VT == MVT::i32) 1120 LC = RTLIB::SREM_I32; 1121 else if (VT == MVT::i64) 1122 LC = RTLIB::SREM_I64; 1123 else if (VT == MVT::i128) 1124 LC = RTLIB::SREM_I128; 1125 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); 1126 1127 return ARMEmitLibcall(I, LC); 1128} 1129 1130bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { 1131 EVT VT = TLI.getValueType(I->getType(), true); 1132 1133 // We can get here in the case when we want to use NEON for our fp 1134 // operations, but can't figure out how to. Just use the vfp instructions 1135 // if we have them. 1136 // FIXME: It'd be nice to use NEON instructions. 1137 const Type *Ty = I->getType(); 1138 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 1139 if (isFloat && !Subtarget->hasVFP2()) 1140 return false; 1141 1142 unsigned Op1 = getRegForValue(I->getOperand(0)); 1143 if (Op1 == 0) return false; 1144 1145 unsigned Op2 = getRegForValue(I->getOperand(1)); 1146 if (Op2 == 0) return false; 1147 1148 unsigned Opc; 1149 bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 || 1150 VT.getSimpleVT().SimpleTy == MVT::i64; 1151 switch (ISDOpcode) { 1152 default: return false; 1153 case ISD::FADD: 1154 Opc = is64bit ? 
ARM::VADDD : ARM::VADDS; 1155 break; 1156 case ISD::FSUB: 1157 Opc = is64bit ? ARM::VSUBD : ARM::VSUBS; 1158 break; 1159 case ISD::FMUL: 1160 Opc = is64bit ? ARM::VMULD : ARM::VMULS; 1161 break; 1162 } 1163 unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); 1164 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1165 TII.get(Opc), ResultReg) 1166 .addReg(Op1).addReg(Op2)); 1167 UpdateValueMap(I, ResultReg); 1168 return true; 1169} 1170 1171// Call Handling Code 1172 1173// This is largely taken directly from CCAssignFnForNode - we don't support 1174// varargs in FastISel so that part has been removed. 1175// TODO: We may not support all of this. 1176CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) { 1177 switch (CC) { 1178 default: 1179 llvm_unreachable("Unsupported calling convention"); 1180 case CallingConv::C: 1181 case CallingConv::Fast: 1182 // Use target triple & subtarget features to do actual dispatch. 1183 if (Subtarget->isAAPCS_ABI()) { 1184 if (Subtarget->hasVFP2() && 1185 FloatABIType == FloatABI::Hard) 1186 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1187 else 1188 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1189 } else 1190 return (Return ? RetCC_ARM_APCS: CC_ARM_APCS); 1191 case CallingConv::ARM_AAPCS_VFP: 1192 return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP); 1193 case CallingConv::ARM_AAPCS: 1194 return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS); 1195 case CallingConv::ARM_APCS: 1196 return (Return ? 
RetCC_ARM_APCS: CC_ARM_APCS); 1197 } 1198} 1199 1200bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args, 1201 SmallVectorImpl<unsigned> &ArgRegs, 1202 SmallVectorImpl<EVT> &ArgVTs, 1203 SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags, 1204 SmallVectorImpl<unsigned> &RegArgs, 1205 CallingConv::ID CC, 1206 unsigned &NumBytes) { 1207 SmallVector<CCValAssign, 16> ArgLocs; 1208 CCState CCInfo(CC, false, TM, ArgLocs, *Context); 1209 CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false)); 1210 1211 // Get a count of how many bytes are to be pushed on the stack. 1212 NumBytes = CCInfo.getNextStackOffset(); 1213 1214 // Issue CALLSEQ_START 1215 unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode(); 1216 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1217 TII.get(AdjStackDown)) 1218 .addImm(NumBytes)); 1219 1220 // Process the args. 1221 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 1222 CCValAssign &VA = ArgLocs[i]; 1223 unsigned Arg = ArgRegs[VA.getValNo()]; 1224 EVT ArgVT = ArgVTs[VA.getValNo()]; 1225 1226 // Handle arg promotion, etc. 1227 switch (VA.getLocInfo()) { 1228 case CCValAssign::Full: break; 1229 default: 1230 // TODO: Handle arg promotion. 1231 return false; 1232 } 1233 1234 // Now copy/store arg to correct locations. 1235 // TODO: We need custom lowering for f64 args. 
1236 if (VA.isRegLoc() && !VA.needsCustom()) { 1237 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1238 VA.getLocReg()) 1239 .addReg(Arg); 1240 RegArgs.push_back(VA.getLocReg()); 1241 } else { 1242 // Need to store 1243 return false; 1244 } 1245 } 1246 1247 return true; 1248} 1249 1250bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs, 1251 const Instruction *I, CallingConv::ID CC, 1252 unsigned &NumBytes) { 1253 // Issue CALLSEQ_END 1254 unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode(); 1255 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1256 TII.get(AdjStackUp)) 1257 .addImm(NumBytes).addImm(0)); 1258 1259 // Now the return value. 1260 if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) { 1261 SmallVector<CCValAssign, 16> RVLocs; 1262 CCState CCInfo(CC, false, TM, RVLocs, *Context); 1263 CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true)); 1264 1265 // Copy all of the result registers out of their specified physreg. 1266 if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) { 1267 // For this move we copy into two registers and then move into the 1268 // double fp reg we want. 1269 // TODO: Are the copies necessary? 
1270 TargetRegisterClass *CopyRC = TLI.getRegClassFor(MVT::i32); 1271 unsigned Copy1 = createResultReg(CopyRC); 1272 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1273 Copy1).addReg(RVLocs[0].getLocReg()); 1274 UsedRegs.push_back(RVLocs[0].getLocReg()); 1275 1276 unsigned Copy2 = createResultReg(CopyRC); 1277 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1278 Copy2).addReg(RVLocs[1].getLocReg()); 1279 UsedRegs.push_back(RVLocs[1].getLocReg()); 1280 1281 EVT DestVT = RVLocs[0].getValVT(); 1282 TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT); 1283 unsigned ResultReg = createResultReg(DstRC); 1284 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 1285 TII.get(ARM::VMOVDRR), ResultReg) 1286 .addReg(Copy1).addReg(Copy2)); 1287 1288 // Finally update the result. 1289 UpdateValueMap(I, ResultReg); 1290 } else { 1291 assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!"); 1292 EVT CopyVT = RVLocs[0].getValVT(); 1293 TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); 1294 1295 unsigned ResultReg = createResultReg(DstRC); 1296 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), 1297 ResultReg).addReg(RVLocs[0].getLocReg()); 1298 UsedRegs.push_back(RVLocs[0].getLocReg()); 1299 1300 // Finally update the result. 1301 UpdateValueMap(I, ResultReg); 1302 } 1303 } 1304 1305 return true; 1306} 1307 1308// A quick function that will emit a call for a named libcall in F with the 1309// vector of passed arguments for the Instruction in I. We can assume that we 1310// can emit a call for any libcall we can produce. This is an abridged version 1311// of the full call infrastructure since we won't need to worry about things 1312// like computed function pointers or strange arguments at call sites. 1313// TODO: Try to unify this and the normal call bits for ARM, then try to unify 1314// with X86. 
1315bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { 1316 CallingConv::ID CC = TLI.getLibcallCallingConv(Call); 1317 1318 // Handle *simple* calls for now. 1319 const Type *RetTy = I->getType(); 1320 EVT RetVT; 1321 if (RetTy->isVoidTy()) 1322 RetVT = MVT::isVoid; 1323 else if (!isTypeLegal(RetTy, RetVT)) 1324 return false; 1325 1326 // For now we're using BLX etc on the assumption that we have v5t ops. 1327 if (!Subtarget->hasV5TOps()) return false; 1328 1329 // Set up the argument vectors. 1330 SmallVector<Value*, 8> Args; 1331 SmallVector<unsigned, 8> ArgRegs; 1332 SmallVector<EVT, 8> ArgVTs; 1333 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1334 Args.reserve(I->getNumOperands()); 1335 ArgRegs.reserve(I->getNumOperands()); 1336 ArgVTs.reserve(I->getNumOperands()); 1337 ArgFlags.reserve(I->getNumOperands()); 1338 for (unsigned i = 0; i < I->getNumOperands(); ++i) { 1339 Value *Op = I->getOperand(i); 1340 unsigned Arg = getRegForValue(Op); 1341 if (Arg == 0) return false; 1342 1343 const Type *ArgTy = Op->getType(); 1344 EVT ArgVT; 1345 if (!isTypeLegal(ArgTy, ArgVT)) return false; 1346 1347 ISD::ArgFlagsTy Flags; 1348 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1349 Flags.setOrigAlign(OriginalAlignment); 1350 1351 Args.push_back(Op); 1352 ArgRegs.push_back(Arg); 1353 ArgVTs.push_back(ArgVT); 1354 ArgFlags.push_back(Flags); 1355 } 1356 1357 // Handle the arguments now that we've gotten them. 1358 SmallVector<unsigned, 4> RegArgs; 1359 unsigned NumBytes; 1360 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) 1361 return false; 1362 1363 // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. 1364 // TODO: Turn this into the table of arm call ops. 1365 MachineInstrBuilder MIB; 1366 unsigned CallOpc; 1367 if(isThumb) 1368 CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi; 1369 else 1370 CallOpc = Subtarget->isTargetDarwin() ? 
ARM::BLr9 : ARM::BL; 1371 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) 1372 .addExternalSymbol(TLI.getLibcallName(Call)); 1373 1374 // Add implicit physical register uses to the call. 1375 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 1376 MIB.addReg(RegArgs[i]); 1377 1378 // Finish off the call including any return values. 1379 SmallVector<unsigned, 4> UsedRegs; 1380 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; 1381 1382 // Set all unused physreg defs as dead. 1383 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 1384 1385 return true; 1386} 1387 1388bool ARMFastISel::SelectCall(const Instruction *I) { 1389 const CallInst *CI = cast<CallInst>(I); 1390 const Value *Callee = CI->getCalledValue(); 1391 1392 // Can't handle inline asm or worry about intrinsics yet. 1393 if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false; 1394 1395 // Only handle global variable Callees that are direct calls. 1396 const GlobalValue *GV = dyn_cast<GlobalValue>(Callee); 1397 if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel())) 1398 return false; 1399 1400 // Check the calling convention. 1401 ImmutableCallSite CS(CI); 1402 CallingConv::ID CC = CS.getCallingConv(); 1403 // TODO: Avoid some calling conventions? 1404 if (CC != CallingConv::C) { 1405 // errs() << "Can't handle calling convention: " << CC << "\n"; 1406 return false; 1407 } 1408 1409 // Let SDISel handle vararg functions. 1410 const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); 1411 const FunctionType *FTy = cast<FunctionType>(PT->getElementType()); 1412 if (FTy->isVarArg()) 1413 return false; 1414 1415 // Handle *simple* calls for now. 1416 const Type *RetTy = I->getType(); 1417 EVT RetVT; 1418 if (RetTy->isVoidTy()) 1419 RetVT = MVT::isVoid; 1420 else if (!isTypeLegal(RetTy, RetVT)) 1421 return false; 1422 1423 // For now we're using BLX etc on the assumption that we have v5t ops. 
1424 // TODO: Maybe? 1425 if (!Subtarget->hasV5TOps()) return false; 1426 1427 // Set up the argument vectors. 1428 SmallVector<Value*, 8> Args; 1429 SmallVector<unsigned, 8> ArgRegs; 1430 SmallVector<EVT, 8> ArgVTs; 1431 SmallVector<ISD::ArgFlagsTy, 8> ArgFlags; 1432 Args.reserve(CS.arg_size()); 1433 ArgRegs.reserve(CS.arg_size()); 1434 ArgVTs.reserve(CS.arg_size()); 1435 ArgFlags.reserve(CS.arg_size()); 1436 for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); 1437 i != e; ++i) { 1438 unsigned Arg = getRegForValue(*i); 1439 1440 if (Arg == 0) 1441 return false; 1442 ISD::ArgFlagsTy Flags; 1443 unsigned AttrInd = i - CS.arg_begin() + 1; 1444 if (CS.paramHasAttr(AttrInd, Attribute::SExt)) 1445 Flags.setSExt(); 1446 if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) 1447 Flags.setZExt(); 1448 1449 // FIXME: Only handle *easy* calls for now. 1450 if (CS.paramHasAttr(AttrInd, Attribute::InReg) || 1451 CS.paramHasAttr(AttrInd, Attribute::StructRet) || 1452 CS.paramHasAttr(AttrInd, Attribute::Nest) || 1453 CS.paramHasAttr(AttrInd, Attribute::ByVal)) 1454 return false; 1455 1456 const Type *ArgTy = (*i)->getType(); 1457 EVT ArgVT; 1458 if (!isTypeLegal(ArgTy, ArgVT)) 1459 return false; 1460 unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); 1461 Flags.setOrigAlign(OriginalAlignment); 1462 1463 Args.push_back(*i); 1464 ArgRegs.push_back(Arg); 1465 ArgVTs.push_back(ArgVT); 1466 ArgFlags.push_back(Flags); 1467 } 1468 1469 // Handle the arguments now that we've gotten them. 1470 SmallVector<unsigned, 4> RegArgs; 1471 unsigned NumBytes; 1472 if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes)) 1473 return false; 1474 1475 // Issue the call, BLXr9 for darwin, BLX otherwise. This uses V5 ops. 1476 // TODO: Turn this into the table of arm call ops. 1477 MachineInstrBuilder MIB; 1478 unsigned CallOpc; 1479 if(isThumb) 1480 CallOpc = Subtarget->isTargetDarwin() ? 
ARM::tBLXi_r9 : ARM::tBLXi; 1481 else 1482 CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL; 1483 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)) 1484 .addGlobalAddress(GV, 0, 0); 1485 1486 // Add implicit physical register uses to the call. 1487 for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) 1488 MIB.addReg(RegArgs[i]); 1489 1490 // Finish off the call including any return values. 1491 SmallVector<unsigned, 4> UsedRegs; 1492 if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false; 1493 1494 // Set all unused physreg defs as dead. 1495 static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); 1496 1497 return true; 1498 1499} 1500 1501// TODO: SoftFP support. 1502bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { 1503 // No Thumb-1 for now. 1504 if (isThumb && !AFI->isThumb2Function()) return false; 1505 1506 switch (I->getOpcode()) { 1507 case Instruction::Load: 1508 return SelectLoad(I); 1509 case Instruction::Store: 1510 return SelectStore(I); 1511 case Instruction::Br: 1512 return SelectBranch(I); 1513 case Instruction::ICmp: 1514 case Instruction::FCmp: 1515 return SelectCmp(I); 1516 case Instruction::FPExt: 1517 return SelectFPExt(I); 1518 case Instruction::FPTrunc: 1519 return SelectFPTrunc(I); 1520 case Instruction::SIToFP: 1521 return SelectSIToFP(I); 1522 case Instruction::FPToSI: 1523 return SelectFPToSI(I); 1524 case Instruction::FAdd: 1525 return SelectBinaryOp(I, ISD::FADD); 1526 case Instruction::FSub: 1527 return SelectBinaryOp(I, ISD::FSUB); 1528 case Instruction::FMul: 1529 return SelectBinaryOp(I, ISD::FMUL); 1530 case Instruction::SDiv: 1531 return SelectSDiv(I); 1532 case Instruction::SRem: 1533 return SelectSRem(I); 1534 case Instruction::Call: 1535 return SelectCall(I); 1536 case Instruction::Select: 1537 return SelectSelect(I); 1538 default: break; 1539 } 1540 return false; 1541} 1542 1543namespace llvm { 1544 llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo 
&funcInfo) { 1545 // Completely untested on non-darwin. 1546 const TargetMachine &TM = funcInfo.MF->getTarget(); 1547 const ARMSubtarget *Subtarget = &TM.getSubtarget<ARMSubtarget>(); 1548 if (Subtarget->isTargetDarwin() && EnableARMFastISel) 1549 return new ARMFastISel(funcInfo); 1550 return 0; 1551 } 1552} 1553