// ARMFastISel.cpp revision dccd2c3c4346dd6625b80fcac9caa1be99731c9c
//===-- ARMFastISel.cpp - ARM FastISel implementation ---------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the ARM-specific support for the FastISel class. Some
// of the target-specific code is generated by tablegen in the file
// ARMGenFastISel.inc, which is #included here.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMCallingConv.h"
#include "ARMRegisterInfo.h"
#include "ARMTargetMachine.h"
#include "ARMSubtarget.h"
#include "ARMConstantPoolValue.h"
#include "llvm/CallingConv.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

// ARM fast-isel is experimental at this point, so it is off by default and
// hidden from normal -help output.
static cl::opt<bool>
EnableARMFastISel("arm-fast-isel",
                  cl::desc("Turn on experimental ARM fast-isel support"),
                  cl::init(false), cl::Hidden);

namespace {

// ARMFastISel - The ARM-specific FastISel subclass.  Instruction emission
// funnels through the FastEmitInst_* helpers below, which differ from the
// generic FastISel.cpp versions only in that they route every built
// MachineInstr through AddOptionalDefs() to append default predicate /
// CC-def operands where the instruction description requires them.
class ARMFastISel : public FastISel {

  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;
  const TargetMachine &TM;
  const TargetInstrInfo &TII;
  const TargetLowering &TLI;
  ARMFunctionInfo *AFI;

  // Convenience variables to avoid some queries.
  // isThumb is derived from the per-function ARMFunctionInfo in the ctor.
  bool isThumb;
  LLVMContext *Context;

  public:
    // The constructor caches subtarget/function info so the selection
    // routines below don't have to re-query them on every instruction.
    explicit ARMFastISel(FunctionLoweringInfo &funcInfo)
    : FastISel(funcInfo),
      TM(funcInfo.MF->getTarget()),
      TII(*TM.getInstrInfo()),
      TLI(*TM.getTargetLowering()) {
      Subtarget = &TM.getSubtarget<ARMSubtarget>();
      AFI = funcInfo.MF->getInfo<ARMFunctionInfo>();
      isThumb = AFI->isThumbFunction();
      Context = &funcInfo.Fn->getContext();
    }

    // Code from FastISel.cpp.
    virtual unsigned FastEmitInst_(unsigned MachineInstOpcode,
                                   const TargetRegisterClass *RC);
    virtual unsigned FastEmitInst_r(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    unsigned Op0, bool Op0IsKill);
    virtual unsigned FastEmitInst_rr(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     unsigned Op1, bool Op1IsKill);
    virtual unsigned FastEmitInst_ri(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     uint64_t Imm);
    virtual unsigned FastEmitInst_rf(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill,
                                     const ConstantFP *FPImm);
    virtual unsigned FastEmitInst_i(unsigned MachineInstOpcode,
                                    const TargetRegisterClass *RC,
                                    uint64_t Imm);
    virtual unsigned FastEmitInst_rri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill,
                                      uint64_t Imm);
    virtual unsigned FastEmitInst_extractsubreg(MVT RetVT,
                                                unsigned Op0, bool Op0IsKill,
                                                uint32_t Idx);

    // Backend specific FastISel code.
    virtual bool TargetSelectInstruction(const Instruction *I);
    virtual unsigned TargetMaterializeConstant(const Constant *C);
    virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI);

#include "ARMGenFastISel.inc"

    // Instruction selection routines.
  private:
    virtual bool SelectLoad(const Instruction *I);
    virtual bool SelectStore(const Instruction *I);
    virtual bool SelectBranch(const Instruction *I);
    virtual bool SelectCmp(const Instruction *I);
    virtual bool SelectFPExt(const Instruction *I);
    virtual bool SelectFPTrunc(const Instruction *I);
    virtual bool SelectBinaryOp(const Instruction *I, unsigned ISDOpcode);
    virtual bool SelectSIToFP(const Instruction *I);
    virtual bool SelectFPToSI(const Instruction *I);
    virtual bool SelectSDiv(const Instruction *I);
    virtual bool SelectSRem(const Instruction *I);
    virtual bool SelectCall(const Instruction *I);
    virtual bool SelectSelect(const Instruction *I);

    // Utility routines.
  private:
    bool isTypeLegal(const Type *Ty, EVT &VT);
    bool isLoadTypeLegal(const Type *Ty, EVT &VT);
    bool ARMEmitLoad(EVT VT, unsigned &ResultReg, unsigned Reg, int Offset);
    bool ARMEmitStore(EVT VT, unsigned SrcReg, unsigned Reg, int Offset);
    bool ARMLoadAlloca(const Instruction *I, EVT VT);
    bool ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT);
    bool ARMComputeRegOffset(const Value *Obj, unsigned &Reg, int &Offset);
    unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT);
    unsigned ARMMaterializeInt(const Constant *C, EVT VT);
    unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT);
    unsigned ARMMoveToFPReg(EVT VT, unsigned SrcReg);
    unsigned ARMMoveToIntReg(EVT VT, unsigned SrcReg);

    // Call handling routines.
  private:
    CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool Return);
    bool ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                         SmallVectorImpl<unsigned> &ArgRegs,
                         SmallVectorImpl<EVT> &ArgVTs,
                         SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                         SmallVectorImpl<unsigned> &RegArgs,
                         CallingConv::ID CC,
                         unsigned &NumBytes);
    bool FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                    const Instruction *I, CallingConv::ID CC,
                    unsigned &NumBytes);
    bool ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call);

    // OptionalDef handling routines.
  private:
    bool DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR);
    const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB);
};

} // end anonymous namespace

#include "ARMGenCallingConv.inc"

// DefinesOptionalPredicate - This is different from DefinesPredicate in that
// we don't care about implicit defs here, just places we'll need to add a
// default CCReg argument. Sets CPSR if we're setting CPSR instead of CCR.
// Returns true iff the instruction description carries an optional def at
// all; *CPSR is only ever set to true, so callers must initialize it false.
bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) {
  const TargetInstrDesc &TID = MI->getDesc();
  if (!TID.hasOptionalDef())
    return false;

  // Look to see if our OptionalDef is defining CPSR or CCR.
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || !MO.isDef()) continue;
    if (MO.getReg() == ARM::CPSR)
      *CPSR = true;
  }
  return true;
}

// If the machine is predicable go ahead and add the predicate operands, if
// it needs default CC operands add those.
const MachineInstrBuilder &
ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) {
  MachineInstr *MI = &*MIB;

  // Do we use a predicate?
  if (TII.isPredicable(MI))
    AddDefaultPred(MIB);

  // Do we optionally set a predicate?  Preds is size > 0 iff the predicate
  // defines CPSR. All other OptionalDefines in ARM are the CCR register.
  bool CPSR = false;
  if (DefinesOptionalPredicate(MI, &CPSR)) {
    if (CPSR)
      AddDefaultT1CC(MIB);
    else
      AddDefaultCC(MIB);
  }
  return MIB;
}

// FastEmitInst_* - These mirror the generic FastISel.cpp emitters; each
// builds the instruction into FuncInfo.MBB at FuncInfo.InsertPt and then
// adds optional predicate/CC operands.  When the instruction description
// has no explicit def (getNumDefs() == 0), the result is produced in the
// instruction's first implicit def and copied into a fresh virtual
// register so a stable vreg can be returned.

unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode,
                                    const TargetRegisterClass* RC) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg));
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     unsigned Op0, bool Op0IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
  else {
    // No explicit def: emit the instruction, then copy the value out of its
    // first implicit def into ResultReg.
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      unsigned Op1, bool Op1IsKill) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode,
                                      const TargetRegisterClass *RC,
                                      unsigned Op0, bool Op0IsKill,
                                      const ConstantFP *FPImm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addFPImm(FPImm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode,
                                       const TargetRegisterClass *RC,
                                       unsigned Op0, bool Op0IsKill,
                                       unsigned Op1, bool Op1IsKill,
                                       uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addReg(Op0, Op0IsKill * RegState::Kill)
                    .addReg(Op1, Op1IsKill * RegState::Kill)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

unsigned ARMFastISel::FastEmitInst_i(unsigned MachineInstOpcode,
                                     const TargetRegisterClass *RC,
                                     uint64_t Imm) {
  unsigned ResultReg = createResultReg(RC);
  const TargetInstrDesc &II = TII.get(MachineInstOpcode);

  if (II.getNumDefs() >= 1)
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg)
                    .addImm(Imm));
  else {
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
                    .addImm(Imm));
    AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                            TII.get(TargetOpcode::COPY), ResultReg)
                    .addReg(II.ImplicitDefs[0]));
  }
  return ResultReg;
}

// Extract a subregister value by emitting a COPY from Op0's subreg Idx into
// a fresh register of the class appropriate for RetVT.
unsigned ARMFastISel::FastEmitInst_extractsubreg(MVT RetVT,
                                                 unsigned Op0, bool Op0IsKill,
                                                 uint32_t Idx) {
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
  assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
         "Cannot yet extract from physregs");
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
                          DL, TII.get(TargetOpcode::COPY), ResultReg)
                  .addReg(Op0, getKillRegState(Op0IsKill), Idx));
  return ResultReg;
}

// TODO: Don't worry about 64-bit now, but when this is fixed remove the
// checks from the various callers.
366unsigned ARMFastISel::ARMMoveToFPReg(EVT VT, unsigned SrcReg) { 367 if (VT.getSimpleVT().SimpleTy == MVT::f64) return 0; 368 369 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 370 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 371 TII.get(ARM::VMOVRS), MoveReg) 372 .addReg(SrcReg)); 373 return MoveReg; 374} 375 376unsigned ARMFastISel::ARMMoveToIntReg(EVT VT, unsigned SrcReg) { 377 if (VT.getSimpleVT().SimpleTy == MVT::i64) return 0; 378 379 unsigned MoveReg = createResultReg(TLI.getRegClassFor(VT)); 380 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 381 TII.get(ARM::VMOVSR), MoveReg) 382 .addReg(SrcReg)); 383 return MoveReg; 384} 385 386// For double width floating point we need to materialize two constants 387// (the high and the low) into integer registers then use a move to get 388// the combined constant into an FP reg. 389unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { 390 const APFloat Val = CFP->getValueAPF(); 391 bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64; 392 393 // This checks to see if we can use VFP3 instructions to materialize 394 // a constant, otherwise we have to go through the constant pool. 395 if (TLI.isFPImmLegal(Val, VT)) { 396 unsigned Opc = is64bit ? ARM::FCONSTD : ARM::FCONSTS; 397 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 398 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 399 DestReg) 400 .addFPImm(CFP)); 401 return DestReg; 402 } 403 404 // Require VFP2 for loading fp constants. 405 if (!Subtarget->hasVFP2()) return false; 406 407 // MachineConstantPool wants an explicit alignment. 408 unsigned Align = TD.getPrefTypeAlignment(CFP->getType()); 409 if (Align == 0) { 410 // TODO: Figure out if this is correct. 
411 Align = TD.getTypeAllocSize(CFP->getType()); 412 } 413 unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align); 414 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 415 unsigned Opc = is64bit ? ARM::VLDRD : ARM::VLDRS; 416 417 // The extra reg is for addrmode5. 418 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 419 DestReg) 420 .addConstantPoolIndex(Idx) 421 .addReg(0)); 422 return DestReg; 423} 424 425unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { 426 427 // For now 32-bit only. 428 if (VT.getSimpleVT().SimpleTy != MVT::i32) return false; 429 430 // MachineConstantPool wants an explicit alignment. 431 unsigned Align = TD.getPrefTypeAlignment(C->getType()); 432 if (Align == 0) { 433 // TODO: Figure out if this is correct. 434 Align = TD.getTypeAllocSize(C->getType()); 435 } 436 unsigned Idx = MCP.getConstantPoolIndex(C, Align); 437 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 438 439 if (isThumb) 440 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 441 TII.get(ARM::t2LDRpci), DestReg) 442 .addConstantPoolIndex(Idx)); 443 else 444 // The extra reg and immediate are for addrmode2. 445 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 446 TII.get(ARM::LDRcp), DestReg) 447 .addConstantPoolIndex(Idx) 448 .addReg(0).addImm(0)); 449 450 return DestReg; 451} 452 453unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { 454 // For now 32-bit only. 455 if (VT.getSimpleVT().SimpleTy != MVT::i32) return 0; 456 457 Reloc::Model RelocM = TM.getRelocationModel(); 458 459 // TODO: No external globals for now. 460 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) return 0; 461 462 // TODO: Need more magic for ARM PIC. 463 if (!isThumb && (RelocM == Reloc::PIC_)) return 0; 464 465 // MachineConstantPool wants an explicit alignment. 
466 unsigned Align = TD.getPrefTypeAlignment(GV->getType()); 467 if (Align == 0) { 468 // TODO: Figure out if this is correct. 469 Align = TD.getTypeAllocSize(GV->getType()); 470 } 471 472 // Grab index. 473 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb() ? 4 : 8); 474 unsigned Id = AFI->createConstPoolEntryUId(); 475 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, Id, 476 ARMCP::CPValue, PCAdj); 477 unsigned Idx = MCP.getConstantPoolIndex(CPV, Align); 478 479 // Load value. 480 MachineInstrBuilder MIB; 481 unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); 482 if (isThumb) { 483 unsigned Opc = (RelocM != Reloc::PIC_) ? ARM::t2LDRpci : ARM::t2LDRpci_pic; 484 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) 485 .addConstantPoolIndex(Idx); 486 if (RelocM == Reloc::PIC_) 487 MIB.addImm(Id); 488 } else { 489 // The extra reg and immediate are for addrmode2. 490 MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), 491 DestReg) 492 .addConstantPoolIndex(Idx) 493 .addReg(0).addImm(0); 494 } 495 AddOptionalDefs(MIB); 496 return DestReg; 497} 498 499unsigned ARMFastISel::TargetMaterializeConstant(const Constant *C) { 500 EVT VT = TLI.getValueType(C->getType(), true); 501 502 // Only handle simple types. 503 if (!VT.isSimple()) return 0; 504 505 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C)) 506 return ARMMaterializeFP(CFP, VT); 507 else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C)) 508 return ARMMaterializeGV(GV, VT); 509 else if (isa<ConstantInt>(C)) 510 return ARMMaterializeInt(C, VT); 511 512 return 0; 513} 514 515unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { 516 // Don't handle dynamic allocas. 
517 if (!FuncInfo.StaticAllocaMap.count(AI)) return 0; 518 519 EVT VT; 520 if (!isTypeLegal(AI->getType(), VT)) return false; 521 522 DenseMap<const AllocaInst*, int>::iterator SI = 523 FuncInfo.StaticAllocaMap.find(AI); 524 525 // This will get lowered later into the correct offsets and registers 526 // via rewriteXFrameIndex. 527 if (SI != FuncInfo.StaticAllocaMap.end()) { 528 TargetRegisterClass* RC = TLI.getRegClassFor(VT); 529 unsigned ResultReg = createResultReg(RC); 530 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; 531 AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, 532 TII.get(Opc), ResultReg) 533 .addFrameIndex(SI->second) 534 .addImm(0)); 535 return ResultReg; 536 } 537 538 return 0; 539} 540 541bool ARMFastISel::isTypeLegal(const Type *Ty, EVT &VT) { 542 VT = TLI.getValueType(Ty, true); 543 544 // Only handle simple types. 545 if (VT == MVT::Other || !VT.isSimple()) return false; 546 547 // Handle all legal types, i.e. a register that will directly hold this 548 // value. 549 return TLI.isTypeLegal(VT); 550} 551 552bool ARMFastISel::isLoadTypeLegal(const Type *Ty, EVT &VT) { 553 if (isTypeLegal(Ty, VT)) return true; 554 555 // If this is a type than can be sign or zero-extended to a basic operation 556 // go ahead and accept it now. 557 if (VT == MVT::i8 || VT == MVT::i16) 558 return true; 559 560 return false; 561} 562 563// Computes the Reg+Offset to get to an object. 564bool ARMFastISel::ARMComputeRegOffset(const Value *Obj, unsigned &Reg, 565 int &Offset) { 566 // Some boilerplate from the X86 FastISel. 567 const User *U = NULL; 568 unsigned Opcode = Instruction::UserOp1; 569 if (const Instruction *I = dyn_cast<Instruction>(Obj)) { 570 // Don't walk into other basic blocks; it's possible we haven't 571 // visited them yet, so the instructions may not yet be assigned 572 // virtual registers. 
573 if (FuncInfo.MBBMap[I->getParent()] != FuncInfo.MBB) 574 return false; 575 Opcode = I->getOpcode(); 576 U = I; 577 } else if (const ConstantExpr *C = dyn_cast<ConstantExpr>(Obj)) { 578 Opcode = C->getOpcode(); 579 U = C; 580 } 581 582 if (const PointerType *Ty = dyn_cast<PointerType>(Obj->getType())) 583 if (Ty->getAddressSpace() > 255) 584 // Fast instruction selection doesn't support the special 585 // address spaces. 586 return false; 587 588 switch (Opcode) { 589 default: 590 break; 591 case Instruction::Alloca: { 592 assert(false && "Alloca should have been handled earlier!"); 593 return false; 594 } 595 } 596 597 // FIXME: Handle global variables. 598 if (const GlobalValue *GV = dyn_cast<GlobalValue>(Obj)) { 599 (void)GV; 600 return false; 601 } 602 603 // Try to get this in a register if nothing else has worked. 604 Reg = getRegForValue(Obj); 605 if (Reg == 0) return false; 606 607 // Since the offset may be too large for the load instruction 608 // get the reg+offset into a register. 609 // TODO: Verify the additions work, otherwise we'll need to add the 610 // offset instead of 0 to the instructions and do all sorts of operand 611 // munging. 612 // TODO: Optimize this somewhat. 613 if (Offset != 0) { 614 ARMCC::CondCodes Pred = ARMCC::AL; 615 unsigned PredReg = 0; 616 617 if (!isThumb) 618 emitARMRegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 619 Reg, Reg, Offset, Pred, PredReg, 620 static_cast<const ARMBaseInstrInfo&>(TII)); 621 else { 622 assert(AFI->isThumb2Function()); 623 emitT2RegPlusImmediate(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 624 Reg, Reg, Offset, Pred, PredReg, 625 static_cast<const ARMBaseInstrInfo&>(TII)); 626 } 627 } 628 return true; 629} 630 631bool ARMFastISel::ARMLoadAlloca(const Instruction *I, EVT VT) { 632 Value *Op0 = I->getOperand(0); 633 634 // Promote load/store types. 635 if (VT == MVT::i8 || VT == MVT::i16) VT = MVT::i32; 636 637 // Verify it's an alloca. 
638 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op0)) { 639 DenseMap<const AllocaInst*, int>::iterator SI = 640 FuncInfo.StaticAllocaMap.find(AI); 641 642 if (SI != FuncInfo.StaticAllocaMap.end()) { 643 TargetRegisterClass* RC = TLI.getRegClassFor(VT); 644 unsigned ResultReg = createResultReg(RC); 645 TII.loadRegFromStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, 646 ResultReg, SI->second, RC, 647 TM.getRegisterInfo()); 648 UpdateValueMap(I, ResultReg); 649 return true; 650 } 651 } 652 return false; 653} 654 655bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, 656 unsigned Reg, int Offset) { 657 658 assert(VT.isSimple() && "Non-simple types are invalid here!"); 659 unsigned Opc; 660 TargetRegisterClass *RC; 661 bool isFloat = false; 662 switch (VT.getSimpleVT().SimpleTy) { 663 default: 664 // This is mostly going to be Neon/vector support. 665 return false; 666 case MVT::i16: 667 Opc = isThumb ? ARM::t2LDRHi8 : ARM::LDRH; 668 RC = ARM::GPRRegisterClass; 669 VT = MVT::i32; 670 break; 671 case MVT::i8: 672 Opc = isThumb ? ARM::t2LDRBi8 : ARM::LDRB; 673 RC = ARM::GPRRegisterClass; 674 VT = MVT::i32; 675 break; 676 case MVT::i32: 677 Opc = isThumb ? ARM::t2LDRi8 : ARM::LDR; 678 RC = ARM::GPRRegisterClass; 679 break; 680 case MVT::f32: 681 Opc = ARM::VLDRS; 682 RC = TLI.getRegClassFor(VT); 683 isFloat = true; 684 break; 685 case MVT::f64: 686 Opc = ARM::VLDRD; 687 RC = TLI.getRegClassFor(VT); 688 isFloat = true; 689 break; 690 } 691 692 ResultReg = createResultReg(RC); 693 694 // For now with the additions above the offset should be zero - thus we 695 // can always fit into an i8. 696 assert(Offset == 0 && "Offset not zero!"); 697 698 // The thumb and floating point instructions both take 2 operands, ARM takes 699 // another register. 
700 if (isFloat || isThumb) 701 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 702 TII.get(Opc), ResultReg) 703 .addReg(Reg).addImm(Offset)); 704 else 705 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 706 TII.get(Opc), ResultReg) 707 .addReg(Reg).addReg(0).addImm(Offset)); 708 return true; 709} 710 711bool ARMFastISel::SelectLoad(const Instruction *I) { 712 // Verify we have a legal type before going any further. 713 EVT VT; 714 if (!isLoadTypeLegal(I->getType(), VT)) 715 return false; 716 717 // If we're an alloca we know we have a frame index and can emit the load 718 // directly in short order. 719 if (ARMLoadAlloca(I, VT)) 720 return true; 721 722 // Our register and offset with innocuous defaults. 723 unsigned Reg = 0; 724 int Offset = 0; 725 726 // See if we can handle this as Reg + Offset 727 if (!ARMComputeRegOffset(I->getOperand(0), Reg, Offset)) 728 return false; 729 730 unsigned ResultReg; 731 if (!ARMEmitLoad(VT, ResultReg, Reg, Offset /* 0 */)) return false; 732 733 UpdateValueMap(I, ResultReg); 734 return true; 735} 736 737bool ARMFastISel::ARMStoreAlloca(const Instruction *I, unsigned SrcReg, EVT VT){ 738 Value *Op1 = I->getOperand(1); 739 740 // Promote load/store types. 741 if (VT == MVT::i8 || VT == MVT::i16) VT = MVT::i32; 742 743 // Verify it's an alloca. 
744 if (const AllocaInst *AI = dyn_cast<AllocaInst>(Op1)) { 745 DenseMap<const AllocaInst*, int>::iterator SI = 746 FuncInfo.StaticAllocaMap.find(AI); 747 748 if (SI != FuncInfo.StaticAllocaMap.end()) { 749 TargetRegisterClass* RC = TLI.getRegClassFor(VT); 750 assert(SrcReg != 0 && "Nothing to store!"); 751 TII.storeRegToStackSlot(*FuncInfo.MBB, *FuncInfo.InsertPt, 752 SrcReg, true /*isKill*/, SI->second, RC, 753 TM.getRegisterInfo()); 754 return true; 755 } 756 } 757 return false; 758} 759 760bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, 761 unsigned DstReg, int Offset) { 762 unsigned StrOpc; 763 bool isFloat = false; 764 switch (VT.getSimpleVT().SimpleTy) { 765 default: return false; 766 case MVT::i1: 767 case MVT::i8: StrOpc = isThumb ? ARM::t2STRBi8 : ARM::STRB; break; 768 case MVT::i16: StrOpc = isThumb ? ARM::t2STRHi8 : ARM::STRH; break; 769 case MVT::i32: StrOpc = isThumb ? ARM::t2STRi8 : ARM::STR; break; 770 case MVT::f32: 771 if (!Subtarget->hasVFP2()) return false; 772 StrOpc = ARM::VSTRS; 773 isFloat = true; 774 break; 775 case MVT::f64: 776 if (!Subtarget->hasVFP2()) return false; 777 StrOpc = ARM::VSTRD; 778 isFloat = true; 779 break; 780 } 781 782 // The thumb addressing mode has operands swapped from the arm addressing 783 // mode, the floating point one only has two operands. 784 if (isFloat || isThumb) 785 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 786 TII.get(StrOpc)) 787 .addReg(SrcReg).addReg(DstReg).addImm(Offset)); 788 else 789 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 790 TII.get(StrOpc)) 791 .addReg(SrcReg).addReg(DstReg).addReg(0).addImm(Offset)); 792 793 return true; 794} 795 796bool ARMFastISel::SelectStore(const Instruction *I) { 797 Value *Op0 = I->getOperand(0); 798 unsigned SrcReg = 0; 799 800 // Yay type legalization 801 EVT VT; 802 if (!isLoadTypeLegal(I->getOperand(0)->getType(), VT)) 803 return false; 804 805 // Get the value to be stored into a register. 
806 SrcReg = getRegForValue(Op0); 807 if (SrcReg == 0) 808 return false; 809 810 // If we're an alloca we know we have a frame index and can emit the store 811 // quickly. 812 if (ARMStoreAlloca(I, SrcReg, VT)) 813 return true; 814 815 // Our register and offset with innocuous defaults. 816 unsigned Reg = 0; 817 int Offset = 0; 818 819 // See if we can handle this as Reg + Offset 820 if (!ARMComputeRegOffset(I->getOperand(1), Reg, Offset)) 821 return false; 822 823 if (!ARMEmitStore(VT, SrcReg, Reg, Offset /* 0 */)) return false; 824 825 return true; 826} 827 828static ARMCC::CondCodes getComparePred(CmpInst::Predicate Pred) { 829 switch (Pred) { 830 // Needs two compares... 831 case CmpInst::FCMP_ONE: 832 case CmpInst::FCMP_UEQ: 833 default: 834 assert(false && "Unhandled CmpInst::Predicate!"); 835 return ARMCC::AL; 836 case CmpInst::ICMP_EQ: 837 case CmpInst::FCMP_OEQ: 838 return ARMCC::EQ; 839 case CmpInst::ICMP_SGT: 840 case CmpInst::FCMP_OGT: 841 return ARMCC::GT; 842 case CmpInst::ICMP_SGE: 843 case CmpInst::FCMP_OGE: 844 return ARMCC::GE; 845 case CmpInst::ICMP_UGT: 846 case CmpInst::FCMP_UGT: 847 return ARMCC::HI; 848 case CmpInst::FCMP_OLT: 849 return ARMCC::MI; 850 case CmpInst::ICMP_ULE: 851 case CmpInst::FCMP_OLE: 852 return ARMCC::LS; 853 case CmpInst::FCMP_ORD: 854 return ARMCC::VC; 855 case CmpInst::FCMP_UNO: 856 return ARMCC::VS; 857 case CmpInst::FCMP_UGE: 858 return ARMCC::PL; 859 case CmpInst::ICMP_SLT: 860 case CmpInst::FCMP_ULT: 861 return ARMCC::LT; 862 case CmpInst::ICMP_SLE: 863 case CmpInst::FCMP_ULE: 864 return ARMCC::LE; 865 case CmpInst::FCMP_UNE: 866 case CmpInst::ICMP_NE: 867 return ARMCC::NE; 868 case CmpInst::ICMP_UGE: 869 return ARMCC::HS; 870 case CmpInst::ICMP_ULT: 871 return ARMCC::LO; 872 } 873} 874 875bool ARMFastISel::SelectBranch(const Instruction *I) { 876 const BranchInst *BI = cast<BranchInst>(I); 877 MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; 878 MachineBasicBlock *FBB = 
FuncInfo.MBBMap[BI->getSuccessor(1)]; 879 880 // Simple branch support. 881 // TODO: Try to avoid the re-computation in some places. 882 unsigned CondReg = getRegForValue(BI->getCondition()); 883 if (CondReg == 0) return false; 884 885 // Re-set the flags just in case. 886 unsigned CmpOpc = isThumb ? ARM::t2CMPri : ARM::CMPri; 887 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 888 .addReg(CondReg).addImm(1)); 889 890 unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; 891 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) 892 .addMBB(TBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 893 FastEmitBranch(FBB, DL); 894 FuncInfo.MBB->addSuccessor(TBB); 895 return true; 896} 897 898bool ARMFastISel::SelectCmp(const Instruction *I) { 899 const CmpInst *CI = cast<CmpInst>(I); 900 901 EVT VT; 902 const Type *Ty = CI->getOperand(0)->getType(); 903 if (!isTypeLegal(Ty, VT)) 904 return false; 905 906 bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); 907 if (isFloat && !Subtarget->hasVFP2()) 908 return false; 909 910 unsigned CmpOpc; 911 unsigned CondReg; 912 switch (VT.getSimpleVT().SimpleTy) { 913 default: return false; 914 // TODO: Verify compares. 915 case MVT::f32: 916 CmpOpc = ARM::VCMPES; 917 CondReg = ARM::FPSCR; 918 break; 919 case MVT::f64: 920 CmpOpc = ARM::VCMPED; 921 CondReg = ARM::FPSCR; 922 break; 923 case MVT::i32: 924 CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; 925 CondReg = ARM::CPSR; 926 break; 927 } 928 929 // Get the compare predicate. 930 ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); 931 932 // We may not handle every CC for now. 
933 if (ARMPred == ARMCC::AL) return false; 934 935 unsigned Arg1 = getRegForValue(CI->getOperand(0)); 936 if (Arg1 == 0) return false; 937 938 unsigned Arg2 = getRegForValue(CI->getOperand(1)); 939 if (Arg2 == 0) return false; 940 941 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 942 .addReg(Arg1).addReg(Arg2)); 943 944 // For floating point we need to move the result to a comparison register 945 // that we can then use for branches. 946 if (isFloat) 947 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 948 TII.get(ARM::FMSTAT))); 949 950 // Now set a register based on the comparison. Explicitly set the predicates 951 // here. 952 unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; 953 TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass 954 : ARM::GPRRegisterClass; 955 unsigned DestReg = createResultReg(RC); 956 Constant *Zero 957 = ConstantInt::get(Type::getInt32Ty(*Context), 0); 958 unsigned ZeroReg = TargetMaterializeConstant(Zero); 959 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) 960 .addReg(ZeroReg).addImm(1) 961 .addImm(ARMPred).addReg(CondReg); 962 963 UpdateValueMap(I, DestReg); 964 return true; 965} 966 967bool ARMFastISel::SelectFPExt(const Instruction *I) { 968 // Make sure we have VFP and that we're extending float to double. 969 if (!Subtarget->hasVFP2()) return false; 970 971 Value *V = I->getOperand(0); 972 if (!I->getType()->isDoubleTy() || 973 !V->getType()->isFloatTy()) return false; 974 975 unsigned Op = getRegForValue(V); 976 if (Op == 0) return false; 977 978 unsigned Result = createResultReg(ARM::DPRRegisterClass); 979 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 980 TII.get(ARM::VCVTDS), Result) 981 .addReg(Op)); 982 UpdateValueMap(I, Result); 983 return true; 984} 985 986bool ARMFastISel::SelectFPTrunc(const Instruction *I) { 987 // Make sure we have VFP and that we're truncating double to float. 
988 if (!Subtarget->hasVFP2()) return false; 989 990 Value *V = I->getOperand(0); 991 if (!(I->getType()->isFloatTy() && 992 V->getType()->isDoubleTy())) return false; 993 994 unsigned Op = getRegForValue(V); 995 if (Op == 0) return false; 996 997 unsigned Result = createResultReg(ARM::SPRRegisterClass); 998 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 999 TII.get(ARM::VCVTSD), Result) 1000 .addReg(Op)); 1001 UpdateValueMap(I, Result); 1002 return true; 1003} 1004 1005bool ARMFastISel::SelectSIToFP(const Instruction *I) { 1006 // Make sure we have VFP. 1007 if (!Subtarget->hasVFP2()) return false; 1008 1009 EVT DstVT; 1010 const Type *Ty = I->getType(); 1011 if (!isTypeLegal(Ty, DstVT)) 1012 return false; 1013 1014 unsigned Op = getRegForValue(I->getOperand(0)); 1015 if (Op == 0) return false; 1016 1017 // The conversion routine works on fp-reg to fp-reg and the operand above 1018 // was an integer, move it to the fp registers if possible. 1019 unsigned FP = ARMMoveToFPReg(MVT::f32, Op); 1020 if (FP == 0) return false; 1021 1022 unsigned Opc; 1023 if (Ty->isFloatTy()) Opc = ARM::VSITOS; 1024 else if (Ty->isDoubleTy()) Opc = ARM::VSITOD; 1025 else return 0; 1026 1027 unsigned ResultReg = createResultReg(TLI.getRegClassFor(DstVT)); 1028 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1029 ResultReg) 1030 .addReg(FP)); 1031 UpdateValueMap(I, ResultReg); 1032 return true; 1033} 1034 1035bool ARMFastISel::SelectFPToSI(const Instruction *I) { 1036 // Make sure we have VFP. 
1037 if (!Subtarget->hasVFP2()) return false; 1038 1039 EVT DstVT; 1040 const Type *RetTy = I->getType(); 1041 if (!isTypeLegal(RetTy, DstVT)) 1042 return false; 1043 1044 unsigned Op = getRegForValue(I->getOperand(0)); 1045 if (Op == 0) return false; 1046 1047 unsigned Opc; 1048 const Type *OpTy = I->getOperand(0)->getType(); 1049 if (OpTy->isFloatTy()) Opc = ARM::VTOSIZS; 1050 else if (OpTy->isDoubleTy()) Opc = ARM::VTOSIZD; 1051 else return 0; 1052 1053 // f64->s32 or f32->s32 both need an intermediate f32 reg. 1054 unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::f32)); 1055 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), 1056 ResultReg) 1057 .addReg(Op)); 1058 1059 // This result needs to be in an integer register, but the conversion only 1060 // takes place in fp-regs. 1061 unsigned IntReg = ARMMoveToIntReg(DstVT, ResultReg); 1062 if (IntReg == 0) return false; 1063 1064 UpdateValueMap(I, IntReg); 1065 return true; 1066} 1067 1068bool ARMFastISel::SelectSelect(const Instruction *I) { 1069 EVT VT = TLI.getValueType(I->getType(), /*HandleUnknown=*/true); 1070 if (VT == MVT::Other || !isTypeLegal(I->getType(), VT)) 1071 return false; 1072 1073 // Things need to be register sized for register moves. 1074 if (VT.getSimpleVT().SimpleTy != MVT::i32) return false; 1075 const TargetRegisterClass *RC = TLI.getRegClassFor(VT); 1076 1077 unsigned CondReg = getRegForValue(I->getOperand(0)); 1078 if (CondReg == 0) return false; 1079 unsigned Op1Reg = getRegForValue(I->getOperand(1)); 1080 if (Op1Reg == 0) return false; 1081 unsigned Op2Reg = getRegForValue(I->getOperand(2)); 1082 if (Op2Reg == 0) return false; 1083 1084 unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; 1085 AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) 1086 .addReg(CondReg).addImm(1)); 1087 unsigned ResultReg = createResultReg(RC); 1088 unsigned MovCCOpc = isThumb ? 
ARM::t2MOVCCr : ARM::MOVCCr; 1089 BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) 1090 .addReg(Op1Reg).addReg(Op2Reg) 1091 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 1092 UpdateValueMap(I, ResultReg); 1093 return true; 1094} 1095 1096bool ARMFastISel::SelectSDiv(const Instruction *I) { 1097 EVT VT; 1098 const Type *Ty = I->getType(); 1099 if (!isTypeLegal(Ty, VT)) 1100 return false; 1101 1102 // If we have integer div support we should have selected this automagically. 1103 // In case we have a real miss go ahead and return false and we'll pick 1104 // it up later. 1105 if (Subtarget->hasDivide()) return false; 1106 1107 // Otherwise emit a libcall. 1108 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1109 if (VT == MVT::i8) 1110 LC = RTLIB::SDIV_I8; 1111 else if (VT == MVT::i16) 1112 LC = RTLIB::SDIV_I16; 1113 else if (VT == MVT::i32) 1114 LC = RTLIB::SDIV_I32; 1115 else if (VT == MVT::i64) 1116 LC = RTLIB::SDIV_I64; 1117 else if (VT == MVT::i128) 1118 LC = RTLIB::SDIV_I128; 1119 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1120 1121 return ARMEmitLibcall(I, LC); 1122} 1123 1124bool ARMFastISel::SelectSRem(const Instruction *I) { 1125 EVT VT; 1126 const Type *Ty = I->getType(); 1127 if (!isTypeLegal(Ty, VT)) 1128 return false; 1129 1130 RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; 1131 if (VT == MVT::i8) 1132 LC = RTLIB::SREM_I8; 1133 else if (VT == MVT::i16) 1134 LC = RTLIB::SREM_I16; 1135 else if (VT == MVT::i32) 1136 LC = RTLIB::SREM_I32; 1137 else if (VT == MVT::i64) 1138 LC = RTLIB::SREM_I64; 1139 else if (VT == MVT::i128) 1140 LC = RTLIB::SREM_I128; 1141 assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); 1142 1143 return ARMEmitLibcall(I, LC); 1144} 1145 1146bool ARMFastISel::SelectBinaryOp(const Instruction *I, unsigned ISDOpcode) { 1147 EVT VT = TLI.getValueType(I->getType(), true); 1148 1149 // We can get here in the case when we want to use NEON for our fp 1150 // operations, but can't figure out how to. 
// (comment continued) Just use the vfp instructions
// if we have them.
  // FIXME: It'd be nice to use NEON instructions.
  const Type *Ty = I->getType();
  bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy());
  if (isFloat && !Subtarget->hasVFP2())
    return false;

  unsigned Op1 = getRegForValue(I->getOperand(0));
  if (Op1 == 0) return false;

  unsigned Op2 = getRegForValue(I->getOperand(1));
  if (Op2 == 0) return false;

  // Pick the single- or double-precision VFP opcode for the ISD node.
  unsigned Opc;
  bool is64bit = VT.getSimpleVT().SimpleTy == MVT::f64 ||
                 VT.getSimpleVT().SimpleTy == MVT::i64;
  switch (ISDOpcode) {
    default: return false;
    case ISD::FADD:
      Opc = is64bit ? ARM::VADDD : ARM::VADDS;
      break;
    case ISD::FSUB:
      Opc = is64bit ? ARM::VSUBD : ARM::VSUBS;
      break;
    case ISD::FMUL:
      Opc = is64bit ? ARM::VMULD : ARM::VMULS;
      break;
  }
  unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT));
  AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg)
                  .addReg(Op1).addReg(Op2));
  UpdateValueMap(I, ResultReg);
  return true;
}

// Call Handling Code

// This is largely taken directly from CCAssignFnForNode - we don't support
// varargs in FastISel so that part has been removed.
// TODO: We may not support all of this.
//
// Maps a calling convention (plus Return = argument vs. return-value
// direction) to the CCAssignFn used to place values in registers/stack.
CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC, bool Return) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      // Hard-float AAPCS-VFP only when VFP2 is available and the float ABI
      // is explicitly hard.
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

// Assigns the call's arguments to registers per the calling convention and
// emits the CALLSEQ_START stack adjustment plus the argument copies.
// Returns false (bailing out of fast-isel) for anything needing promotion or
// stack passing. On success, RegArgs holds the physregs the call must use
// implicitly and NumBytes the outgoing stack size.
bool ARMFastISel::ProcessCallArgs(SmallVectorImpl<Value*> &Args,
                                  SmallVectorImpl<unsigned> &ArgRegs,
                                  SmallVectorImpl<EVT> &ArgVTs,
                                  SmallVectorImpl<ISD::ArgFlagsTy> &ArgFlags,
                                  SmallVectorImpl<unsigned> &RegArgs,
                                  CallingConv::ID CC,
                                  unsigned &NumBytes) {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CC, false, TM, ArgLocs, *Context);
  CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CCAssignFnForCall(CC, false));

  // Get a count of how many bytes are to be pushed on the stack.
  NumBytes = CCInfo.getNextStackOffset();

  // Issue CALLSEQ_START
  unsigned AdjStackDown = TM.getRegisterInfo()->getCallFrameSetupOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackDown))
          .addImm(NumBytes);

  // Process the args.
  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    unsigned Arg = ArgRegs[VA.getValNo()];
    EVT ArgVT = ArgVTs[VA.getValNo()];

    // Handle arg promotion, etc.
    switch (VA.getLocInfo()) {
      case CCValAssign::Full: break;
      default:
        // TODO: Handle arg promotion.
        return false;
    }

    // Now copy/store arg to correct locations.
// (ProcessCallArgs continued: copy register-assigned args into place; any
// stack-assigned arg makes us bail out of fast-isel.)
    if (VA.isRegLoc()) {
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              VA.getLocReg())
      .addReg(Arg);
      RegArgs.push_back(VA.getLocReg());
    } else {
      // Need to store
      return false;
    }
  }

  return true;
}

// Emits the CALLSEQ_END stack adjustment and copies the call's return value
// (if any) out of its physical register(s) into virtual registers, updating
// the value map for I. UsedRegs collects the physregs read so the caller can
// mark the call's remaining physreg defs dead.
bool ARMFastISel::FinishCall(EVT RetVT, SmallVectorImpl<unsigned> &UsedRegs,
                             const Instruction *I, CallingConv::ID CC,
                             unsigned &NumBytes) {
  // Issue CALLSEQ_END
  unsigned AdjStackUp = TM.getRegisterInfo()->getCallFrameDestroyOpcode();
  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
          .addImm(NumBytes).addImm(0);

  // Now the return value.
  if (RetVT.getSimpleVT().SimpleTy != MVT::isVoid) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CC, false, TM, RVLocs, *Context);
    CCInfo.AnalyzeCallResult(RetVT, CCAssignFnForCall(CC, true));

    // Copy all of the result registers out of their specified physreg.
    if (RVLocs.size() == 2 && RetVT.getSimpleVT().SimpleTy == MVT::f64) {
      // For this move we copy into two registers and then move into the
      // double fp reg we want. (An f64 returned in two i32 registers is
      // reassembled with VMOVDRR.)
      // TODO: Are the copies necessary?
      TargetRegisterClass *CopyRC = TLI.getRegClassFor(MVT::i32);
      unsigned Copy1 = createResultReg(CopyRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              Copy1).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      unsigned Copy2 = createResultReg(CopyRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              Copy2).addReg(RVLocs[1].getLocReg());
      UsedRegs.push_back(RVLocs[1].getLocReg());

      EVT DestVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(DestVT);
      unsigned ResultReg = createResultReg(DstRC);
      AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                              TII.get(ARM::VMOVDRR), ResultReg)
                      .addReg(Copy1).addReg(Copy2));

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    } else {
      // Single-register return value: one COPY out of the physreg.
      assert(RVLocs.size() == 1 && "Can't handle non-double multi-reg retvals!");
      EVT CopyVT = RVLocs[0].getValVT();
      TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT);

      unsigned ResultReg = createResultReg(DstRC);
      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY),
              ResultReg).addReg(RVLocs[0].getLocReg());
      UsedRegs.push_back(RVLocs[0].getLocReg());

      // Finally update the result.
      UpdateValueMap(I, ResultReg);
    }
  }

  return true;
}

// A quick function that will emit a call for a named libcall in F with the
// vector of passed arguments for the Instruction in I. We can assume that we
// can emit a call for any libcall we can produce. This is an abridged version
// of the full call infrastructure since we won't need to worry about things
// like computed function pointers or strange arguments at call sites.
// TODO: Try to unify this and the normal call bits for ARM, then try to unify
// with X86.
// Emits a call to the runtime library routine for Call, passing I's operands
// and mapping I to the call's result. Returns false to fall back to SDISel.
bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) {
  CallingConv::ID CC = TLI.getLibcallCallingConv(Call);

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BLX etc on the assumption that we have v5t ops.
  if (!Subtarget->hasV5TOps()) return false;

  // Set up the argument vectors. Every operand of I becomes a call argument;
  // any operand without a register or a legal type aborts fast-isel.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(I->getNumOperands());
  ArgRegs.reserve(I->getNumOperands());
  ArgVTs.reserve(I->getNumOperands());
  ArgFlags.reserve(I->getNumOperands());
  for (unsigned i = 0; i < I->getNumOperands(); ++i) {
    Value *Op = I->getOperand(i);
    unsigned Arg = getRegForValue(Op);
    if (Arg == 0) return false;

    const Type *ArgTy = Op->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT)) return false;

    ISD::ArgFlagsTy Flags;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(Op);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call. Darwin targets use the r9-clobbering variants
  // (tBLXi_r9/BLr9); other targets use tBLXi/BL. This uses V5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc;
  if(isThumb)
    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
  else
    CallOpc = Subtarget->isTargetDarwin() ?
// (ARMEmitLibcall continued: ARM-mode call opcode, then the call itself.)
              ARM::BLr9 : ARM::BL;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
        .addExternalSymbol(TLI.getLibcallName(Call));

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;
}

// Selects a call instruction. Only handles simple, direct, non-vararg
// C-calling-convention calls to global values; everything else falls back to
// SDISel by returning false.
bool ARMFastISel::SelectCall(const Instruction *I) {
  const CallInst *CI = cast<CallInst>(I);
  const Value *Callee = CI->getCalledValue();

  // Can't handle inline asm or worry about intrinsics yet.
  if (isa<InlineAsm>(Callee) || isa<IntrinsicInst>(CI)) return false;

  // Only handle global variable Callees that are direct calls.
  const GlobalValue *GV = dyn_cast<GlobalValue>(Callee);
  if (!GV || Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()))
    return false;

  // Check the calling convention.
  ImmutableCallSite CS(CI);
  CallingConv::ID CC = CS.getCallingConv();
  // TODO: Avoid some calling conventions?
  if (CC != CallingConv::C) {
    // errs() << "Can't handle calling convention: " << CC << "\n";
    return false;
  }

  // Let SDISel handle vararg functions.
  const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
  const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
  if (FTy->isVarArg())
    return false;

  // Handle *simple* calls for now.
  const Type *RetTy = I->getType();
  EVT RetVT;
  if (RetTy->isVoidTy())
    RetVT = MVT::isVoid;
  else if (!isTypeLegal(RetTy, RetVT))
    return false;

  // For now we're using BLX etc on the assumption that we have v5t ops.
  // TODO: Maybe?
  if (!Subtarget->hasV5TOps()) return false;

  // Set up the argument vectors, bailing out on any argument attribute we
  // don't handle yet.
  SmallVector<Value*, 8> Args;
  SmallVector<unsigned, 8> ArgRegs;
  SmallVector<EVT, 8> ArgVTs;
  SmallVector<ISD::ArgFlagsTy, 8> ArgFlags;
  Args.reserve(CS.arg_size());
  ArgRegs.reserve(CS.arg_size());
  ArgVTs.reserve(CS.arg_size());
  ArgFlags.reserve(CS.arg_size());
  for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
       i != e; ++i) {
    unsigned Arg = getRegForValue(*i);

    if (Arg == 0)
      return false;
    ISD::ArgFlagsTy Flags;
    // Attribute indices are 1-based (0 is the return value).
    unsigned AttrInd = i - CS.arg_begin() + 1;
    if (CS.paramHasAttr(AttrInd, Attribute::SExt))
      Flags.setSExt();
    if (CS.paramHasAttr(AttrInd, Attribute::ZExt))
      Flags.setZExt();

    // FIXME: Only handle *easy* calls for now.
    if (CS.paramHasAttr(AttrInd, Attribute::InReg) ||
        CS.paramHasAttr(AttrInd, Attribute::StructRet) ||
        CS.paramHasAttr(AttrInd, Attribute::Nest) ||
        CS.paramHasAttr(AttrInd, Attribute::ByVal))
      return false;

    const Type *ArgTy = (*i)->getType();
    EVT ArgVT;
    if (!isTypeLegal(ArgTy, ArgVT))
      return false;
    unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy);
    Flags.setOrigAlign(OriginalAlignment);

    Args.push_back(*i);
    ArgRegs.push_back(Arg);
    ArgVTs.push_back(ArgVT);
    ArgFlags.push_back(Flags);
  }

  // Handle the arguments now that we've gotten them.
  SmallVector<unsigned, 4> RegArgs;
  unsigned NumBytes;
  if (!ProcessCallArgs(Args, ArgRegs, ArgVTs, ArgFlags, RegArgs, CC, NumBytes))
    return false;

  // Issue the call. Darwin targets use the r9-clobbering variants
  // (tBLXi_r9/BLr9); other targets use tBLXi/BL. This uses V5 ops.
  // TODO: Turn this into the table of arm call ops.
  MachineInstrBuilder MIB;
  unsigned CallOpc;
  if(isThumb)
    CallOpc = Subtarget->isTargetDarwin() ? ARM::tBLXi_r9 : ARM::tBLXi;
  else
    CallOpc = Subtarget->isTargetDarwin() ? ARM::BLr9 : ARM::BL;
  MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))
        .addGlobalAddress(GV, 0, 0);

  // Add implicit physical register uses to the call.
  for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
    MIB.addReg(RegArgs[i]);

  // Finish off the call including any return values.
  SmallVector<unsigned, 4> UsedRegs;
  if (!FinishCall(RetVT, UsedRegs, I, CC, NumBytes)) return false;

  // Set all unused physreg defs as dead.
  static_cast<MachineInstr *>(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI);

  return true;

}

// TODO: SoftFP support.
// Top-level FastISel dispatch: routes each IR opcode to its Select* handler.
// Returning false hands the instruction to SelectionDAG.
bool ARMFastISel::TargetSelectInstruction(const Instruction *I) {
  // No Thumb-1 for now.
  if (isThumb && !AFI->isThumb2Function()) return false;

  switch (I->getOpcode()) {
    case Instruction::Load:
      return SelectLoad(I);
    case Instruction::Store:
      return SelectStore(I);
    case Instruction::Br:
      return SelectBranch(I);
    case Instruction::ICmp:
    case Instruction::FCmp:
      return SelectCmp(I);
    case Instruction::FPExt:
      return SelectFPExt(I);
    case Instruction::FPTrunc:
      return SelectFPTrunc(I);
    case Instruction::SIToFP:
      return SelectSIToFP(I);
    case Instruction::FPToSI:
      return SelectFPToSI(I);
    case Instruction::FAdd:
      return SelectBinaryOp(I, ISD::FADD);
    case Instruction::FSub:
      return SelectBinaryOp(I, ISD::FSUB);
    case Instruction::FMul:
      return SelectBinaryOp(I, ISD::FMUL);
    case Instruction::SDiv:
      return SelectSDiv(I);
    case Instruction::SRem:
      return SelectSRem(I);
    case Instruction::Call:
      return SelectCall(I);
    case Instruction::Select:
      return SelectSelect(I);
    default: break;
  }
  return false;
}

namespace llvm {
  // Factory entry point: creates the ARM FastISel instance, gated behind the
  // -arm-fast-isel experimental flag.
  llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) {
    if (EnableARMFastISel) return new ARMFastISel(funcInfo);
    return 0;
  }
}