ARMISelLowering.cpp revision 6b97ebe9a32342207b24a5f73ebbf3070ec8d189
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARMISelLowering.h"
#include "ARM.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
                   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
                   cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
                   cl::desc("Generate calls via indirect call instructions"),
                   cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
                cl::desc("Enable / disable ARM interworking (for debugging only)"),
                cl::init(true));

namespace {
  class ARMCCState : public CCState {
  public:
    ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
               const TargetMachine &TM, SmallVector<CCValAssign, 16> &locs,
               LLVMContext &C, ParmContext PC)
      : CCState(CC, isVarArg, MF, TM, locs, C) {
      assert(((PC == Call) || (PC == Prologue)) &&
             "ARMCCState users must specify whether their context is call "
             "or prologue generation.");
      CallOrPrologue = PC;
    }
  };
}

// The APCS parameter registers.
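// Under both APCS and AAPCS, r0-r3 carry the first four general-purpose
// argument words.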
static const uint16_t GPRArgRegs[] = {
  ARM::R0, ARM::R1, ARM::R2, ARM::R3
};

void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
                                       MVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT, Promote);
    AddPromotedToType (ISD::LOAD, VT, PromotedLdStVT);

    setOperationAction(ISD::STORE, VT, Promote);
    AddPromotedToType (ISD::STORE, VT, PromotedLdStVT);
  }

  MVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::SETCC, VT, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
  if (ElemTy == MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT, Custom);
    setOperationAction(ISD::UINT_TO_FP, VT, Custom);
    setOperationAction(ISD::FP_TO_SINT, VT, Custom);
    setOperationAction(ISD::FP_TO_UINT, VT, Custom);
  } else {
    setOperationAction(ISD::SINT_TO_FP, VT, Expand);
    setOperationAction(ISD::UINT_TO_FP, VT, Expand);
    setOperationAction(ISD::FP_TO_SINT, VT, Expand);
    setOperationAction(ISD::FP_TO_UINT, VT, Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
  setOperationAction(ISD::SELECT, VT, Expand);
  setOperationAction(ISD::SELECT_CC, VT, Expand);
  setOperationAction(ISD::VSELECT, VT, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT, Custom);
    setOperationAction(ISD::SRA, VT, Custom);
    setOperationAction(ISD::SRL, VT, Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT, Promote);
    AddPromotedToType (ISD::AND, VT, PromotedBitwiseVT);
    setOperationAction(ISD::OR,  VT, Promote);
    AddPromotedToType (ISD::OR,  VT, PromotedBitwiseVT);
    setOperationAction(ISD::XOR, VT, Promote);
    AddPromotedToType (ISD::XOR, VT, PromotedBitwiseVT);
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT, Expand);
  setOperationAction(ISD::UDIV, VT, Expand);
  setOperationAction(ISD::FDIV, VT, Expand);
  setOperationAction(ISD::SREM, VT, Expand);
  setOperationAction(ISD::UREM, VT, Expand);
  setOperationAction(ISD::FREM, VT, Expand);
}

void ARMTargetLowering::addDRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::DPRRegClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(MVT VT) {
  addRegisterClass(VT, &ARM::QPRRegClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
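      // The *vfp comparison helpers return a nonzero value when their
      // predicate holds, so setCmpLibcallCC records the condition used to
      // test the helper's result against zero (the "ordered" query reuses
      // the unordered helper with the test inverted).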
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI() && !Subtarget->isTargetDarwin()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
    setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul");
    setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub");
    setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS);

    // Single-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 5
    setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq");
"__aeabi_fcmpeq"); 314c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 315c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 3164437ae213d5435390f0750213b53ec807c047f22Chris Lattner setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 317825b72b0571821bf2d378749f69d6c4cfb52d2f9Owen Anderson setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 318c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 319c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 320c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 321c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 322c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 323c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 324c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 325825b72b0571821bf2d378749f69d6c4cfb52d2f9Owen Anderson setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 32698ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 3276553155172a2e74feff1253837daa608123de54aEvan Cheng setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 328c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 329c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 330c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 33198ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 332c8fbb6ae2041f17285e4ba73d54d388e703b9689Anton Korobeynikov setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 333fd1b7c778c0c332a676b1003115d2b4bc6f9a46aAnton Korobeynikov setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 33498ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 33598ca4f2a325f72374a477f9deba7d09e8999c29bDan Gohman 33665c3c8f323198b99b88b109654194540cf9b3fa5Sandeep Patel // Floating-point to integer conversions. 
    // RTABI chapter 4.1.2, Table 6
    setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz");
    setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz");
    setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz");
    setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz");
    setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz");
    setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz");
    setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz");
    setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz");
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS);

    // Conversions between floating types.
    // RTABI chapter 4.1.2, Table 7
    setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f");
    setLibcallName(RTLIB::FPEXT_F32_F64,   "__aeabi_f2d");
    setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS);

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);

    // Memory operations
    // RTABI chapter 4.3.4
    setLibcallName(RTLIB::MEMCPY,  "__aeabi_memcpy");
    setLibcallName(RTLIB::MEMMOVE, "__aeabi_memmove");
    setLibcallName(RTLIB::MEMSET,  "__aeabi_memset");
    setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMMOVE, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MEMSET, CallingConv::ARM_AAPCS);
  }

  // Use divmod compiler-rt calls for iOS 5.0 and later.
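  // These helpers compute the quotient and remainder together, so a
  // divide/remainder pair can be lowered to a single libcall.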
  if (Subtarget->getTargetTriple().getOS() == Triple::IOS &&
      !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) {
    setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
    setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, &ARM::tGPRRegClass);
  else
    addRegisterClass(MVT::i32, &ARM::GPRRegClass);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, &ARM::SPRRegClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, &ARM::DPRRegClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
       VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction((MVT::SimpleValueType)VT,
                          (MVT::SimpleValueType)InnerVT, Expand);
    setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
    setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
  }

  setOperationAction(ISD::ConstantFP, MVT::f32, Custom);

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively
    // supported for v4f32.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    // FIXME: Code duplication: FDIV and FREM are expanded always, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    // FIXME: Create unittest.
    // In other words, find a way when "copysign" appears in DAG with vector
    // operands.
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    // FIXME: Code duplication: SETCC has custom operation action, see
    // ARMTargetLowering::addTypeForNEON method for details.
    setOperationAction(ISD::SETCC, MVT::v2f64, Expand);
    // FIXME: Create unittest for FNEG and for FABS.
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR.
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);
    setOperationAction(ISD::FMA, MVT::v2f64, Expand);

    setOperationAction(ISD::FSQRT, MVT::v4f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v4f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v4f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v4f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v4f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v4f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand);

    // Mark v2f32 intrinsics.
    setOperationAction(ISD::FSQRT, MVT::v2f32, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f32, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f32, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f32, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f32, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f32, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f32, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
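    // A multiply whose operands are sign- or zero-extended from a narrower
    // vector can then be matched to VMULL instead of widening the operands
    // and doing a full-width multiply.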
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source, nor does it have a
    // FP_TO_[SU]INT instruction with a narrower destination than source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i16, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i16, Custom);

    setOperationAction(ISD::FP_ROUND,  MVT::v2f32, Expand);
    setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand);

    // Custom expand long extensions to vectors.
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
    setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
    setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);

    // NEON does not have single instruction CTPOP for vectors with element
    // types wider than 8-bits. However, custom lowering can leverage the
    // v8i8/v16i8 vcnt instruction.
    setOperationAction(ISD::CTPOP, MVT::v2i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i32, Custom);
    setOperationAction(ISD::CTPOP, MVT::v4i16, Custom);
    setOperationAction(ISD::CTPOP, MVT::v8i16, Custom);

    // NEON only has FMA instructions as of VFP4.
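    // Without VFP4 there is no fused multiply-add, so FMA on these vector
    // types is expanded back into separate multiply and add operations.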
    if (!Subtarget->hasVFP4()) {
      setOperationAction(ISD::FMA, MVT::v2f32, Expand);
      setOperationAction(ISD::FMA, MVT::v4f32, Expand);
    }

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
    setTargetDAGCombine(ISD::FP_TO_SINT);
    setTargetDAGCombine(ISD::FP_TO_UINT);
    setTargetDAGCombine(ISD::FDIV);

    // It is legal to extload from v4i8 to v4i16 or v4i32.
    MVT Tys[6] = {MVT::v8i8, MVT::v4i8, MVT::v2i8,
                  MVT::v4i16, MVT::v2i16,
                  MVT::v2i32};
    for (unsigned i = 0; i < 6; ++i) {
      setLoadExtAction(ISD::EXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::ZEXTLOAD, Tys[i], Legal);
      setLoadExtAction(ISD::SEXTLOAD, Tys[i], Legal);
    }
  }

  // ARM and Thumb2 support UMLAL/SMLAL.
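  // Registering the ADDC combine below is what allows an i64 add of a
  // multiply result to be matched into those multiply-accumulate forms.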
  if (!Subtarget->isThumb1Only())
    setTargetDAGCombine(ISD::ADDC);

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
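  // i64 itself is not a legal type here, so most 64-bit arithmetic is
  // type-legalized into pairs of i32 operations; the entries below cover the
  // pieces that still need to be expanded or custom lowered.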
  setOperationAction(ISD::MUL, MVT::i64, Expand);
  setOperationAction(ISD::MULHU, MVT::i32, Expand);
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  }
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()
      || (Subtarget->isThumb2() && !Subtarget->hasThumb2DSP()))
    setOperationAction(ISD::MULHS, MVT::i32, Expand);

  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  if (!Subtarget->isThumb1Only()) {
    // FIXME: We should do this for Thumb1 as well.
    setOperationAction(ISD::ADDC, MVT::i32, Custom);
    setOperationAction(ISD::ADDE, MVT::i32, Custom);
    setOperationAction(ISD::SUBC, MVT::i32, Custom);
    setOperationAction(ISD::SUBE, MVT::i32, Custom);
  }

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // These just redirect to CTTZ and CTLZ on ARM.
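  // CLZ gives a defined result (32) for a zero input, so the *_ZERO_UNDEF
  // variants add nothing on ARM and simply reuse the CTTZ/CTLZ lowering.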
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);

  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  if (!(Subtarget->hasDivide() && Subtarget->isThumb2()) &&
      !(Subtarget->hasDivideInARMMode() && !Subtarget->isThumb())) {
    // These are expanded into libcalls if the CPU doesn't have a HW divider.
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  if (!Subtarget->isTargetDarwin()) {
    // Non-Darwin platforms may return values in these registers via the
    // personality function.
    setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
    setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
    setExceptionPointerRegister(ARM::R0);
    setExceptionSelectorRegister(ARM::R1);
  }

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  // FIXME: This should be checking for v6k, not just v6.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
    // Custom lowering for 64-bit ops
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
    setInsertFencesForAtomic(true);
  } else {
    // Set them all for expansion, which will force libcalls.
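    // With no barrier instruction available, expanding these atomic
    // operations turns them into __sync_* runtime library calls.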
    setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
    // Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
    // Unordered/Monotonic case.
    setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
    setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
  }

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!TM.Options.UseSoftFloat && Subtarget->hasVFP2() &&
      !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  if (!Subtarget->hasVFP4()) {
    setOperationAction(ISD::FMA, MVT::f64, Expand);
    setOperationAction(ISD::FMA, MVT::f32, Expand);
  }

  // Various VFP goodness
  if (!TM.Options.UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
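    // Without hardware FP16 support the conversions below are expanded,
    // which on ARM typically ends up as the __gnu_h2f_ieee / __gnu_f2h_ieee
    // library calls (assuming the default RTLIB names).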
843 if (!Subtarget->hasFP16()) { 844 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); 845 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); 846 } 847 } 848 849 // We have target-specific dag combine patterns for the following nodes: 850 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 851 setTargetDAGCombine(ISD::ADD); 852 setTargetDAGCombine(ISD::SUB); 853 setTargetDAGCombine(ISD::MUL); 854 setTargetDAGCombine(ISD::AND); 855 setTargetDAGCombine(ISD::OR); 856 setTargetDAGCombine(ISD::XOR); 857 858 if (Subtarget->hasV6Ops()) 859 setTargetDAGCombine(ISD::SRL); 860 861 setStackPointerRegisterToSaveRestore(ARM::SP); 862 863 if (TM.Options.UseSoftFloat || Subtarget->isThumb1Only() || 864 !Subtarget->hasVFP2()) 865 setSchedulingPreference(Sched::RegPressure); 866 else 867 setSchedulingPreference(Sched::Hybrid); 868 869 //// temporary - rewrite interface to use type 870 MaxStoresPerMemset = 8; 871 MaxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 8 : 4; 872 MaxStoresPerMemcpy = 4; // For @llvm.memcpy -> sequence of stores 873 MaxStoresPerMemcpyOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 874 MaxStoresPerMemmove = 4; // For @llvm.memmove -> sequence of stores 875 MaxStoresPerMemmoveOptSize = Subtarget->isTargetDarwin() ? 4 : 2; 876 877 // On ARM arguments smaller than 4 bytes are extended, so all arguments 878 // are at least 4 bytes aligned. 879 setMinStackArgumentAlignment(4); 880 881 // Prefer likely predicted branches to selects on out-of-order cores. 882 PredictableSelectIsExpensive = Subtarget->isLikeA9(); 883 884 setMinFunctionAlignment(Subtarget->isThumb() ? 1 : 2); 885} 886 887// FIXME: It might make sense to define the representative register class as the 888// nearest super-register that has a non-null superset. For example, DPR_VFP2 is 889// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, 890// SPR's representative would be DPR_VFP2. This should work well if register 891// pressure tracking were modified such that a register use would increment the 892// pressure of the register class's representative and all of it's super 893// classes' representatives transitively. We have not implemented this because 894// of the difficulty prior to coalescing of modeling operand register classes 895// due to the common occurrence of cross class copies and subregister insertions 896// and extractions. 897std::pair<const TargetRegisterClass*, uint8_t> 898ARMTargetLowering::findRepresentativeClass(MVT VT) const{ 899 const TargetRegisterClass *RRC = 0; 900 uint8_t Cost = 1; 901 switch (VT.SimpleTy) { 902 default: 903 return TargetLowering::findRepresentativeClass(VT); 904 // Use DPR as representative register class for all floating point 905 // and vector types. Since there are 32 SPR registers and 32 DPR registers so 906 // the cost is 1 for both f32 and f64. 907 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: 908 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: 909 RRC = &ARM::DPRRegClass; 910 // When NEON is used for SP, only half of the register file is available 911 // because operations that define both SP and DP results will be constrained 912 // to the VFP2 class (D0-D15). We currently model this constraint prior to 913 // coalescing by double-counting the SP regs. See the FIXME above. 
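    // e.g. with NEON used for single-precision FP, an f32 is costed as two
    // D-register units below, so SP values are double-counted against the
    // D0-D15 half of the register file.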
914 if (Subtarget->useNEONForSinglePrecisionFP()) 915 Cost = 2; 916 break; 917 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 918 case MVT::v4f32: case MVT::v2f64: 919 RRC = &ARM::DPRRegClass; 920 Cost = 2; 921 break; 922 case MVT::v4i64: 923 RRC = &ARM::DPRRegClass; 924 Cost = 4; 925 break; 926 case MVT::v8i64: 927 RRC = &ARM::DPRRegClass; 928 Cost = 8; 929 break; 930 } 931 return std::make_pair(RRC, Cost); 932} 933 934const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 935 switch (Opcode) { 936 default: return 0; 937 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 938 case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; 939 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 940 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 941 case ARMISD::CALL: return "ARMISD::CALL"; 942 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 943 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 944 case ARMISD::tCALL: return "ARMISD::tCALL"; 945 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 946 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 947 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 948 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 949 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 950 case ARMISD::CMP: return "ARMISD::CMP"; 951 case ARMISD::CMN: return "ARMISD::CMN"; 952 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 953 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 954 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 955 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 956 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 957 958 case ARMISD::CMOV: return "ARMISD::CMOV"; 959 960 case ARMISD::RBIT: return "ARMISD::RBIT"; 961 962 case ARMISD::FTOSI: return "ARMISD::FTOSI"; 963 case ARMISD::FTOUI: return "ARMISD::FTOUI"; 964 case ARMISD::SITOF: return "ARMISD::SITOF"; 965 case ARMISD::UITOF: return "ARMISD::UITOF"; 966 967 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; 968 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 969 case ARMISD::RRX: return "ARMISD::RRX"; 970 971 case ARMISD::ADDC: return "ARMISD::ADDC"; 972 case ARMISD::ADDE: return "ARMISD::ADDE"; 973 case ARMISD::SUBC: return "ARMISD::SUBC"; 974 case ARMISD::SUBE: return "ARMISD::SUBE"; 975 976 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 977 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 978 979 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 980 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 981 982 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 983 984 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 985 986 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 987 988 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 989 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 990 991 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 992 993 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 994 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 995 case ARMISD::VCGE: return "ARMISD::VCGE"; 996 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 997 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 998 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 999 case ARMISD::VCGT: return "ARMISD::VCGT"; 1000 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 1001 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 1002 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 1003 case ARMISD::VTST: return "ARMISD::VTST"; 1004 1005 case ARMISD::VSHL: return "ARMISD::VSHL"; 1006 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 1007 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 1008 case ARMISD::VSHLLs: 
return "ARMISD::VSHLLs"; 1009 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 1010 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 1011 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 1012 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 1013 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 1014 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 1015 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 1016 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 1017 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 1018 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 1019 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 1020 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 1021 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 1022 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 1023 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 1024 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 1025 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 1026 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 1027 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 1028 case ARMISD::VMOVFPIMM: return "ARMISD::VMOVFPIMM"; 1029 case ARMISD::VDUP: return "ARMISD::VDUP"; 1030 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 1031 case ARMISD::VEXT: return "ARMISD::VEXT"; 1032 case ARMISD::VREV64: return "ARMISD::VREV64"; 1033 case ARMISD::VREV32: return "ARMISD::VREV32"; 1034 case ARMISD::VREV16: return "ARMISD::VREV16"; 1035 case ARMISD::VZIP: return "ARMISD::VZIP"; 1036 case ARMISD::VUZP: return "ARMISD::VUZP"; 1037 case ARMISD::VTRN: return "ARMISD::VTRN"; 1038 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 1039 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 1040 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 1041 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 1042 case ARMISD::UMLAL: return "ARMISD::UMLAL"; 1043 case ARMISD::SMLAL: return "ARMISD::SMLAL"; 1044 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 1045 case ARMISD::FMAX: return "ARMISD::FMAX"; 1046 case ARMISD::FMIN: return "ARMISD::FMIN"; 1047 case ARMISD::BFI: return "ARMISD::BFI"; 1048 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 1049 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 1050 case ARMISD::VBSL: return "ARMISD::VBSL"; 1051 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 1052 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 1053 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 1054 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 1055 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 1056 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 1057 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 1058 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 1059 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 1060 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 1061 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 1062 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 1063 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 1064 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 1065 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 1066 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 1067 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 1068 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 1069 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 1070 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 1071 } 1072} 1073 1074EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { 1075 if (!VT.isVector()) return getPointerTy(); 1076 return VT.changeVectorElementTypeToInteger(); 1077} 1078 1079/// getRegClassFor - 
/// Return the register class that should be used for the
/// specified value type.
const TargetRegisterClass *ARMTargetLowering::getRegClassFor(MVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return &ARM::QQPRRegClass;
    if (VT == MVT::v8i64)
      return &ARM::QQQQPRRegClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return ARM::createFastISel(funcInfo, libInfo);
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}

Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
  unsigned NumVals = N->getNumValues();
  if (!NumVals)
    return Sched::RegPressure;

  for (unsigned i = 0; i != NumVals; ++i) {
    EVT VT = N->getValueType(i);
    if (VT == MVT::Glue || VT == MVT::Other)
      continue;
    if (VT.isFloatingPoint() || VT.isVector())
      return Sched::ILP;
  }

  if (!N->isMachineOpcode())
    return Sched::RegPressure;

  // Loads are scheduled for latency even if the instruction itinerary
  // is not available.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());

  if (MCID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2)
    return Sched::ILP;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE: return ARMCC::NE;
  case ISD::SETEQ: return ARMCC::EQ;
  case ISD::SETGT: return ARMCC::GT;
  case ISD::SETGE: return ARMCC::GE;
  case ISD::SETLT: return ARMCC::LT;
  case ISD::SETLE: return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
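/// Some predicates need two ARM conditions: CondCode2 is set to a second
/// condition to test when one flag check is not enough (e.g. SETONE below is
/// checked as MI or GT).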
1159static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 1160 ARMCC::CondCodes &CondCode2) { 1161 CondCode2 = ARMCC::AL; 1162 switch (CC) { 1163 default: llvm_unreachable("Unknown FP condition!"); 1164 case ISD::SETEQ: 1165 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 1166 case ISD::SETGT: 1167 case ISD::SETOGT: CondCode = ARMCC::GT; break; 1168 case ISD::SETGE: 1169 case ISD::SETOGE: CondCode = ARMCC::GE; break; 1170 case ISD::SETOLT: CondCode = ARMCC::MI; break; 1171 case ISD::SETOLE: CondCode = ARMCC::LS; break; 1172 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 1173 case ISD::SETO: CondCode = ARMCC::VC; break; 1174 case ISD::SETUO: CondCode = ARMCC::VS; break; 1175 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 1176 case ISD::SETUGT: CondCode = ARMCC::HI; break; 1177 case ISD::SETUGE: CondCode = ARMCC::PL; break; 1178 case ISD::SETLT: 1179 case ISD::SETULT: CondCode = ARMCC::LT; break; 1180 case ISD::SETLE: 1181 case ISD::SETULE: CondCode = ARMCC::LE; break; 1182 case ISD::SETNE: 1183 case ISD::SETUNE: CondCode = ARMCC::NE; break; 1184 } 1185} 1186 1187//===----------------------------------------------------------------------===// 1188// Calling Convention Implementation 1189//===----------------------------------------------------------------------===// 1190 1191#include "ARMGenCallingConv.inc" 1192 1193/// CCAssignFnForNode - Selects the correct CCAssignFn for a the 1194/// given CallingConvention value. 1195CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 1196 bool Return, 1197 bool isVarArg) const { 1198 switch (CC) { 1199 default: 1200 llvm_unreachable("Unsupported calling convention"); 1201 case CallingConv::Fast: 1202 if (Subtarget->hasVFP2() && !isVarArg) { 1203 if (!Subtarget->isAAPCS_ABI()) 1204 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1205 // For AAPCS ABI targets, just use VFP variant of the calling convention. 1206 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1207 } 1208 // Fallthrough 1209 case CallingConv::C: { 1210 // Use target triple & subtarget features to do actual dispatch. 1211 if (!Subtarget->isAAPCS_ABI()) 1212 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1213 else if (Subtarget->hasVFP2() && 1214 getTargetMachine().Options.FloatABIType == FloatABI::Hard && 1215 !isVarArg) 1216 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1217 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1218 } 1219 case CallingConv::ARM_AAPCS_VFP: 1220 if (!isVarArg) 1221 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1222 // Fallthrough 1223 case CallingConv::ARM_AAPCS: 1224 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1225 case CallingConv::ARM_APCS: 1226 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1227 case CallingConv::GHC: 1228 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS_GHC); 1229 } 1230} 1231 1232/// LowerCallResult - Lower the result values of a call into the 1233/// appropriate copies out of appropriate physical registers. 1234SDValue 1235ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1236 CallingConv::ID CallConv, bool isVarArg, 1237 const SmallVectorImpl<ISD::InputArg> &Ins, 1238 SDLoc dl, SelectionDAG &DAG, 1239 SmallVectorImpl<SDValue> &InVals, 1240 bool isThisReturn, SDValue ThisVal) const { 1241 1242 // Assign locations to each value returned by this call. 
1243 SmallVector<CCValAssign, 16> RVLocs; 1244 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1245 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 1246 CCInfo.AnalyzeCallResult(Ins, 1247 CCAssignFnForNode(CallConv, /* Return*/ true, 1248 isVarArg)); 1249 1250 // Copy all of the result registers out of their specified physreg. 1251 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1252 CCValAssign VA = RVLocs[i]; 1253 1254 // Pass 'this' value directly from the argument to return value, to avoid 1255 // reg unit interference 1256 if (i == 0 && isThisReturn) { 1257 assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 && 1258 "unexpected return calling convention register assignment"); 1259 InVals.push_back(ThisVal); 1260 continue; 1261 } 1262 1263 SDValue Val; 1264 if (VA.needsCustom()) { 1265 // Handle f64 or half of a v2f64. 1266 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1267 InFlag); 1268 Chain = Lo.getValue(1); 1269 InFlag = Lo.getValue(2); 1270 VA = RVLocs[++i]; // skip ahead to next loc 1271 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1272 InFlag); 1273 Chain = Hi.getValue(1); 1274 InFlag = Hi.getValue(2); 1275 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1276 1277 if (VA.getLocVT() == MVT::v2f64) { 1278 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1279 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1280 DAG.getConstant(0, MVT::i32)); 1281 1282 VA = RVLocs[++i]; // skip ahead to next loc 1283 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1284 Chain = Lo.getValue(1); 1285 InFlag = Lo.getValue(2); 1286 VA = RVLocs[++i]; // skip ahead to next loc 1287 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1288 Chain = Hi.getValue(1); 1289 InFlag = Hi.getValue(2); 1290 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1291 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1292 DAG.getConstant(1, MVT::i32)); 1293 } 1294 } else { 1295 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1296 InFlag); 1297 Chain = Val.getValue(1); 1298 InFlag = Val.getValue(2); 1299 } 1300 1301 switch (VA.getLocInfo()) { 1302 default: llvm_unreachable("Unknown loc info!"); 1303 case CCValAssign::Full: break; 1304 case CCValAssign::BCvt: 1305 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1306 break; 1307 } 1308 1309 InVals.push_back(Val); 1310 } 1311 1312 return Chain; 1313} 1314 1315/// LowerMemOpCallTo - Store the argument to the stack. 
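/// The slot address is formed by adding the location's memory offset to the
/// stack pointer; the resulting store node is returned for the caller to
/// collect into MemOpChains.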
1316SDValue 1317ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1318 SDValue StackPtr, SDValue Arg, 1319 SDLoc dl, SelectionDAG &DAG, 1320 const CCValAssign &VA, 1321 ISD::ArgFlagsTy Flags) const { 1322 unsigned LocMemOffset = VA.getLocMemOffset(); 1323 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1324 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1325 return DAG.getStore(Chain, dl, Arg, PtrOff, 1326 MachinePointerInfo::getStack(LocMemOffset), 1327 false, false, 0); 1328} 1329 1330void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, 1331 SDValue Chain, SDValue &Arg, 1332 RegsToPassVector &RegsToPass, 1333 CCValAssign &VA, CCValAssign &NextVA, 1334 SDValue &StackPtr, 1335 SmallVector<SDValue, 8> &MemOpChains, 1336 ISD::ArgFlagsTy Flags) const { 1337 1338 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1339 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1340 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1341 1342 if (NextVA.isRegLoc()) 1343 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1344 else { 1345 assert(NextVA.isMemLoc()); 1346 if (StackPtr.getNode() == 0) 1347 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1348 1349 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1350 dl, DAG, NextVA, 1351 Flags)); 1352 } 1353} 1354 1355/// LowerCall - Lowering a call into a callseq_start <- 1356/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1357/// nodes. 1358SDValue 1359ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 1360 SmallVectorImpl<SDValue> &InVals) const { 1361 SelectionDAG &DAG = CLI.DAG; 1362 SDLoc &dl = CLI.DL; 1363 SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs; 1364 SmallVector<SDValue, 32> &OutVals = CLI.OutVals; 1365 SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins; 1366 SDValue Chain = CLI.Chain; 1367 SDValue Callee = CLI.Callee; 1368 bool &isTailCall = CLI.IsTailCall; 1369 CallingConv::ID CallConv = CLI.CallConv; 1370 bool doesNotRet = CLI.DoesNotReturn; 1371 bool isVarArg = CLI.IsVarArg; 1372 1373 MachineFunction &MF = DAG.getMachineFunction(); 1374 bool isStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1375 bool isThisReturn = false; 1376 bool isSibCall = false; 1377 // Disable tail calls if they're not supported. 1378 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 1379 isTailCall = false; 1380 if (isTailCall) { 1381 // Check if it's really possible to do a tail call. 1382 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1383 isVarArg, isStructRet, MF.getFunction()->hasStructRetAttr(), 1384 Outs, OutVals, Ins, DAG); 1385 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1386 // detected sibcalls. 1387 if (isTailCall) { 1388 ++NumTailCalls; 1389 isSibCall = true; 1390 } 1391 } 1392 1393 // Analyze operands of the call, assigning locations to each operand. 1394 SmallVector<CCValAssign, 16> ArgLocs; 1395 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 1396 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1397 CCInfo.AnalyzeCallOperands(Outs, 1398 CCAssignFnForNode(CallConv, /* Return*/ false, 1399 isVarArg)); 1400 1401 // Get a count of how many bytes are to be pushed on the stack. 1402 unsigned NumBytes = CCInfo.getNextStackOffset(); 1403 1404 // For tail calls, memory operands are available in our caller's stack. 1405 if (isSibCall) 1406 NumBytes = 0; 1407 1408 // Adjust the stack pointer for the new arguments... 
1409 // These operations are automatically eliminated by the prolog/epilog pass 1410 if (!isSibCall) 1411 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 1412 dl); 1413 1414 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1415 1416 RegsToPassVector RegsToPass; 1417 SmallVector<SDValue, 8> MemOpChains; 1418 1419 // Walk the register/memloc assignments, inserting copies/loads. In the case 1420 // of tail call optimization, arguments are handled later. 1421 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1422 i != e; 1423 ++i, ++realArgIdx) { 1424 CCValAssign &VA = ArgLocs[i]; 1425 SDValue Arg = OutVals[realArgIdx]; 1426 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1427 bool isByVal = Flags.isByVal(); 1428 1429 // Promote the value if needed. 1430 switch (VA.getLocInfo()) { 1431 default: llvm_unreachable("Unknown loc info!"); 1432 case CCValAssign::Full: break; 1433 case CCValAssign::SExt: 1434 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1435 break; 1436 case CCValAssign::ZExt: 1437 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1438 break; 1439 case CCValAssign::AExt: 1440 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1441 break; 1442 case CCValAssign::BCvt: 1443 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1444 break; 1445 } 1446 1447 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1448 if (VA.needsCustom()) { 1449 if (VA.getLocVT() == MVT::v2f64) { 1450 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1451 DAG.getConstant(0, MVT::i32)); 1452 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1453 DAG.getConstant(1, MVT::i32)); 1454 1455 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1456 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1457 1458 VA = ArgLocs[++i]; // skip ahead to next loc 1459 if (VA.isRegLoc()) { 1460 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1461 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1462 } else { 1463 assert(VA.isMemLoc()); 1464 1465 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1466 dl, DAG, VA, Flags)); 1467 } 1468 } else { 1469 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1470 StackPtr, MemOpChains, Flags); 1471 } 1472 } else if (VA.isRegLoc()) { 1473 if (realArgIdx == 0 && Flags.isReturned() && Outs[0].VT == MVT::i32) { 1474 assert(VA.getLocVT() == MVT::i32 && 1475 "unexpected calling convention register assignment"); 1476 assert(!Ins.empty() && Ins[0].VT == MVT::i32 && 1477 "unexpected use of 'returned'"); 1478 isThisReturn = true; 1479 } 1480 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1481 } else if (isByVal) { 1482 assert(VA.isMemLoc()); 1483 unsigned offset = 0; 1484 1485 // True if this byval aggregate will be split between registers 1486 // and memory. 
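      // (For example, an AAPCS byval that starts in r2 but does not fit in
      // r2-r3 passes those two registers below and copies the remainder with
      // COPY_STRUCT_BYVAL.)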
      unsigned ByValArgsCount = CCInfo.getInRegsParamsCount();
      unsigned CurByValIdx = CCInfo.getInRegsParamsProceed();

      if (CurByValIdx < ByValArgsCount) {

        unsigned RegBegin, RegEnd;
        CCInfo.getInRegsParamInfo(CurByValIdx, RegBegin, RegEnd);

        EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
        unsigned int i, j;
        for (i = 0, j = RegBegin; j < RegEnd; i++, j++) {
          SDValue Const = DAG.getConstant(4*i, MVT::i32);
          SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(j, Load));
        }

        // If the parameter's size exceeds the register area, the "offset"
        // value lets us compute the stack slot for the remaining part
        // correctly.
        offset = RegEnd - RegBegin;

        CCInfo.nextInRegsParam();
      }

      if (Flags.getByValSize() > 4*offset) {
        unsigned LocMemOffset = VA.getLocMemOffset();
        SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset);
        SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
                                  StkPtrOff);
        SDValue SrcOffset = DAG.getIntPtrConstant(4*offset);
        SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset);
        SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset,
                                           MVT::i32);
        SDValue AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);

        SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
        SDValue Ops[] = { Chain, Dst, Src, SizeNode, AlignNode};
        MemOpChains.push_back(DAG.getNode(ARMISD::COPY_STRUCT_BYVAL, dl, VTs,
                                          Ops, array_lengthof(Ops)));
      }
    } else if (!isSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
1564 InFlag = SDValue(); 1565 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1566 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1567 RegsToPass[i].second, InFlag); 1568 InFlag = Chain.getValue(1); 1569 } 1570 InFlag = SDValue(); 1571 } 1572 1573 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1574 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1575 // node so that legalize doesn't hack it. 1576 bool isDirect = false; 1577 bool isARMFunc = false; 1578 bool isLocalARMFunc = false; 1579 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1580 1581 if (EnableARMLongCalls) { 1582 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1583 && "long-calls with non-static relocation model!"); 1584 // Handle a global address or an external symbol. If it's not one of 1585 // those, the target's already in a register, so we don't need to do 1586 // anything extra. 1587 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1588 const GlobalValue *GV = G->getGlobal(); 1589 // Create a constant pool entry for the callee address 1590 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1591 ARMConstantPoolValue *CPV = 1592 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 0); 1593 1594 // Get the address of the callee into a register 1595 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1596 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1597 Callee = DAG.getLoad(getPointerTy(), dl, 1598 DAG.getEntryNode(), CPAddr, 1599 MachinePointerInfo::getConstantPool(), 1600 false, false, false, 0); 1601 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1602 const char *Sym = S->getSymbol(); 1603 1604 // Create a constant pool entry for the callee address 1605 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1606 ARMConstantPoolValue *CPV = 1607 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1608 ARMPCLabelIndex, 0); 1609 // Get the address of the callee into a register 1610 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1611 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1612 Callee = DAG.getLoad(getPointerTy(), dl, 1613 DAG.getEntryNode(), CPAddr, 1614 MachinePointerInfo::getConstantPool(), 1615 false, false, false, 0); 1616 } 1617 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1618 const GlobalValue *GV = G->getGlobal(); 1619 isDirect = true; 1620 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1621 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1622 getTargetMachine().getRelocationModel() != Reloc::Static; 1623 isARMFunc = !Subtarget->isThumb() || isStub; 1624 // ARM call to a local ARM function is predicable. 1625 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1626 // tBX takes a register source operand. 
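    // Pre-v5T Thumb has no BLX, so reaching an ARM-mode callee requires an
    // indirect branch; the sequence below materializes the callee address
    // from a constant pool entry and applies the PIC label adjustment.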
1627 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1628 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1629 ARMConstantPoolValue *CPV = 1630 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 4); 1631 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1632 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1633 Callee = DAG.getLoad(getPointerTy(), dl, 1634 DAG.getEntryNode(), CPAddr, 1635 MachinePointerInfo::getConstantPool(), 1636 false, false, false, 0); 1637 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1638 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1639 getPointerTy(), Callee, PICLabel); 1640 } else { 1641 // On ELF targets for PIC code, direct calls should go through the PLT 1642 unsigned OpFlags = 0; 1643 if (Subtarget->isTargetELF() && 1644 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1645 OpFlags = ARMII::MO_PLT; 1646 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1647 } 1648 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1649 isDirect = true; 1650 bool isStub = Subtarget->isTargetDarwin() && 1651 getTargetMachine().getRelocationModel() != Reloc::Static; 1652 isARMFunc = !Subtarget->isThumb() || isStub; 1653 // tBX takes a register source operand. 1654 const char *Sym = S->getSymbol(); 1655 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1656 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1657 ARMConstantPoolValue *CPV = 1658 ARMConstantPoolSymbol::Create(*DAG.getContext(), Sym, 1659 ARMPCLabelIndex, 4); 1660 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1661 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1662 Callee = DAG.getLoad(getPointerTy(), dl, 1663 DAG.getEntryNode(), CPAddr, 1664 MachinePointerInfo::getConstantPool(), 1665 false, false, false, 0); 1666 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1667 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1668 getPointerTy(), Callee, PICLabel); 1669 } else { 1670 unsigned OpFlags = 0; 1671 // On ELF targets for PIC code, direct calls should go through the PLT 1672 if (Subtarget->isTargetELF() && 1673 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1674 OpFlags = ARMII::MO_PLT; 1675 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1676 } 1677 } 1678 1679 // FIXME: handle tail calls differently. 1680 unsigned CallOpc; 1681 bool HasMinSizeAttr = MF.getFunction()->getAttributes(). 1682 hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); 1683 if (Subtarget->isThumb()) { 1684 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1685 CallOpc = ARMISD::CALL_NOLINK; 1686 else 1687 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1688 } else { 1689 if (!isDirect && !Subtarget->hasV5TOps()) 1690 CallOpc = ARMISD::CALL_NOLINK; 1691 else if (doesNotRet && isDirect && Subtarget->hasRAS() && 1692 // Emit regular call when code size is the priority 1693 !HasMinSizeAttr) 1694 // "mov lr, pc; b _foo" to avoid confusing the RSP 1695 CallOpc = ARMISD::CALL_NOLINK; 1696 else 1697 CallOpc = isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL; 1698 } 1699 1700 std::vector<SDValue> Ops; 1701 Ops.push_back(Chain); 1702 Ops.push_back(Callee); 1703 1704 // Add argument registers to the end of the list so that they are known live 1705 // into the call. 
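  // Roughly, the call node assembled below has the shape
  //   (ARMISD::CALL/CALL_PRED/CALL_NOLINK chain, callee,
  //    Reg:R0, Reg:R1, ..., <regmask>, [glue])
  // so the argument registers are visibly live across the call and the mask
  // records which registers the callee preserves.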
1706 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1707 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1708 RegsToPass[i].second.getValueType())); 1709 1710 // Add a register mask operand representing the call-preserved registers. 1711 const uint32_t *Mask; 1712 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 1713 const ARMBaseRegisterInfo *ARI = static_cast<const ARMBaseRegisterInfo*>(TRI); 1714 if (isThisReturn) { 1715 // For 'this' returns, use the R0-preserving mask if applicable 1716 Mask = ARI->getThisReturnPreservedMask(CallConv); 1717 if (!Mask) { 1718 // Set isThisReturn to false if the calling convention is not one that 1719 // allows 'returned' to be modeled in this way, so LowerCallResult does 1720 // not try to pass 'this' straight through 1721 isThisReturn = false; 1722 Mask = ARI->getCallPreservedMask(CallConv); 1723 } 1724 } else 1725 Mask = ARI->getCallPreservedMask(CallConv); 1726 1727 assert(Mask && "Missing call preserved mask for calling convention"); 1728 Ops.push_back(DAG.getRegisterMask(Mask)); 1729 1730 if (InFlag.getNode()) 1731 Ops.push_back(InFlag); 1732 1733 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1734 if (isTailCall) 1735 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1736 1737 // Returns a chain and a flag for retval copy to use. 1738 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1739 InFlag = Chain.getValue(1); 1740 1741 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1742 DAG.getIntPtrConstant(0, true), InFlag, dl); 1743 if (!Ins.empty()) 1744 InFlag = Chain.getValue(1); 1745 1746 // Handle result values, copying them out of physregs into vregs that we 1747 // return. 1748 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, 1749 InVals, isThisReturn, 1750 isThisReturn ? OutVals[0] : SDValue()); 1751} 1752 1753/// HandleByVal - Every parameter *after* a byval parameter is passed 1754/// on the stack. Remember the next parameter register to allocate, 1755/// and then confiscate the rest of the parameter registers to ensure 1756/// this. 1757void 1758ARMTargetLowering::HandleByVal( 1759 CCState *State, unsigned &size, unsigned Align) const { 1760 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1761 assert((State->getCallOrPrologue() == Prologue || 1762 State->getCallOrPrologue() == Call) && 1763 "unhandled ParmContext"); 1764 1765 // For in-prologue parameter handling, we also introduce a stack offset 1766 // for byval registers: see CallingConvLower.cpp, CCState::HandleByVal. 1767 // This behaviour falls outside the AAPCS rules (5.5 Parameter Passing) for 1768 // how the NSAA should be evaluated (NSAA means "next stacked argument address"). 1769 // So: NextStackOffset = NSAAOffset + SizeOfByValParamsStoredInRegs. 1770 // Then: NSAAOffset = NextStackOffset - SizeOfByValParamsStoredInRegs.
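  // Illustrative example: in a prologue where a byval argument was recorded
  // as occupying r2-r3, getNextStackOffset() already counts those 8 bytes,
  // so the loop below recovers the AAPCS value as
  //   NSAAOffset = NextStackOffset - (R4 - R2) * 4 = NextStackOffset - 8.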
1771 unsigned NSAAOffset = State->getNextStackOffset(); 1772 if (State->getCallOrPrologue() != Call) { 1773 for (unsigned i = 0, e = State->getInRegsParamsCount(); i != e; ++i) { 1774 unsigned RB, RE; 1775 State->getInRegsParamInfo(i, RB, RE); 1776 assert(NSAAOffset >= (RE-RB)*4 && 1777 "Stack offset for byval regs doesn't introduced anymore?"); 1778 NSAAOffset -= (RE-RB)*4; 1779 } 1780 } 1781 if ((ARM::R0 <= reg) && (reg <= ARM::R3)) { 1782 if (Subtarget->isAAPCS_ABI() && Align > 4) { 1783 unsigned AlignInRegs = Align / 4; 1784 unsigned Waste = (ARM::R4 - reg) % AlignInRegs; 1785 for (unsigned i = 0; i < Waste; ++i) 1786 reg = State->AllocateReg(GPRArgRegs, 4); 1787 } 1788 if (reg != 0) { 1789 unsigned excess = 4 * (ARM::R4 - reg); 1790 1791 // Special case when NSAA != SP and parameter size greater than size of 1792 // all remained GPR regs. In that case we can't split parameter, we must 1793 // send it to stack. We also must set NCRN to R4, so waste all 1794 // remained registers. 1795 if (Subtarget->isAAPCS_ABI() && NSAAOffset != 0 && size > excess) { 1796 while (State->AllocateReg(GPRArgRegs, 4)) 1797 ; 1798 return; 1799 } 1800 1801 // First register for byval parameter is the first register that wasn't 1802 // allocated before this method call, so it would be "reg". 1803 // If parameter is small enough to be saved in range [reg, r4), then 1804 // the end (first after last) register would be reg + param-size-in-regs, 1805 // else parameter would be splitted between registers and stack, 1806 // end register would be r4 in this case. 1807 unsigned ByValRegBegin = reg; 1808 unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; 1809 State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); 1810 // Note, first register is allocated in the beginning of function already, 1811 // allocate remained amount of registers we need. 1812 for (unsigned i = reg+1; i != ByValRegEnd; ++i) 1813 State->AllocateReg(GPRArgRegs, 4); 1814 // At a call site, a byval parameter that is split between 1815 // registers and memory needs its size truncated here. In a 1816 // function prologue, such byval parameters are reassembled in 1817 // memory, and are not truncated. 1818 if (State->getCallOrPrologue() == Call) { 1819 // Make remained size equal to 0 in case, when 1820 // the whole structure may be stored into registers. 1821 if (size < excess) 1822 size = 0; 1823 else 1824 size -= excess; 1825 } 1826 } 1827 } 1828} 1829 1830/// MatchingStackOffset - Return true if the given stack call argument is 1831/// already available in the same position (relatively) of the caller's 1832/// incoming argument stack. 1833static 1834bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1835 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1836 const TargetInstrInfo *TII) { 1837 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1838 int FI = INT_MAX; 1839 if (Arg.getOpcode() == ISD::CopyFromReg) { 1840 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1841 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1842 return false; 1843 MachineInstr *Def = MRI->getVRegDef(VR); 1844 if (!Def) 1845 return false; 1846 if (!Flags.isByVal()) { 1847 if (!TII->isLoadFromStackSlot(Def, FI)) 1848 return false; 1849 } else { 1850 return false; 1851 } 1852 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1853 if (Flags.isByVal()) 1854 // ByVal argument is passed in as a pointer but it's now being 1855 // dereferenced. e.g. 
1856 // define @foo(%struct.X* %A) { 1857 // tail call @bar(%struct.X* byval %A) 1858 // } 1859 return false; 1860 SDValue Ptr = Ld->getBasePtr(); 1861 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1862 if (!FINode) 1863 return false; 1864 FI = FINode->getIndex(); 1865 } else 1866 return false; 1867 1868 assert(FI != INT_MAX); 1869 if (!MFI->isFixedObjectIndex(FI)) 1870 return false; 1871 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1872} 1873 1874/// IsEligibleForTailCallOptimization - Check whether the call is eligible 1875/// for tail call optimization. Targets which want to do tail call 1876/// optimization should implement this function. 1877bool 1878ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1879 CallingConv::ID CalleeCC, 1880 bool isVarArg, 1881 bool isCalleeStructRet, 1882 bool isCallerStructRet, 1883 const SmallVectorImpl<ISD::OutputArg> &Outs, 1884 const SmallVectorImpl<SDValue> &OutVals, 1885 const SmallVectorImpl<ISD::InputArg> &Ins, 1886 SelectionDAG& DAG) const { 1887 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1888 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1889 bool CCMatch = CallerCC == CalleeCC; 1890 1891 // Look for obvious safe cases to perform tail call optimization that do not 1892 // require ABI changes. This is what gcc calls sibcall. 1893 1894 // Do not sibcall optimize vararg calls unless the call site is not passing 1895 // any arguments. 1896 if (isVarArg && !Outs.empty()) 1897 return false; 1898 1899 // Also avoid sibcall optimization if either caller or callee uses struct 1900 // return semantics. 1901 if (isCalleeStructRet || isCallerStructRet) 1902 return false; 1903 1904 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1905 // emitEpilogue is not ready for them. Thumb tail calls also use t2B, as 1906 // the Thumb1 16-bit unconditional branch doesn't have sufficient relocation 1907 // support in the assembler and linker to be used. This would need to be 1908 // fixed to fully support tail calls in Thumb1. 1909 // 1910 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1911 // LR. This means if we need to reload LR, it takes an extra instructions, 1912 // which outweighs the value of the tail call; but here we don't know yet 1913 // whether LR is going to be used. Probably the right approach is to 1914 // generate the tail call here and turn it back into CALL/RET in 1915 // emitEpilogue if LR is used. 1916 1917 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1918 // but we need to make sure there are enough registers; the only valid 1919 // registers are the 4 used for parameters. We don't currently do this 1920 // case. 1921 if (Subtarget->isThumb1Only()) 1922 return false; 1923 1924 // If the calling conventions do not match, then we'd better make sure the 1925 // results are returned in the same way as what the caller expects. 
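  // For example (assuming the usual ARM conventions): a float returned in s0
  // under AAPCS-VFP but expected in r0 under soft-float AAPCS makes the
  // location comparison below fail, so the sibcall is rejected.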
1926 if (!CCMatch) { 1927 SmallVector<CCValAssign, 16> RVLocs1; 1928 ARMCCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), 1929 getTargetMachine(), RVLocs1, *DAG.getContext(), Call); 1930 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1931 1932 SmallVector<CCValAssign, 16> RVLocs2; 1933 ARMCCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), 1934 getTargetMachine(), RVLocs2, *DAG.getContext(), Call); 1935 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1936 1937 if (RVLocs1.size() != RVLocs2.size()) 1938 return false; 1939 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1940 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1941 return false; 1942 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1943 return false; 1944 if (RVLocs1[i].isRegLoc()) { 1945 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1946 return false; 1947 } else { 1948 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1949 return false; 1950 } 1951 } 1952 } 1953 1954 // If Caller's vararg or byval argument has been split between registers and 1955 // stack, do not perform tail call, since part of the argument is in caller's 1956 // local frame. 1957 const ARMFunctionInfo *AFI_Caller = DAG.getMachineFunction(). 1958 getInfo<ARMFunctionInfo>(); 1959 if (AFI_Caller->getArgRegsSaveSize()) 1960 return false; 1961 1962 // If the callee takes no arguments then go on to check the results of the 1963 // call. 1964 if (!Outs.empty()) { 1965 // Check if stack adjustment is needed. For now, do not do this if any 1966 // argument is passed on the stack. 1967 SmallVector<CCValAssign, 16> ArgLocs; 1968 ARMCCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), 1969 getTargetMachine(), ArgLocs, *DAG.getContext(), Call); 1970 CCInfo.AnalyzeCallOperands(Outs, 1971 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1972 if (CCInfo.getNextStackOffset()) { 1973 MachineFunction &MF = DAG.getMachineFunction(); 1974 1975 // Check if the arguments are already laid out in the right way as 1976 // the caller's fixed stack objects. 1977 MachineFrameInfo *MFI = MF.getFrameInfo(); 1978 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1979 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 1980 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1981 i != e; 1982 ++i, ++realArgIdx) { 1983 CCValAssign &VA = ArgLocs[i]; 1984 EVT RegVT = VA.getLocVT(); 1985 SDValue Arg = OutVals[realArgIdx]; 1986 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1987 if (VA.getLocInfo() == CCValAssign::Indirect) 1988 return false; 1989 if (VA.needsCustom()) { 1990 // f64 and vector types are split into multiple registers or 1991 // register/stack-slot combinations. The types will not match 1992 // the registers; give up on memory f64 refs until we figure 1993 // out what to do about this. 
1994 if (!VA.isRegLoc()) 1995 return false; 1996 if (!ArgLocs[++i].isRegLoc()) 1997 return false; 1998 if (RegVT == MVT::v2f64) { 1999 if (!ArgLocs[++i].isRegLoc()) 2000 return false; 2001 if (!ArgLocs[++i].isRegLoc()) 2002 return false; 2003 } 2004 } else if (!VA.isRegLoc()) { 2005 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 2006 MFI, MRI, TII)) 2007 return false; 2008 } 2009 } 2010 } 2011 } 2012 2013 return true; 2014} 2015 2016bool 2017ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 2018 MachineFunction &MF, bool isVarArg, 2019 const SmallVectorImpl<ISD::OutputArg> &Outs, 2020 LLVMContext &Context) const { 2021 SmallVector<CCValAssign, 16> RVLocs; 2022 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), RVLocs, Context); 2023 return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true, 2024 isVarArg)); 2025} 2026 2027SDValue 2028ARMTargetLowering::LowerReturn(SDValue Chain, 2029 CallingConv::ID CallConv, bool isVarArg, 2030 const SmallVectorImpl<ISD::OutputArg> &Outs, 2031 const SmallVectorImpl<SDValue> &OutVals, 2032 SDLoc dl, SelectionDAG &DAG) const { 2033 2034 // CCValAssign - represent the assignment of the return value to a location. 2035 SmallVector<CCValAssign, 16> RVLocs; 2036 2037 // CCState - Info about the registers and stack slots. 2038 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2039 getTargetMachine(), RVLocs, *DAG.getContext(), Call); 2040 2041 // Analyze outgoing return values. 2042 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 2043 isVarArg)); 2044 2045 SDValue Flag; 2046 SmallVector<SDValue, 4> RetOps; 2047 RetOps.push_back(Chain); // Operand #0 = Chain (updated below) 2048 2049 // Copy the result values into the output registers. 2050 for (unsigned i = 0, realRVLocIdx = 0; 2051 i != RVLocs.size(); 2052 ++i, ++realRVLocIdx) { 2053 CCValAssign &VA = RVLocs[i]; 2054 assert(VA.isRegLoc() && "Can only return in registers!"); 2055 2056 SDValue Arg = OutVals[realRVLocIdx]; 2057 2058 switch (VA.getLocInfo()) { 2059 default: llvm_unreachable("Unknown loc info!"); 2060 case CCValAssign::Full: break; 2061 case CCValAssign::BCvt: 2062 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 2063 break; 2064 } 2065 2066 if (VA.needsCustom()) { 2067 if (VA.getLocVT() == MVT::v2f64) { 2068 // Extract the first half and return it in two registers. 2069 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2070 DAG.getConstant(0, MVT::i32)); 2071 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 2072 DAG.getVTList(MVT::i32, MVT::i32), Half); 2073 2074 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 2075 Flag = Chain.getValue(1); 2076 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2077 VA = RVLocs[++i]; // skip ahead to next loc 2078 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 2079 HalfGPRs.getValue(1), Flag); 2080 Flag = Chain.getValue(1); 2081 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2082 VA = RVLocs[++i]; // skip ahead to next loc 2083 2084 // Extract the 2nd half and fall through to handle it as an f64 value. 2085 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 2086 DAG.getConstant(1, MVT::i32)); 2087 } 2088 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 2089 // available. 
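      // For instance, "ret double" under a soft-float return convention: the
      // VMOVRRD below splits the f64 into two i32 words and they are copied
      // into the GPR pair the convention assigned, typically r0/r1 for the
      // first f64 result.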
2090 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 2091 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 2092 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 2093 Flag = Chain.getValue(1); 2094 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2095 VA = RVLocs[++i]; // skip ahead to next loc 2096 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 2097 Flag); 2098 } else 2099 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 2100 2101 // Guarantee that all emitted copies are 2102 // stuck together, avoiding something bad. 2103 Flag = Chain.getValue(1); 2104 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 2105 } 2106 2107 // Update chain and glue. 2108 RetOps[0] = Chain; 2109 if (Flag.getNode()) 2110 RetOps.push_back(Flag); 2111 2112 return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, 2113 RetOps.data(), RetOps.size()); 2114} 2115 2116bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { 2117 if (N->getNumValues() != 1) 2118 return false; 2119 if (!N->hasNUsesOfValue(1, 0)) 2120 return false; 2121 2122 SDValue TCChain = Chain; 2123 SDNode *Copy = *N->use_begin(); 2124 if (Copy->getOpcode() == ISD::CopyToReg) { 2125 // If the copy has a glue operand, we conservatively assume it isn't safe to 2126 // perform a tail call. 2127 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) 2128 return false; 2129 TCChain = Copy->getOperand(0); 2130 } else if (Copy->getOpcode() == ARMISD::VMOVRRD) { 2131 SDNode *VMov = Copy; 2132 // f64 returned in a pair of GPRs. 2133 SmallPtrSet<SDNode*, 2> Copies; 2134 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2135 UI != UE; ++UI) { 2136 if (UI->getOpcode() != ISD::CopyToReg) 2137 return false; 2138 Copies.insert(*UI); 2139 } 2140 if (Copies.size() > 2) 2141 return false; 2142 2143 for (SDNode::use_iterator UI = VMov->use_begin(), UE = VMov->use_end(); 2144 UI != UE; ++UI) { 2145 SDValue UseChain = UI->getOperand(0); 2146 if (Copies.count(UseChain.getNode())) 2147 // Second CopyToReg 2148 Copy = *UI; 2149 else 2150 // First CopyToReg 2151 TCChain = UseChain; 2152 } 2153 } else if (Copy->getOpcode() == ISD::BITCAST) { 2154 // f32 returned in a single GPR. 2155 if (!Copy->hasOneUse()) 2156 return false; 2157 Copy = *Copy->use_begin(); 2158 if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) 2159 return false; 2160 TCChain = Copy->getOperand(0); 2161 } else { 2162 return false; 2163 } 2164 2165 bool HasRet = false; 2166 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 2167 UI != UE; ++UI) { 2168 if (UI->getOpcode() != ARMISD::RET_FLAG) 2169 return false; 2170 HasRet = true; 2171 } 2172 2173 if (!HasRet) 2174 return false; 2175 2176 Chain = TCChain; 2177 return true; 2178} 2179 2180bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 2181 if (!EnableARMTailCalls && !Subtarget->supportsTailCall()) 2182 return false; 2183 2184 if (!CI->isTailCall()) 2185 return false; 2186 2187 return !Subtarget->isThumb1Only(); 2188} 2189 2190// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 2191// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 2192// one of the above mentioned nodes. It has to be wrapped because otherwise 2193// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 2194// be used to form addressing mode. These wrapped nodes will be selected 2195// into MOVi. 
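// For example, a constant-pool reference feeding a load is lowered to
// roughly
//   (load (ARMISD::Wrapper (TargetConstantPool <cp#N>)))
// and only the wrapped target node survives into instruction selection.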
2196static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 2197 EVT PtrVT = Op.getValueType(); 2198 // FIXME there is no actual debug info here 2199 SDLoc dl(Op); 2200 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 2201 SDValue Res; 2202 if (CP->isMachineConstantPoolEntry()) 2203 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 2204 CP->getAlignment()); 2205 else 2206 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 2207 CP->getAlignment()); 2208 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 2209} 2210 2211unsigned ARMTargetLowering::getJumpTableEncoding() const { 2212 return MachineJumpTableInfo::EK_Inline; 2213} 2214 2215SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 2216 SelectionDAG &DAG) const { 2217 MachineFunction &MF = DAG.getMachineFunction(); 2218 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2219 unsigned ARMPCLabelIndex = 0; 2220 SDLoc DL(Op); 2221 EVT PtrVT = getPointerTy(); 2222 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 2223 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2224 SDValue CPAddr; 2225 if (RelocM == Reloc::Static) { 2226 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 2227 } else { 2228 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2229 ARMPCLabelIndex = AFI->createPICLabelUId(); 2230 ARMConstantPoolValue *CPV = 2231 ARMConstantPoolConstant::Create(BA, ARMPCLabelIndex, 2232 ARMCP::CPBlockAddress, PCAdj); 2233 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2234 } 2235 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 2236 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 2237 MachinePointerInfo::getConstantPool(), 2238 false, false, false, 0); 2239 if (RelocM == Reloc::Static) 2240 return Result; 2241 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2242 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 2243} 2244 2245// Lower ISD::GlobalTLSAddress using the "general dynamic" model 2246SDValue 2247ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 2248 SelectionDAG &DAG) const { 2249 SDLoc dl(GA); 2250 EVT PtrVT = getPointerTy(); 2251 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2252 MachineFunction &MF = DAG.getMachineFunction(); 2253 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2254 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2255 ARMConstantPoolValue *CPV = 2256 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2257 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 2258 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2259 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 2260 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 2261 MachinePointerInfo::getConstantPool(), 2262 false, false, false, 0); 2263 SDValue Chain = Argument.getValue(1); 2264 2265 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2266 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 2267 2268 // call __tls_get_addr. 2269 ArgListTy Args; 2270 ArgListEntry Entry; 2271 Entry.Node = Argument; 2272 Entry.Ty = (Type *) Type::getInt32Ty(*DAG.getContext()); 2273 Args.push_back(Entry); 2274 // FIXME: is there useful debug info available here? 
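  // In the general-dynamic model the variable's address is only known at run
  // time: the argument prepared above is, in effect, a pointer to the
  // (module, offset) descriptor for the variable, and the call to
  // __tls_get_addr below (plain C calling convention, i32 result) returns
  // the final address.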
2275 TargetLowering::CallLoweringInfo CLI(Chain, 2276 (Type *) Type::getInt32Ty(*DAG.getContext()), 2277 false, false, false, false, 2278 0, CallingConv::C, /*isTailCall=*/false, 2279 /*doesNotRet=*/false, /*isReturnValueUsed=*/true, 2280 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 2281 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 2282 return CallResult.first; 2283} 2284 2285// Lower ISD::GlobalTLSAddress using the "initial exec" or 2286// "local exec" model. 2287SDValue 2288ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 2289 SelectionDAG &DAG, 2290 TLSModel::Model model) const { 2291 const GlobalValue *GV = GA->getGlobal(); 2292 SDLoc dl(GA); 2293 SDValue Offset; 2294 SDValue Chain = DAG.getEntryNode(); 2295 EVT PtrVT = getPointerTy(); 2296 // Get the Thread Pointer 2297 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2298 2299 if (model == TLSModel::InitialExec) { 2300 MachineFunction &MF = DAG.getMachineFunction(); 2301 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2302 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2303 // Initial exec model. 2304 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 2305 ARMConstantPoolValue *CPV = 2306 ARMConstantPoolConstant::Create(GA->getGlobal(), ARMPCLabelIndex, 2307 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, 2308 true); 2309 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2310 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2311 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2312 MachinePointerInfo::getConstantPool(), 2313 false, false, false, 0); 2314 Chain = Offset.getValue(1); 2315 2316 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2317 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2318 2319 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2320 MachinePointerInfo::getConstantPool(), 2321 false, false, false, 0); 2322 } else { 2323 // local exec model 2324 assert(model == TLSModel::LocalExec); 2325 ARMConstantPoolValue *CPV = 2326 ARMConstantPoolConstant::Create(GV, ARMCP::TPOFF); 2327 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2328 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2329 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2330 MachinePointerInfo::getConstantPool(), 2331 false, false, false, 0); 2332 } 2333 2334 // The address of the thread local variable is the add of the thread 2335 // pointer with the offset of the variable. 
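  // Illustratively: for local-exec the constant pool directly holds
  // TPOFF(var), a link-time constant, so the result is TP + TPOFF(var);
  // for initial-exec the pool holds the address of a GOT slot, and the
  // second load above fetches GOTTPOFF(var) from it before the same add.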
2336 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2337} 2338 2339SDValue 2340ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2341 // TODO: implement the "local dynamic" model 2342 assert(Subtarget->isTargetELF() && 2343 "TLS not implemented for non-ELF targets"); 2344 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2345 2346 TLSModel::Model model = getTargetMachine().getTLSModel(GA->getGlobal()); 2347 2348 switch (model) { 2349 case TLSModel::GeneralDynamic: 2350 case TLSModel::LocalDynamic: 2351 return LowerToTLSGeneralDynamicModel(GA, DAG); 2352 case TLSModel::InitialExec: 2353 case TLSModel::LocalExec: 2354 return LowerToTLSExecModels(GA, DAG, model); 2355 } 2356 llvm_unreachable("bogus TLS model"); 2357} 2358 2359SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2360 SelectionDAG &DAG) const { 2361 EVT PtrVT = getPointerTy(); 2362 SDLoc dl(Op); 2363 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2364 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2365 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2366 ARMConstantPoolValue *CPV = 2367 ARMConstantPoolConstant::Create(GV, 2368 UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 2369 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2370 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2371 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2372 CPAddr, 2373 MachinePointerInfo::getConstantPool(), 2374 false, false, false, 0); 2375 SDValue Chain = Result.getValue(1); 2376 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2377 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2378 if (!UseGOTOFF) 2379 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2380 MachinePointerInfo::getGOT(), 2381 false, false, false, 0); 2382 return Result; 2383 } 2384 2385 // If we have T2 ops, we can materialize the address directly via movt/movw 2386 // pair. This is always cheaper. 2387 if (Subtarget->useMovt()) { 2388 ++NumMovwMovt; 2389 // FIXME: Once remat is capable of dealing with instructions with register 2390 // operands, expand this into two nodes. 2391 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2392 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2393 } else { 2394 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2395 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2396 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2397 MachinePointerInfo::getConstantPool(), 2398 false, false, false, 0); 2399 } 2400} 2401 2402SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2403 SelectionDAG &DAG) const { 2404 EVT PtrVT = getPointerTy(); 2405 SDLoc dl(Op); 2406 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2407 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2408 2409 // FIXME: Enable this for static codegen when tool issues are fixed. Also 2410 // update ARMFastISel::ARMMaterializeGV. 2411 if (Subtarget->useMovt() && RelocM != Reloc::Static) { 2412 ++NumMovwMovt; 2413 // FIXME: Once remat is capable of dealing with instructions with register 2414 // operands, expand this into two nodes. 2415 if (RelocM == Reloc::Static) 2416 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2417 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2418 2419 unsigned Wrapper = (RelocM == Reloc::PIC_) 2420 ? 
ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2421 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2422 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2423 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2424 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2425 MachinePointerInfo::getGOT(), 2426 false, false, false, 0); 2427 return Result; 2428 } 2429 2430 unsigned ARMPCLabelIndex = 0; 2431 SDValue CPAddr; 2432 if (RelocM == Reloc::Static) { 2433 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2434 } else { 2435 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2436 ARMPCLabelIndex = AFI->createPICLabelUId(); 2437 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2438 ARMConstantPoolValue *CPV = 2439 ARMConstantPoolConstant::Create(GV, ARMPCLabelIndex, ARMCP::CPValue, 2440 PCAdj); 2441 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2442 } 2443 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2444 2445 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2446 MachinePointerInfo::getConstantPool(), 2447 false, false, false, 0); 2448 SDValue Chain = Result.getValue(1); 2449 2450 if (RelocM == Reloc::PIC_) { 2451 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2452 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2453 } 2454 2455 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2456 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2457 false, false, false, 0); 2458 2459 return Result; 2460} 2461 2462SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2463 SelectionDAG &DAG) const { 2464 assert(Subtarget->isTargetELF() && 2465 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2466 MachineFunction &MF = DAG.getMachineFunction(); 2467 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2468 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2469 EVT PtrVT = getPointerTy(); 2470 SDLoc dl(Op); 2471 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2472 ARMConstantPoolValue *CPV = 2473 ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", 2474 ARMPCLabelIndex, PCAdj); 2475 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2476 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2477 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2478 MachinePointerInfo::getConstantPool(), 2479 false, false, false, 0); 2480 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2481 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2482} 2483 2484SDValue 2485ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2486 SDLoc dl(Op); 2487 SDValue Val = DAG.getConstant(0, MVT::i32); 2488 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, 2489 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), 2490 Op.getOperand(1), Val); 2491} 2492 2493SDValue 2494ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2495 SDLoc dl(Op); 2496 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2497 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2498} 2499 2500SDValue 2501ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2502 const ARMSubtarget *Subtarget) const { 2503 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2504 SDLoc dl(Op); 2505 switch (IntNo) { 2506 default: return SDValue(); // Don't custom lower most intrinsics. 
2507 case Intrinsic::arm_thread_pointer: { 2508 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2509 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2510 } 2511 case Intrinsic::eh_sjlj_lsda: { 2512 MachineFunction &MF = DAG.getMachineFunction(); 2513 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2514 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2515 EVT PtrVT = getPointerTy(); 2516 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2517 SDValue CPAddr; 2518 unsigned PCAdj = (RelocM != Reloc::PIC_) 2519 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2520 ARMConstantPoolValue *CPV = 2521 ARMConstantPoolConstant::Create(MF.getFunction(), ARMPCLabelIndex, 2522 ARMCP::CPLSDA, PCAdj); 2523 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2524 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2525 SDValue Result = 2526 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2527 MachinePointerInfo::getConstantPool(), 2528 false, false, false, 0); 2529 2530 if (RelocM == Reloc::PIC_) { 2531 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2532 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2533 } 2534 return Result; 2535 } 2536 case Intrinsic::arm_neon_vmulls: 2537 case Intrinsic::arm_neon_vmullu: { 2538 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2539 ? ARMISD::VMULLs : ARMISD::VMULLu; 2540 return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), 2541 Op.getOperand(1), Op.getOperand(2)); 2542 } 2543 } 2544} 2545 2546static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, 2547 const ARMSubtarget *Subtarget) { 2548 // FIXME: handle "fence singlethread" more efficiently. 2549 SDLoc dl(Op); 2550 if (!Subtarget->hasDataBarrier()) { 2551 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2552 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2553 // here. 2554 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2555 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2556 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2557 DAG.getConstant(0, MVT::i32)); 2558 } 2559 2560 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2561 DAG.getConstant(ARM_MB::ISH, MVT::i32)); 2562} 2563 2564static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2565 const ARMSubtarget *Subtarget) { 2566 // ARM pre v5TE and Thumb1 does not have preload instructions. 2567 if (!(Subtarget->isThumb2() || 2568 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2569 // Just preserve the chain. 2570 return Op.getOperand(0); 2571 2572 SDLoc dl(Op); 2573 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2574 if (!isRead && 2575 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2576 // ARMv7 with MP extension has PLDW. 2577 return Op.getOperand(0); 2578 2579 unsigned isData = cast<ConstantSDNode>(Op.getOperand(4))->getZExtValue(); 2580 if (Subtarget->isThumb()) { 2581 // Invert the bits. 
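  // e.g. a data-read prefetch (rw = 0, cache flag = 1) yields
  // isRead = 1 / isData = 1 for ARM mode; under Thumb the inversion below
  // turns that into 0 / 0, presumably the polarity the Thumb-2 preload
  // patterns expect.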
2582 isRead = ~isRead & 1; 2583 isData = ~isData & 1; 2584 } 2585 2586 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2587 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2588 DAG.getConstant(isData, MVT::i32)); 2589} 2590 2591static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2592 MachineFunction &MF = DAG.getMachineFunction(); 2593 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2594 2595 // vastart just stores the address of the VarArgsFrameIndex slot into the 2596 // memory location argument. 2597 SDLoc dl(Op); 2598 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2599 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2600 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2601 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2602 MachinePointerInfo(SV), false, false, 0); 2603} 2604 2605SDValue 2606ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2607 SDValue &Root, SelectionDAG &DAG, 2608 SDLoc dl) const { 2609 MachineFunction &MF = DAG.getMachineFunction(); 2610 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2611 2612 const TargetRegisterClass *RC; 2613 if (AFI->isThumb1OnlyFunction()) 2614 RC = &ARM::tGPRRegClass; 2615 else 2616 RC = &ARM::GPRRegClass; 2617 2618 // Transform the arguments stored in physical registers into virtual ones. 2619 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2620 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2621 2622 SDValue ArgValue2; 2623 if (NextVA.isMemLoc()) { 2624 MachineFrameInfo *MFI = MF.getFrameInfo(); 2625 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2626 2627 // Create load node to retrieve arguments from the stack. 2628 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2629 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2630 MachinePointerInfo::getFixedStack(FI), 2631 false, false, false, 0); 2632 } else { 2633 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2634 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2635 } 2636 2637 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2638} 2639 2640void 2641ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2642 unsigned InRegsParamRecordIdx, 2643 unsigned ArgSize, 2644 unsigned &ArgRegsSize, 2645 unsigned &ArgRegsSaveSize) 2646 const { 2647 unsigned NumGPRs; 2648 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2649 unsigned RBegin, REnd; 2650 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2651 NumGPRs = REnd - RBegin; 2652 } else { 2653 unsigned int firstUnalloced; 2654 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2655 sizeof(GPRArgRegs) / 2656 sizeof(GPRArgRegs[0])); 2657 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2658 } 2659 2660 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2661 ArgRegsSize = NumGPRs * 4; 2662 2663 // If parameter is split between stack and GPRs... 2664 if (NumGPRs && Align == 8 && 2665 (ArgRegsSize < ArgSize || 2666 InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { 2667 // Add padding for part of param recovered from GPRs, so 2668 // its last byte must be at address K*8 - 1. 2669 // We need to do it, since remained (stack) part of parameter has 2670 // stack alignment, and we need to "attach" "GPRs head" without gaps 2671 // to it: 2672 // Stack: 2673 // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... 
2674 // [ [padding] [GPRs head] ] [ Tail passed via stack .... 2675 // 2676 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2677 unsigned Padding = 2678 ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - 2679 (ArgRegsSize + AFI->getArgRegsSaveSize()); 2680 ArgRegsSaveSize = ArgRegsSize + Padding; 2681 } else 2682 // We don't need to extend regs save size for byval parameters if they 2683 // are passed via GPRs only. 2684 ArgRegsSaveSize = ArgRegsSize; 2685} 2686 2687// The remaining GPRs hold either the beginning of variable-argument 2688// data, or the beginning of an aggregate passed by value (usually 2689// byval). Either way, we allocate stack slots adjacent to the data 2690// provided by our caller, and store the unallocated registers there. 2691// If this is a variadic function, the va_list pointer will begin with 2692// these values; otherwise, this reassembles a (byval) structure that 2693// was split between registers and memory. 2694// Return: The frame index registers were stored into. 2695int 2696ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, 2697 SDLoc dl, SDValue &Chain, 2698 const Value *OrigArg, 2699 unsigned InRegsParamRecordIdx, 2700 unsigned OffsetFromOrigArg, 2701 unsigned ArgOffset, 2702 unsigned ArgSize, 2703 bool ForceMutable) const { 2704 2705 // Currently, two use-cases possible: 2706 // Case #1. Non var-args function, and we meet first byval parameter. 2707 // Setup first unallocated register as first byval register; 2708 // eat all remained registers 2709 // (these two actions are performed by HandleByVal method). 2710 // Then, here, we initialize stack frame with 2711 // "store-reg" instructions. 2712 // Case #2. Var-args function, that doesn't contain byval parameters. 2713 // The same: eat all remained unallocated registers, 2714 // initialize stack frame. 2715 2716 MachineFunction &MF = DAG.getMachineFunction(); 2717 MachineFrameInfo *MFI = MF.getFrameInfo(); 2718 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2719 unsigned firstRegToSaveIndex, lastRegToSaveIndex; 2720 unsigned RBegin, REnd; 2721 if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) { 2722 CCInfo.getInRegsParamInfo(InRegsParamRecordIdx, RBegin, REnd); 2723 firstRegToSaveIndex = RBegin - ARM::R0; 2724 lastRegToSaveIndex = REnd - ARM::R0; 2725 } else { 2726 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2727 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2728 lastRegToSaveIndex = 4; 2729 } 2730 2731 unsigned ArgRegsSize, ArgRegsSaveSize; 2732 computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, 2733 ArgRegsSize, ArgRegsSaveSize); 2734 2735 // Store any by-val regs to their spots on the stack so that they may be 2736 // loaded by deferencing the result of formal parameter pointer or va_next. 2737 // Note: once stack area for byval/varargs registers 2738 // was initialized, it can't be initialized again. 
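  // Worked example (illustrative numbers): with 8-byte stack alignment and a
  // byval argument split as r3 + stack, computeRegArea sees ArgRegsSize = 4
  // and, if nothing was saved yet, Padding = ((4 + 0 + 7) & ~7) - 4 = 4, so
  // ArgRegsSaveSize = 8: four bytes of padding precede the register word and
  // the stacked tail then starts on its natural 8-byte boundary.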
2739 if (ArgRegsSaveSize) { 2740 2741 unsigned Padding = ArgRegsSaveSize - ArgRegsSize; 2742 2743 if (Padding) { 2744 assert(AFI->getStoredByValParamsPadding() == 0 && 2745 "The only parameter may be padded."); 2746 AFI->setStoredByValParamsPadding(Padding); 2747 } 2748 2749 int FrameIndex = MFI->CreateFixedObject( 2750 ArgRegsSaveSize, 2751 Padding + ArgOffset, 2752 false); 2753 SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); 2754 2755 SmallVector<SDValue, 4> MemOps; 2756 for (unsigned i = 0; firstRegToSaveIndex < lastRegToSaveIndex; 2757 ++firstRegToSaveIndex, ++i) { 2758 const TargetRegisterClass *RC; 2759 if (AFI->isThumb1OnlyFunction()) 2760 RC = &ARM::tGPRRegClass; 2761 else 2762 RC = &ARM::GPRRegClass; 2763 2764 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2765 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2766 SDValue Store = 2767 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2768 MachinePointerInfo(OrigArg, OffsetFromOrigArg + 4*i), 2769 false, false, 0); 2770 MemOps.push_back(Store); 2771 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2772 DAG.getConstant(4, getPointerTy())); 2773 } 2774 2775 AFI->setArgRegsSaveSize(ArgRegsSaveSize + AFI->getArgRegsSaveSize()); 2776 2777 if (!MemOps.empty()) 2778 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2779 &MemOps[0], MemOps.size()); 2780 return FrameIndex; 2781 } else 2782 // This will point to the next argument passed via stack. 2783 return MFI->CreateFixedObject( 2784 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); 2785} 2786 2787// Setup stack frame, the va_list pointer will start from. 2788void 2789ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2790 SDLoc dl, SDValue &Chain, 2791 unsigned ArgOffset, 2792 bool ForceMutable) const { 2793 MachineFunction &MF = DAG.getMachineFunction(); 2794 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2795 2796 // Try to store any remaining integer argument regs 2797 // to their spots on the stack so that they may be loaded by deferencing 2798 // the result of va_next. 2799 // If there is no regs to be stored, just point address after last 2800 // argument passed via stack. 2801 int FrameIndex = 2802 StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), 2803 0, ArgOffset, 0, ForceMutable); 2804 2805 AFI->setVarArgsFrameIndex(FrameIndex); 2806} 2807 2808SDValue 2809ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2810 CallingConv::ID CallConv, bool isVarArg, 2811 const SmallVectorImpl<ISD::InputArg> 2812 &Ins, 2813 SDLoc dl, SelectionDAG &DAG, 2814 SmallVectorImpl<SDValue> &InVals) 2815 const { 2816 MachineFunction &MF = DAG.getMachineFunction(); 2817 MachineFrameInfo *MFI = MF.getFrameInfo(); 2818 2819 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2820 2821 // Assign locations to all of the incoming arguments. 2822 SmallVector<CCValAssign, 16> ArgLocs; 2823 ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2824 getTargetMachine(), ArgLocs, *DAG.getContext(), Prologue); 2825 CCInfo.AnalyzeFormalArguments(Ins, 2826 CCAssignFnForNode(CallConv, /* Return*/ false, 2827 isVarArg)); 2828 2829 SmallVector<SDValue, 16> ArgValues; 2830 int lastInsIndex = -1; 2831 SDValue ArgValue; 2832 Function::const_arg_iterator CurOrigArg = MF.getFunction()->arg_begin(); 2833 unsigned CurArgIdx = 0; 2834 2835 // Initially ArgRegsSaveSize is zero. 2836 // Then we increase this value each time we meet byval parameter. 
2837 // We also increase this value in case of varargs function. 2838 AFI->setArgRegsSaveSize(0); 2839 2840 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2841 CCValAssign &VA = ArgLocs[i]; 2842 std::advance(CurOrigArg, Ins[VA.getValNo()].OrigArgIndex - CurArgIdx); 2843 CurArgIdx = Ins[VA.getValNo()].OrigArgIndex; 2844 // Arguments stored in registers. 2845 if (VA.isRegLoc()) { 2846 EVT RegVT = VA.getLocVT(); 2847 2848 if (VA.needsCustom()) { 2849 // f64 and vector types are split up into multiple registers or 2850 // combinations of registers and stack slots. 2851 if (VA.getLocVT() == MVT::v2f64) { 2852 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2853 Chain, DAG, dl); 2854 VA = ArgLocs[++i]; // skip ahead to next loc 2855 SDValue ArgValue2; 2856 if (VA.isMemLoc()) { 2857 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2858 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2859 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2860 MachinePointerInfo::getFixedStack(FI), 2861 false, false, false, 0); 2862 } else { 2863 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2864 Chain, DAG, dl); 2865 } 2866 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2867 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2868 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2869 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2870 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2871 } else 2872 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2873 2874 } else { 2875 const TargetRegisterClass *RC; 2876 2877 if (RegVT == MVT::f32) 2878 RC = &ARM::SPRRegClass; 2879 else if (RegVT == MVT::f64) 2880 RC = &ARM::DPRRegClass; 2881 else if (RegVT == MVT::v2f64) 2882 RC = &ARM::QPRRegClass; 2883 else if (RegVT == MVT::i32) 2884 RC = AFI->isThumb1OnlyFunction() ? 2885 (const TargetRegisterClass*)&ARM::tGPRRegClass : 2886 (const TargetRegisterClass*)&ARM::GPRRegClass; 2887 else 2888 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2889 2890 // Transform the arguments in physical registers into virtual ones. 2891 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2892 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2893 } 2894 2895 // If this is an 8 or 16-bit value, it is really passed promoted 2896 // to 32 bits. Insert an assert[sz]ext to capture this, then 2897 // truncate to the right size. 2898 switch (VA.getLocInfo()) { 2899 default: llvm_unreachable("Unknown loc info!"); 2900 case CCValAssign::Full: break; 2901 case CCValAssign::BCvt: 2902 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2903 break; 2904 case CCValAssign::SExt: 2905 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2906 DAG.getValueType(VA.getValVT())); 2907 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2908 break; 2909 case CCValAssign::ZExt: 2910 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2911 DAG.getValueType(VA.getValVT())); 2912 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2913 break; 2914 } 2915 2916 InVals.push_back(ArgValue); 2917 2918 } else { // VA.isRegLoc() 2919 2920 // sanity check 2921 assert(VA.isMemLoc()); 2922 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2923 2924 int index = ArgLocs[i].getValNo(); 2925 2926 // Some Ins[] entries become multiple ArgLoc[] entries. 2927 // Process them only once. 
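        // (Several ArgLocs can share one value number when the calling
        // convention splits a single IR argument across locations; checking
        // lastInsIndex keeps the stack object, or byval copy, from being
        // created more than once for that argument.)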
2928 if (index != lastInsIndex) 2929 { 2930 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2931 // FIXME: For now, all byval parameter objects are marked mutable. 2932 // This can be changed with more analysis. 2933 // In case of tail call optimization mark all arguments mutable. 2934 // Since they could be overwritten by lowering of arguments in case of 2935 // a tail call. 2936 if (Flags.isByVal()) { 2937 unsigned CurByValIndex = CCInfo.getInRegsParamsProceed(); 2938 int FrameIndex = StoreByValRegs( 2939 CCInfo, DAG, dl, Chain, CurOrigArg, 2940 CurByValIndex, 2941 Ins[VA.getValNo()].PartOffset, 2942 VA.getLocMemOffset(), 2943 Flags.getByValSize(), 2944 true /*force mutable frames*/); 2945 InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); 2946 CCInfo.nextInRegsParam(); 2947 } else { 2948 unsigned FIOffset = VA.getLocMemOffset() + 2949 AFI->getStoredByValParamsPadding(); 2950 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2951 FIOffset, true); 2952 2953 // Create load nodes to retrieve arguments from the stack. 2954 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2955 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2956 MachinePointerInfo::getFixedStack(FI), 2957 false, false, false, 0)); 2958 } 2959 lastInsIndex = index; 2960 } 2961 } 2962 } 2963 2964 // varargs 2965 if (isVarArg) 2966 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 2967 CCInfo.getNextStackOffset()); 2968 2969 return Chain; 2970} 2971 2972/// isFloatingPointZero - Return true if this is +0.0. 2973static bool isFloatingPointZero(SDValue Op) { 2974 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2975 return CFP->getValueAPF().isPosZero(); 2976 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2977 // Maybe this has already been legalized into the constant pool? 2978 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2979 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2980 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2981 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2982 return CFP->getValueAPF().isPosZero(); 2983 } 2984 } 2985 return false; 2986} 2987 2988/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2989/// the given operands. 2990SDValue 2991ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2992 SDValue &ARMcc, SelectionDAG &DAG, 2993 SDLoc dl) const { 2994 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2995 unsigned C = RHSC->getZExtValue(); 2996 if (!isLegalICmpImmediate(C)) { 2997 // Constant does not fit, try adjusting it by one? 2998 switch (CC) { 2999 default: break; 3000 case ISD::SETLT: 3001 case ISD::SETGE: 3002 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 3003 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 3004 RHS = DAG.getConstant(C-1, MVT::i32); 3005 } 3006 break; 3007 case ISD::SETULT: 3008 case ISD::SETUGE: 3009 if (C != 0 && isLegalICmpImmediate(C-1)) { 3010 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 3011 RHS = DAG.getConstant(C-1, MVT::i32); 3012 } 3013 break; 3014 case ISD::SETLE: 3015 case ISD::SETGT: 3016 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 3017 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 3018 RHS = DAG.getConstant(C+1, MVT::i32); 3019 } 3020 break; 3021 case ISD::SETULE: 3022 case ISD::SETUGT: 3023 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 3024 CC = (CC == ISD::SETULE) ? 
ISD::SETULT : ISD::SETUGE; 3025 RHS = DAG.getConstant(C+1, MVT::i32); 3026 } 3027 break; 3028 } 3029 } 3030 } 3031 3032 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3033 ARMISD::NodeType CompareType; 3034 switch (CondCode) { 3035 default: 3036 CompareType = ARMISD::CMP; 3037 break; 3038 case ARMCC::EQ: 3039 case ARMCC::NE: 3040 // Uses only Z Flag 3041 CompareType = ARMISD::CMPZ; 3042 break; 3043 } 3044 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3045 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 3046} 3047 3048/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 3049SDValue 3050ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 3051 SDLoc dl) const { 3052 SDValue Cmp; 3053 if (!isFloatingPointZero(RHS)) 3054 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 3055 else 3056 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 3057 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 3058} 3059 3060/// duplicateCmp - Glue values can have only one use, so this function 3061/// duplicates a comparison node. 3062SDValue 3063ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 3064 unsigned Opc = Cmp.getOpcode(); 3065 SDLoc DL(Cmp); 3066 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 3067 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3068 3069 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 3070 Cmp = Cmp.getOperand(0); 3071 Opc = Cmp.getOpcode(); 3072 if (Opc == ARMISD::CMPFP) 3073 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 3074 else { 3075 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 3076 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 3077 } 3078 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 3079} 3080 3081SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 3082 SDValue Cond = Op.getOperand(0); 3083 SDValue SelectTrue = Op.getOperand(1); 3084 SDValue SelectFalse = Op.getOperand(2); 3085 SDLoc dl(Op); 3086 3087 // Convert: 3088 // 3089 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 3090 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 3091 // 3092 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 3093 const ConstantSDNode *CMOVTrue = 3094 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 3095 const ConstantSDNode *CMOVFalse = 3096 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 3097 3098 if (CMOVTrue && CMOVFalse) { 3099 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 3100 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 3101 3102 SDValue True; 3103 SDValue False; 3104 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 3105 True = SelectTrue; 3106 False = SelectFalse; 3107 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 3108 True = SelectFalse; 3109 False = SelectTrue; 3110 } 3111 3112 if (True.getNode() && False.getNode()) { 3113 EVT VT = Op.getValueType(); 3114 SDValue ARMcc = Cond.getOperand(2); 3115 SDValue CCR = Cond.getOperand(3); 3116 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 3117 assert(True.getValueType() == VT); 3118 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 3119 } 3120 } 3121 } 3122 3123 // ARM's BooleanContents value is UndefinedBooleanContent. Mask out the 3124 // undefined bits before doing a full-word comparison with zero. 
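  // i.e. (select i1 %c, t, f) becomes, roughly,
  //   %m = and %c, 1
  //   select_cc %m, 0, t, f, setne
  // since only bit 0 of a boolean value is defined on ARM.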
  Cond = DAG.getNode(ISD::AND, dl, Cond.getValueType(), Cond,
                     DAG.getConstant(1, Cond.getValueType()));

  return DAG.getSelectCC(dl, Cond,
                         DAG.getConstant(0, Cond.getValueType()),
                         SelectTrue, SelectFalse, ISD::SETNE);
}

SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  EVT VT = Op.getValueType();
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDValue TrueVal = Op.getOperand(2);
  SDValue FalseVal = Op.getOperand(3);
  SDLoc dl(Op);

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, Cmp);
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
                               ARMcc, CCR, Cmp);
  if (CondCode2 != ARMCC::AL) {
    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
    // FIXME: Needs another CMP because flag can have but one use.
    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
    Result = DAG.getNode(ARMISD::CMOV, dl, VT,
                         Result, TrueVal, ARMcc2, CCR, Cmp2);
  }
  return Result;
}

/// canChangeToInt - Given the fp compare operand, return true if it is suitable
/// to morph to an integer compare sequence.
static bool canChangeToInt(SDValue Op, bool &SeenZero,
                           const ARMSubtarget *Subtarget) {
  SDNode *N = Op.getNode();
  if (!N->hasOneUse())
    // Otherwise it requires moving the value from fp to integer registers.
    return false;
  if (!N->getNumValues())
    return false;
  EVT VT = Op.getValueType();
  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
    // vmrs are very slow, e.g. cortex-a8.
3181 return false; 3182 3183 if (isFloatingPointZero(Op)) { 3184 SeenZero = true; 3185 return true; 3186 } 3187 return ISD::isNormalLoad(N); 3188} 3189 3190static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 3191 if (isFloatingPointZero(Op)) 3192 return DAG.getConstant(0, MVT::i32); 3193 3194 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 3195 return DAG.getLoad(MVT::i32, SDLoc(Op), 3196 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 3197 Ld->isVolatile(), Ld->isNonTemporal(), 3198 Ld->isInvariant(), Ld->getAlignment()); 3199 3200 llvm_unreachable("Unknown VFP cmp argument!"); 3201} 3202 3203static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 3204 SDValue &RetVal1, SDValue &RetVal2) { 3205 if (isFloatingPointZero(Op)) { 3206 RetVal1 = DAG.getConstant(0, MVT::i32); 3207 RetVal2 = DAG.getConstant(0, MVT::i32); 3208 return; 3209 } 3210 3211 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 3212 SDValue Ptr = Ld->getBasePtr(); 3213 RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), 3214 Ld->getChain(), Ptr, 3215 Ld->getPointerInfo(), 3216 Ld->isVolatile(), Ld->isNonTemporal(), 3217 Ld->isInvariant(), Ld->getAlignment()); 3218 3219 EVT PtrType = Ptr.getValueType(); 3220 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 3221 SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), 3222 PtrType, Ptr, DAG.getConstant(4, PtrType)); 3223 RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), 3224 Ld->getChain(), NewPtr, 3225 Ld->getPointerInfo().getWithOffset(4), 3226 Ld->isVolatile(), Ld->isNonTemporal(), 3227 Ld->isInvariant(), NewAlign); 3228 return; 3229 } 3230 3231 llvm_unreachable("Unknown VFP cmp argument!"); 3232} 3233 3234/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 3235/// f32 and even f64 comparisons to integer ones. 3236SDValue 3237ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 3238 SDValue Chain = Op.getOperand(0); 3239 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3240 SDValue LHS = Op.getOperand(2); 3241 SDValue RHS = Op.getOperand(3); 3242 SDValue Dest = Op.getOperand(4); 3243 SDLoc dl(Op); 3244 3245 bool LHSSeenZero = false; 3246 bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); 3247 bool RHSSeenZero = false; 3248 bool RHSOk = canChangeToInt(RHS, RHSSeenZero, Subtarget); 3249 if (LHSOk && RHSOk && (LHSSeenZero || RHSSeenZero)) { 3250 // If unsafe fp math optimization is enabled and there are no other uses of 3251 // the CMP operands, and the condition code is EQ or NE, we can optimize it 3252 // to an integer comparison. 
3253 if (CC == ISD::SETOEQ) 3254 CC = ISD::SETEQ; 3255 else if (CC == ISD::SETUNE) 3256 CC = ISD::SETNE; 3257 3258 SDValue Mask = DAG.getConstant(0x7fffffff, MVT::i32); 3259 SDValue ARMcc; 3260 if (LHS.getValueType() == MVT::f32) { 3261 LHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3262 bitcastf32Toi32(LHS, DAG), Mask); 3263 RHS = DAG.getNode(ISD::AND, dl, MVT::i32, 3264 bitcastf32Toi32(RHS, DAG), Mask); 3265 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3266 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3267 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3268 Chain, Dest, ARMcc, CCR, Cmp); 3269 } 3270 3271 SDValue LHS1, LHS2; 3272 SDValue RHS1, RHS2; 3273 expandf64Toi32(LHS, DAG, LHS1, LHS2); 3274 expandf64Toi32(RHS, DAG, RHS1, RHS2); 3275 LHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, LHS2, Mask); 3276 RHS2 = DAG.getNode(ISD::AND, dl, MVT::i32, RHS2, Mask); 3277 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 3278 ARMcc = DAG.getConstant(CondCode, MVT::i32); 3279 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3280 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 3281 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 3282 } 3283 3284 return SDValue(); 3285} 3286 3287SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 3288 SDValue Chain = Op.getOperand(0); 3289 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 3290 SDValue LHS = Op.getOperand(2); 3291 SDValue RHS = Op.getOperand(3); 3292 SDValue Dest = Op.getOperand(4); 3293 SDLoc dl(Op); 3294 3295 if (LHS.getValueType() == MVT::i32) { 3296 SDValue ARMcc; 3297 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 3298 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3299 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 3300 Chain, Dest, ARMcc, CCR, Cmp); 3301 } 3302 3303 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 3304 3305 if (getTargetMachine().Options.UnsafeFPMath && 3306 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 3307 CC == ISD::SETNE || CC == ISD::SETUNE)) { 3308 SDValue Result = OptimizeVFPBrcond(Op, DAG); 3309 if (Result.getNode()) 3310 return Result; 3311 } 3312 3313 ARMCC::CondCodes CondCode, CondCode2; 3314 FPCCToARMCC(CC, CondCode, CondCode2); 3315 3316 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 3317 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 3318 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3319 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 3320 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 3321 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3322 if (CondCode2 != ARMCC::AL) { 3323 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 3324 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 3325 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 3326 } 3327 return Res; 3328} 3329 3330SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 3331 SDValue Chain = Op.getOperand(0); 3332 SDValue Table = Op.getOperand(1); 3333 SDValue Index = Op.getOperand(2); 3334 SDLoc dl(Op); 3335 3336 EVT PTy = getPointerTy(); 3337 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 3338 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 3339 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 3340 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 3341 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 3342 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 3343 SDValue Addr = 
DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 3344 if (Subtarget->isThumb2()) { 3345 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 3346 // which does another jump to the destination. This also makes it easier 3347 // to translate it to TBB / TBH later. 3348 // FIXME: This might not work if the function is extremely large. 3349 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 3350 Addr, Op.getOperand(2), JTI, UId); 3351 } 3352 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 3353 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 3354 MachinePointerInfo::getJumpTable(), 3355 false, false, false, 0); 3356 Chain = Addr.getValue(1); 3357 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 3358 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3359 } else { 3360 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 3361 MachinePointerInfo::getJumpTable(), 3362 false, false, false, 0); 3363 Chain = Addr.getValue(1); 3364 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 3365 } 3366} 3367 3368static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3369 EVT VT = Op.getValueType(); 3370 SDLoc dl(Op); 3371 3372 if (Op.getValueType().getVectorElementType() == MVT::i32) { 3373 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) 3374 return Op; 3375 return DAG.UnrollVectorOp(Op.getNode()); 3376 } 3377 3378 assert(Op.getOperand(0).getValueType() == MVT::v4f32 && 3379 "Invalid type for custom lowering!"); 3380 if (VT != MVT::v4i16) 3381 return DAG.UnrollVectorOp(Op.getNode()); 3382 3383 Op = DAG.getNode(Op.getOpcode(), dl, MVT::v4i32, Op.getOperand(0)); 3384 return DAG.getNode(ISD::TRUNCATE, dl, VT, Op); 3385} 3386 3387static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 3388 EVT VT = Op.getValueType(); 3389 if (VT.isVector()) 3390 return LowerVectorFP_TO_INT(Op, DAG); 3391 3392 SDLoc dl(Op); 3393 unsigned Opc; 3394 3395 switch (Op.getOpcode()) { 3396 default: llvm_unreachable("Invalid opcode!"); 3397 case ISD::FP_TO_SINT: 3398 Opc = ARMISD::FTOSI; 3399 break; 3400 case ISD::FP_TO_UINT: 3401 Opc = ARMISD::FTOUI; 3402 break; 3403 } 3404 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 3405 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 3406} 3407 3408static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3409 EVT VT = Op.getValueType(); 3410 SDLoc dl(Op); 3411 3412 if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { 3413 if (VT.getVectorElementType() == MVT::f32) 3414 return Op; 3415 return DAG.UnrollVectorOp(Op.getNode()); 3416 } 3417 3418 assert(Op.getOperand(0).getValueType() == MVT::v4i16 && 3419 "Invalid type for custom lowering!"); 3420 if (VT != MVT::v4f32) 3421 return DAG.UnrollVectorOp(Op.getNode()); 3422 3423 unsigned CastOpc; 3424 unsigned Opc; 3425 switch (Op.getOpcode()) { 3426 default: llvm_unreachable("Invalid opcode!"); 3427 case ISD::SINT_TO_FP: 3428 CastOpc = ISD::SIGN_EXTEND; 3429 Opc = ISD::SINT_TO_FP; 3430 break; 3431 case ISD::UINT_TO_FP: 3432 CastOpc = ISD::ZERO_EXTEND; 3433 Opc = ISD::UINT_TO_FP; 3434 break; 3435 } 3436 3437 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 3438 return DAG.getNode(Opc, dl, VT, Op); 3439} 3440 3441static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 3442 EVT VT = Op.getValueType(); 3443 if (VT.isVector()) 3444 return LowerVectorINT_TO_FP(Op, DAG); 3445 3446 SDLoc dl(Op); 3447 unsigned Opc; 3448 3449 switch (Op.getOpcode()) { 3450 default: 
llvm_unreachable("Invalid opcode!"); 3451 case ISD::SINT_TO_FP: 3452 Opc = ARMISD::SITOF; 3453 break; 3454 case ISD::UINT_TO_FP: 3455 Opc = ARMISD::UITOF; 3456 break; 3457 } 3458 3459 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3460 return DAG.getNode(Opc, dl, VT, Op); 3461} 3462 3463SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3464 // Implement fcopysign with a fabs and a conditional fneg. 3465 SDValue Tmp0 = Op.getOperand(0); 3466 SDValue Tmp1 = Op.getOperand(1); 3467 SDLoc dl(Op); 3468 EVT VT = Op.getValueType(); 3469 EVT SrcVT = Tmp1.getValueType(); 3470 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3471 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3472 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3473 3474 if (UseNEON) { 3475 // Use VBSL to copy the sign bit. 3476 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3477 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3478 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3479 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3480 if (VT == MVT::f64) 3481 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3482 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3483 DAG.getConstant(32, MVT::i32)); 3484 else /*if (VT == MVT::f32)*/ 3485 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3486 if (SrcVT == MVT::f32) { 3487 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3488 if (VT == MVT::f64) 3489 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3490 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3491 DAG.getConstant(32, MVT::i32)); 3492 } else if (VT == MVT::f32) 3493 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3494 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3495 DAG.getConstant(32, MVT::i32)); 3496 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3497 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3498 3499 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3500 MVT::i32); 3501 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3502 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3503 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3504 3505 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3506 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3507 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3508 if (VT == MVT::f32) { 3509 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3510 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3511 DAG.getConstant(0, MVT::i32)); 3512 } else { 3513 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3514 } 3515 3516 return Res; 3517 } 3518 3519 // Bitcast operand 1 to i32. 3520 if (SrcVT == MVT::f64) 3521 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3522 &Tmp1, 1).getValue(1); 3523 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3524 3525 // Or in the signbit with integer operations. 3526 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3527 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3528 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3529 if (VT == MVT::f32) { 3530 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3531 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3532 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3533 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3534 } 3535 3536 // f64: Or the high part with signbit and then combine two parts. 
  Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32),
                     &Tmp0, 1);
  SDValue Lo = Tmp0.getValue(0);
  SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2);
  Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1);
  return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}

SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setReturnAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(4, MVT::i32);
    return DAG.getLoad(VT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
                       MachinePointerInfo(), false, false, false, 0);
  }

  // Return LR, which contains the return address. Mark it an implicit live-in.
  unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32));
  return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
}

SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  EVT VT = Op.getValueType();
  SDLoc dl(Op);  // FIXME probably not meaningful
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin())
    ? ARM::R7 : ARM::R11;
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo(),
                            false, false, false, 0);
  return FrameAddr;
}

/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec),
/// and size(DestVec) > 128-bits.
/// This is achieved by doing the one extension from the SrcVec, splitting the
/// result, extending these parts, and then concatenating these into the
/// destination.
static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) {
  SDValue Op = N->getOperand(0);
  EVT SrcVT = Op.getValueType();
  EVT DestVT = N->getValueType(0);

  assert(DestVT.getSizeInBits() > 128 &&
         "Custom sext/zext expansion needs >128-bit vector.");
  // If this is a normal length extension, use the default expansion.
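  // Illustrative note (added, not from the original source): this custom path
  // is only meant for "double long" extensions such as v8i8 -> v8i32 (64-bit
  // source to 256-bit result, a 4x widening) or v8i8 -> v8i64 (8x widening).
  // A single-step widening such as v4i16 -> v4i32 stays within 128 bits and
  // never reaches here; anything that is not a 4x or 8x widening is left to
  // the default expansion via the early return below.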
  if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() &&
      SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits())
    return SDValue();

  SDLoc dl(N);
  unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
  unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits();
  unsigned NumElts = SrcVT.getVectorNumElements();
  LLVMContext &Ctx = *DAG.getContext();
  SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi;

  EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
                               NumElts);
  EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2),
                                 NumElts/2);
  EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize),
                               NumElts/2);

  Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op);
  SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
                        DAG.getIntPtrConstant(0));
  SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid,
                        DAG.getIntPtrConstant(NumElts/2));
  ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo);
  ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi);
}

/// ExpandBITCAST - If the target supports VFP, this function is called to
/// expand a bit convert where either the source or destination type is i64 to
/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64
/// operand type is illegal (e.g., v2f32 for a target that doesn't support
/// vectors), since the legalizer won't know what to do with that.
static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc dl(N);
  SDValue Op = N->getOperand(0);

  // This function is only supposed to be called for i64 types, either as the
  // source or destination of the bit convert.
  EVT SrcVT = Op.getValueType();
  EVT DstVT = N->getValueType(0);
  assert((SrcVT == MVT::i64 || DstVT == MVT::i64) &&
         "ExpandBITCAST called for non-i64 type");

  // Turn i64->f64 into VMOVDRR.
  if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) {
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(0, MVT::i32));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
                             DAG.getConstant(1, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, DstVT,
                       DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi));
  }

  // Turn f64->i64 into VMOVRRD.
  if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) {
    SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
    // Merge the pieces into a single i64 value.
    return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1));
  }

  return SDValue();
}

/// getZeroVector - Returns a vector of specified type with all zero elements.
/// Zero vectors are used to represent vector negation and in those cases
/// will be implemented with the NEON VNEG instruction. However, VNEG does
/// not support i64 elements, so sometimes the zero vectors will need to be
/// explicitly constructed. Regardless, use a canonical VMOV to create the
/// zero vector.
static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) {
  assert(VT.isVector() && "Expected a vector type");
  // The canonical modified immediate encoding of a zero vector is....0!
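  // Illustrative note (added): for example, a 128-bit zero such as v2i64 or
  // v4i32 is built here as (v4i32 (VMOVIMM 0)) and then bitcast to the
  // requested type, which selects to a single "vmov.i32 qN, #0"-style
  // instruction regardless of the element type asked for.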
  SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32);
  EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
}

/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
                                                SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;
  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;

  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);

  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);

  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
                                               SelectionDAG &DAG) const {
  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getSizeInBits();
  SDLoc dl(Op);
  SDValue ShOpLo = Op.getOperand(0);
  SDValue ShOpHi = Op.getOperand(1);
  SDValue ShAmt = Op.getOperand(2);
  SDValue ARMcc;

  assert(Op.getOpcode() == ISD::SHL_PARTS);
  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
                                   DAG.getConstant(VTBits, MVT::i32));
  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);

  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
                          ARMcc, DAG, dl);
  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                           CCR, Cmp);

  SDValue Ops[2] = { Lo, Hi };
  return DAG.getMergeValues(Ops, 2, dl);
}

SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // The rounding mode is in bits 23:22 of the FPSCR.
  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
  // so that the shift and the AND get folded into a bitfield extract.
  SDLoc dl(Op);
  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
                              DAG.getConstant(Intrinsic::arm_get_fpscr,
                                              MVT::i32));
  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
                                  DAG.getConstant(1U << 22, MVT::i32));
  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
                              DAG.getConstant(22, MVT::i32));
  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
                     DAG.getConstant(3, MVT::i32));
}

static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
                         const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  if (!ST->hasV6T2Ops())
    return SDValue();

  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
}

/// getCTPOP16BitCounts - Returns a v8i8/v16i8 vector containing the bit-count
/// for each 16-bit element from operand, repeated. The basic idea is to
/// leverage vcnt to get the 8-bit counts, gather and add the results.
///
/// Trace for v4i16:
/// input    = [v0    v1    v2    v3   ] (vi 16-bit element)
/// cast: N0 = [w0 w1 w2 w3 w4 w5 w6 w7] (v0 = [w0 w1], wi 8-bit element)
/// vcnt: N1 = [b0 b1 b2 b3 b4 b5 b6 b7] (bi = bit-count of 8-bit element wi)
/// vrev: N2 = [b1 b0 b3 b2 b5 b4 b7 b6]
///            [b0 b1 b2 b3 b4 b5 b6 b7]
///           +[b1 b0 b3 b2 b5 b4 b7 b6]
/// N3=N1+N2 = [k0 k0 k1 k1 k2 k2 k3 k3] (k0 = b0+b1 = bit-count of 16-bit v0,
/// vuzp:    = [k0 k1 k2 k3 k0 k1 k2 k3]  each ki is 8-bits)
static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8;
  SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0));
  SDValue N1 = DAG.getNode(ISD::CTPOP, DL, VT8Bit, N0);
  SDValue N2 = DAG.getNode(ARMISD::VREV16, DL, VT8Bit, N1);
  SDValue N3 = DAG.getNode(ISD::ADD, DL, VT8Bit, N1, N2);
  return DAG.getNode(ARMISD::VUZP, DL, VT8Bit, N3, N3);
}

/// lowerCTPOP16BitElements - Returns a v4i16/v8i16 vector containing the
/// bit-count for each 16-bit element from the operand. We need slightly
/// different sequencing for v4i16 and v8i16 to stay within NEON's available
/// 64/128-bit registers.
3806/// 3807/// Trace for v4i16: 3808/// input = [v0 v1 v2 v3 ] (vi 16-bit element) 3809/// v8i8: BitCounts = [k0 k1 k2 k3 k0 k1 k2 k3 ] (ki is the bit-count of vi) 3810/// v8i16:Extended = [k0 k1 k2 k3 k0 k1 k2 k3 ] 3811/// v4i16:Extracted = [k0 k1 k2 k3 ] 3812static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { 3813 EVT VT = N->getValueType(0); 3814 SDLoc DL(N); 3815 3816 SDValue BitCounts = getCTPOP16BitCounts(N, DAG); 3817 if (VT.is64BitVector()) { 3818 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, BitCounts); 3819 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Extended, 3820 DAG.getIntPtrConstant(0)); 3821 } else { 3822 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i8, 3823 BitCounts, DAG.getIntPtrConstant(0)); 3824 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v8i16, Extracted); 3825 } 3826} 3827 3828/// lowerCTPOP32BitElements - Returns a v2i32/v4i32 vector containing the 3829/// bit-count for each 32-bit element from the operand. The idea here is 3830/// to split the vector into 16-bit elements, leverage the 16-bit count 3831/// routine, and then combine the results. 3832/// 3833/// Trace for v2i32 (v4i32 similar with Extracted/Extended exchanged): 3834/// input = [v0 v1 ] (vi: 32-bit elements) 3835/// Bitcast = [w0 w1 w2 w3 ] (wi: 16-bit elements, v0 = [w0 w1]) 3836/// Counts16 = [k0 k1 k2 k3 ] (ki: 16-bit elements, bit-count of wi) 3837/// vrev: N0 = [k1 k0 k3 k2 ] 3838/// [k0 k1 k2 k3 ] 3839/// N1 =+[k1 k0 k3 k2 ] 3840/// [k0 k2 k1 k3 ] 3841/// N2 =+[k1 k3 k0 k2 ] 3842/// [k0 k2 k1 k3 ] 3843/// Extended =+[k1 k3 k0 k2 ] 3844/// [k0 k2 ] 3845/// Extracted=+[k1 k3 ] 3846/// 3847static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { 3848 EVT VT = N->getValueType(0); 3849 SDLoc DL(N); 3850 3851 EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; 3852 3853 SDValue Bitcast = DAG.getNode(ISD::BITCAST, DL, VT16Bit, N->getOperand(0)); 3854 SDValue Counts16 = lowerCTPOP16BitElements(Bitcast.getNode(), DAG); 3855 SDValue N0 = DAG.getNode(ARMISD::VREV32, DL, VT16Bit, Counts16); 3856 SDValue N1 = DAG.getNode(ISD::ADD, DL, VT16Bit, Counts16, N0); 3857 SDValue N2 = DAG.getNode(ARMISD::VUZP, DL, VT16Bit, N1, N1); 3858 3859 if (VT.is64BitVector()) { 3860 SDValue Extended = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, N2); 3861 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2i32, Extended, 3862 DAG.getIntPtrConstant(0)); 3863 } else { 3864 SDValue Extracted = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, N2, 3865 DAG.getIntPtrConstant(0)); 3866 return DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i32, Extracted); 3867 } 3868} 3869 3870static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, 3871 const ARMSubtarget *ST) { 3872 EVT VT = N->getValueType(0); 3873 3874 assert(ST->hasNEON() && "Custom ctpop lowering requires NEON."); 3875 assert((VT == MVT::v2i32 || VT == MVT::v4i32 || 3876 VT == MVT::v4i16 || VT == MVT::v8i16) && 3877 "Unexpected type for custom ctpop lowering"); 3878 3879 if (VT.getVectorElementType() == MVT::i32) 3880 return lowerCTPOP32BitElements(N, DAG); 3881 else 3882 return lowerCTPOP16BitElements(N, DAG); 3883} 3884 3885static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 3886 const ARMSubtarget *ST) { 3887 EVT VT = N->getValueType(0); 3888 SDLoc dl(N); 3889 3890 if (!VT.isVector()) 3891 return SDValue(); 3892 3893 // Lower vector shifts on NEON to use VSHL. 
  assert(ST->hasNEON() && "unexpected vector shift");

  // Left shifts translate directly to the vshiftu intrinsic.
  if (N->getOpcode() == ISD::SHL)
    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
                       N->getOperand(0), N->getOperand(1));

  assert((N->getOpcode() == ISD::SRA ||
          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");

  // NEON uses the same intrinsics for both left and right shifts. For
  // right shifts, the shift amounts are negative, so negate the vector of
  // shift amounts.
  EVT ShiftVT = N->getOperand(1).getValueType();
  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
                                     getZeroVector(ShiftVT, DAG, dl),
                                     N->getOperand(1));
  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
                             Intrinsic::arm_neon_vshifts :
                             Intrinsic::arm_neon_vshiftu);
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
                     DAG.getConstant(vshiftInt, MVT::i32),
                     N->getOperand(0), NegatedCount);
}

static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
                                const ARMSubtarget *ST) {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // We can get here for a node like i32 = ISD::SHL i32, i64
  if (VT != MVT::i64)
    return SDValue();

  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "Unknown shift to lower!");

  // We only lower SRA, SRL of 1 here, all others use generic lowering.
  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
    return SDValue();

  // If we are in thumb mode, we don't have RRX.
  if (ST->isThumb1Only()) return SDValue();

  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
  SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(0, MVT::i32));
  SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0),
                           DAG.getConstant(1, MVT::i32));

  // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and
  // captures the result into a carry flag.
  unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG;
  Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1);

  // The low part is an ARMISD::RRX operand, which shifts the carry in.
  Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1));

  // Merge the pieces into a single i64 value.
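  // Illustrative note (added): with the i64 value held in a hi:lo register
  // pair, an SRL-by-one typically ends up as something like
  //   lsrs rHi, rHi, #1   @ shift the high word; its old bit 0 lands in carry
  //   rrx  rLo, rLo       @ rotate the carry in as bit 31 of the low word
  // (asrs instead of lsrs for SRA). Register names here are only for
  // illustration; actual registers are chosen by the allocator.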
3955 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3956} 3957 3958static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3959 SDValue TmpOp0, TmpOp1; 3960 bool Invert = false; 3961 bool Swap = false; 3962 unsigned Opc = 0; 3963 3964 SDValue Op0 = Op.getOperand(0); 3965 SDValue Op1 = Op.getOperand(1); 3966 SDValue CC = Op.getOperand(2); 3967 EVT VT = Op.getValueType(); 3968 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3969 SDLoc dl(Op); 3970 3971 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3972 switch (SetCCOpcode) { 3973 default: llvm_unreachable("Illegal FP comparison"); 3974 case ISD::SETUNE: 3975 case ISD::SETNE: Invert = true; // Fallthrough 3976 case ISD::SETOEQ: 3977 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3978 case ISD::SETOLT: 3979 case ISD::SETLT: Swap = true; // Fallthrough 3980 case ISD::SETOGT: 3981 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3982 case ISD::SETOLE: 3983 case ISD::SETLE: Swap = true; // Fallthrough 3984 case ISD::SETOGE: 3985 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3986 case ISD::SETUGE: Swap = true; // Fallthrough 3987 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3988 case ISD::SETUGT: Swap = true; // Fallthrough 3989 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3990 case ISD::SETUEQ: Invert = true; // Fallthrough 3991 case ISD::SETONE: 3992 // Expand this to (OLT | OGT). 3993 TmpOp0 = Op0; 3994 TmpOp1 = Op1; 3995 Opc = ISD::OR; 3996 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3997 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3998 break; 3999 case ISD::SETUO: Invert = true; // Fallthrough 4000 case ISD::SETO: 4001 // Expand this to (OLT | OGE). 4002 TmpOp0 = Op0; 4003 TmpOp1 = Op1; 4004 Opc = ISD::OR; 4005 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 4006 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 4007 break; 4008 } 4009 } else { 4010 // Integer comparisons. 4011 switch (SetCCOpcode) { 4012 default: llvm_unreachable("Illegal integer comparison"); 4013 case ISD::SETNE: Invert = true; 4014 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 4015 case ISD::SETLT: Swap = true; 4016 case ISD::SETGT: Opc = ARMISD::VCGT; break; 4017 case ISD::SETLE: Swap = true; 4018 case ISD::SETGE: Opc = ARMISD::VCGE; break; 4019 case ISD::SETULT: Swap = true; 4020 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 4021 case ISD::SETULE: Swap = true; 4022 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 4023 } 4024 4025 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 4026 if (Opc == ARMISD::VCEQ) { 4027 4028 SDValue AndOp; 4029 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4030 AndOp = Op0; 4031 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 4032 AndOp = Op1; 4033 4034 // Ignore bitconvert. 4035 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 4036 AndOp = AndOp.getOperand(0); 4037 4038 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 4039 Opc = ARMISD::VTST; 4040 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 4041 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 4042 Invert = !Invert; 4043 } 4044 } 4045 } 4046 4047 if (Swap) 4048 std::swap(Op0, Op1); 4049 4050 // If one of the operands is a constant vector zero, attempt to fold the 4051 // comparison to a specialized compare-against-zero form. 
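  // Illustrative note (added): for example, a "vcge x, zero" compare can use
  // VCGEZ directly, while a zero on the left-hand side, as in "vcge zero, x",
  // is rewritten below as VCLEZ (i.e. x <= 0); VCGT is handled analogously
  // with VCGTZ / VCLTZ.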
4052 SDValue SingleOp; 4053 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 4054 SingleOp = Op0; 4055 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 4056 if (Opc == ARMISD::VCGE) 4057 Opc = ARMISD::VCLEZ; 4058 else if (Opc == ARMISD::VCGT) 4059 Opc = ARMISD::VCLTZ; 4060 SingleOp = Op1; 4061 } 4062 4063 SDValue Result; 4064 if (SingleOp.getNode()) { 4065 switch (Opc) { 4066 case ARMISD::VCEQ: 4067 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 4068 case ARMISD::VCGE: 4069 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 4070 case ARMISD::VCLEZ: 4071 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 4072 case ARMISD::VCGT: 4073 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 4074 case ARMISD::VCLTZ: 4075 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 4076 default: 4077 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4078 } 4079 } else { 4080 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 4081 } 4082 4083 if (Invert) 4084 Result = DAG.getNOT(dl, Result, VT); 4085 4086 return Result; 4087} 4088 4089/// isNEONModifiedImm - Check if the specified splat value corresponds to a 4090/// valid vector constant for a NEON instruction with a "modified immediate" 4091/// operand (e.g., VMOV). If so, return the encoded value. 4092static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 4093 unsigned SplatBitSize, SelectionDAG &DAG, 4094 EVT &VT, bool is128Bits, NEONModImmType type) { 4095 unsigned OpCmode, Imm; 4096 4097 // SplatBitSize is set to the smallest size that splats the vector, so a 4098 // zero vector will always have SplatBitSize == 8. However, NEON modified 4099 // immediate instructions others than VMOV do not support the 8-bit encoding 4100 // of a zero vector, and the default encoding of zero is supposed to be the 4101 // 32-bit version. 4102 if (SplatBits == 0) 4103 SplatBitSize = 32; 4104 4105 switch (SplatBitSize) { 4106 case 8: 4107 if (type != VMOVModImm) 4108 return SDValue(); 4109 // Any 1-byte value is OK. Op=0, Cmode=1110. 4110 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 4111 OpCmode = 0xe; 4112 Imm = SplatBits; 4113 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 4114 break; 4115 4116 case 16: 4117 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 4118 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 4119 if ((SplatBits & ~0xff) == 0) { 4120 // Value = 0x00nn: Op=x, Cmode=100x. 4121 OpCmode = 0x8; 4122 Imm = SplatBits; 4123 break; 4124 } 4125 if ((SplatBits & ~0xff00) == 0) { 4126 // Value = 0xnn00: Op=x, Cmode=101x. 4127 OpCmode = 0xa; 4128 Imm = SplatBits >> 8; 4129 break; 4130 } 4131 return SDValue(); 4132 4133 case 32: 4134 // NEON's 32-bit VMOV supports splat values where: 4135 // * only one byte is nonzero, or 4136 // * the least significant byte is 0xff and the second byte is nonzero, or 4137 // * the least significant 2 bytes are 0xff and the third is nonzero. 4138 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 4139 if ((SplatBits & ~0xff) == 0) { 4140 // Value = 0x000000nn: Op=x, Cmode=000x. 4141 OpCmode = 0; 4142 Imm = SplatBits; 4143 break; 4144 } 4145 if ((SplatBits & ~0xff00) == 0) { 4146 // Value = 0x0000nn00: Op=x, Cmode=001x. 4147 OpCmode = 0x2; 4148 Imm = SplatBits >> 8; 4149 break; 4150 } 4151 if ((SplatBits & ~0xff0000) == 0) { 4152 // Value = 0x00nn0000: Op=x, Cmode=010x. 4153 OpCmode = 0x4; 4154 Imm = SplatBits >> 16; 4155 break; 4156 } 4157 if ((SplatBits & ~0xff000000) == 0) { 4158 // Value = 0xnn000000: Op=x, Cmode=011x. 
4159 OpCmode = 0x6; 4160 Imm = SplatBits >> 24; 4161 break; 4162 } 4163 4164 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 4165 if (type == OtherModImm) return SDValue(); 4166 4167 if ((SplatBits & ~0xffff) == 0 && 4168 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 4169 // Value = 0x0000nnff: Op=x, Cmode=1100. 4170 OpCmode = 0xc; 4171 Imm = SplatBits >> 8; 4172 SplatBits |= 0xff; 4173 break; 4174 } 4175 4176 if ((SplatBits & ~0xffffff) == 0 && 4177 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 4178 // Value = 0x00nnffff: Op=x, Cmode=1101. 4179 OpCmode = 0xd; 4180 Imm = SplatBits >> 16; 4181 SplatBits |= 0xffff; 4182 break; 4183 } 4184 4185 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 4186 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 4187 // VMOV.I32. A (very) minor optimization would be to replicate the value 4188 // and fall through here to test for a valid 64-bit splat. But, then the 4189 // caller would also need to check and handle the change in size. 4190 return SDValue(); 4191 4192 case 64: { 4193 if (type != VMOVModImm) 4194 return SDValue(); 4195 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 4196 uint64_t BitMask = 0xff; 4197 uint64_t Val = 0; 4198 unsigned ImmMask = 1; 4199 Imm = 0; 4200 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 4201 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 4202 Val |= BitMask; 4203 Imm |= ImmMask; 4204 } else if ((SplatBits & BitMask) != 0) { 4205 return SDValue(); 4206 } 4207 BitMask <<= 8; 4208 ImmMask <<= 1; 4209 } 4210 // Op=1, Cmode=1110. 4211 OpCmode = 0x1e; 4212 SplatBits = Val; 4213 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 4214 break; 4215 } 4216 4217 default: 4218 llvm_unreachable("unexpected size for isNEONModifiedImm"); 4219 } 4220 4221 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 4222 return DAG.getTargetConstant(EncodedVal, MVT::i32); 4223} 4224 4225SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, 4226 const ARMSubtarget *ST) const { 4227 if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) 4228 return SDValue(); 4229 4230 ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op); 4231 assert(Op.getValueType() == MVT::f32 && 4232 "ConstantFP custom lowering should only occur for f32."); 4233 4234 // Try splatting with a VMOV.f32... 
4235 APFloat FPVal = CFP->getValueAPF(); 4236 int ImmVal = ARM_AM::getFP32Imm(FPVal); 4237 if (ImmVal != -1) { 4238 SDLoc DL(Op); 4239 SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); 4240 SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, 4241 NewVal); 4242 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecConstant, 4243 DAG.getConstant(0, MVT::i32)); 4244 } 4245 4246 // If that fails, try a VMOV.i32 4247 EVT VMovVT; 4248 unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); 4249 SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, 4250 VMOVModImm); 4251 if (NewVal != SDValue()) { 4252 SDLoc DL(Op); 4253 SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, 4254 NewVal); 4255 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4256 VecConstant); 4257 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4258 DAG.getConstant(0, MVT::i32)); 4259 } 4260 4261 // Finally, try a VMVN.i32 4262 NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, 4263 VMVNModImm); 4264 if (NewVal != SDValue()) { 4265 SDLoc DL(Op); 4266 SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); 4267 SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, 4268 VecConstant); 4269 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, 4270 DAG.getConstant(0, MVT::i32)); 4271 } 4272 4273 return SDValue(); 4274} 4275 4276// check if an VEXT instruction can handle the shuffle mask when the 4277// vector sources of the shuffle are the same. 4278static bool isSingletonVEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) { 4279 unsigned NumElts = VT.getVectorNumElements(); 4280 4281 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4282 if (M[0] < 0) 4283 return false; 4284 4285 Imm = M[0]; 4286 4287 // If this is a VEXT shuffle, the immediate value is the index of the first 4288 // element. The other shuffle indices must be the successive elements after 4289 // the first one. 4290 unsigned ExpectedElt = Imm; 4291 for (unsigned i = 1; i < NumElts; ++i) { 4292 // Increment the expected index. If it wraps around, just follow it 4293 // back to index zero and keep going. 4294 ++ExpectedElt; 4295 if (ExpectedElt == NumElts) 4296 ExpectedElt = 0; 4297 4298 if (M[i] < 0) continue; // ignore UNDEF indices 4299 if (ExpectedElt != static_cast<unsigned>(M[i])) 4300 return false; 4301 } 4302 4303 return true; 4304} 4305 4306 4307static bool isVEXTMask(ArrayRef<int> M, EVT VT, 4308 bool &ReverseVEXT, unsigned &Imm) { 4309 unsigned NumElts = VT.getVectorNumElements(); 4310 ReverseVEXT = false; 4311 4312 // Assume that the first shuffle index is not UNDEF. Fail if it is. 4313 if (M[0] < 0) 4314 return false; 4315 4316 Imm = M[0]; 4317 4318 // If this is a VEXT shuffle, the immediate value is the index of the first 4319 // element. The other shuffle indices must be the successive elements after 4320 // the first one. 4321 unsigned ExpectedElt = Imm; 4322 for (unsigned i = 1; i < NumElts; ++i) { 4323 // Increment the expected index. If it wraps around, it may still be 4324 // a VEXT but the source vectors must be swapped. 4325 ExpectedElt += 1; 4326 if (ExpectedElt == NumElts * 2) { 4327 ExpectedElt = 0; 4328 ReverseVEXT = true; 4329 } 4330 4331 if (M[i] < 0) continue; // ignore UNDEF indices 4332 if (ExpectedElt != static_cast<unsigned>(M[i])) 4333 return false; 4334 } 4335 4336 // Adjust the index value if the source operands will be swapped. 
4337 if (ReverseVEXT) 4338 Imm -= NumElts; 4339 4340 return true; 4341} 4342 4343/// isVREVMask - Check if a vector shuffle corresponds to a VREV 4344/// instruction with the specified blocksize. (The order of the elements 4345/// within each block of the vector is reversed.) 4346static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) { 4347 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 4348 "Only possible block sizes for VREV are: 16, 32, 64"); 4349 4350 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4351 if (EltSz == 64) 4352 return false; 4353 4354 unsigned NumElts = VT.getVectorNumElements(); 4355 unsigned BlockElts = M[0] + 1; 4356 // If the first shuffle index is UNDEF, be optimistic. 4357 if (M[0] < 0) 4358 BlockElts = BlockSize / EltSz; 4359 4360 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 4361 return false; 4362 4363 for (unsigned i = 0; i < NumElts; ++i) { 4364 if (M[i] < 0) continue; // ignore UNDEF indices 4365 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 4366 return false; 4367 } 4368 4369 return true; 4370} 4371 4372static bool isVTBLMask(ArrayRef<int> M, EVT VT) { 4373 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 4374 // range, then 0 is placed into the resulting vector. So pretty much any mask 4375 // of 8 elements can work here. 4376 return VT == MVT::v8i8 && M.size() == 8; 4377} 4378 4379static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4380 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4381 if (EltSz == 64) 4382 return false; 4383 4384 unsigned NumElts = VT.getVectorNumElements(); 4385 WhichResult = (M[0] == 0 ? 0 : 1); 4386 for (unsigned i = 0; i < NumElts; i += 2) { 4387 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4388 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 4389 return false; 4390 } 4391 return true; 4392} 4393 4394/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 4395/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 4396/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 4397static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){ 4398 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4399 if (EltSz == 64) 4400 return false; 4401 4402 unsigned NumElts = VT.getVectorNumElements(); 4403 WhichResult = (M[0] == 0 ? 0 : 1); 4404 for (unsigned i = 0; i < NumElts; i += 2) { 4405 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 4406 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 4407 return false; 4408 } 4409 return true; 4410} 4411 4412static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) { 4413 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 4414 if (EltSz == 64) 4415 return false; 4416 4417 unsigned NumElts = VT.getVectorNumElements(); 4418 WhichResult = (M[0] == 0 ? 0 : 1); 4419 for (unsigned i = 0; i != NumElts; ++i) { 4420 if (M[i] < 0) continue; // ignore UNDEF indices 4421 if ((unsigned) M[i] != 2 * i + WhichResult) 4422 return false; 4423 } 4424 4425 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 4426 if (VT.is64BitVector() && EltSz == 32) 4427 return false; 4428 4429 return true; 4430} 4431 4432/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 4433/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned Half = VT.getVectorNumElements() / 2;
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned j = 0; j != 2; ++j) {
    unsigned Idx = WhichResult;
    for (unsigned i = 0; i != Half; ++i) {
      int MIdx = M[i + j * Half];
      if (MIdx >= 0 && (unsigned) MIdx != Idx)
        return false;
      Idx += 2;
    }
  }

  // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts))
      return false;
    Idx += 1;
  }

  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
  unsigned EltSz = VT.getVectorElementType().getSizeInBits();
  if (EltSz == 64)
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && (unsigned) M[i] != Idx) ||
        (M[i+1] >= 0 && (unsigned) M[i+1] != Idx))
      return false;
    Idx += 1;
  }

  // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
  if (VT.is64BitVector() && EltSz == 32)
    return false;

  return true;
}

/// \return true if this is a reverse operation on a vector.
static bool isReverseMask(ArrayRef<int> M, EVT VT) {
  unsigned NumElts = VT.getVectorNumElements();
  // Make sure the mask has the right size.
  if (NumElts != M.size())
    return false;

  // Look for <15, ..., 3, -1, 1, 0>.
  for (unsigned i = 0; i != NumElts; ++i)
    if (M[i] >= 0 && M[i] != (int) (NumElts - 1 - i))
      return false;

  return true;
}

// If N is an integer constant that can be moved into a register in one
// instruction, return an SDValue of such a constant (will become a MOV
// instruction). Otherwise return null.
4524static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 4525 const ARMSubtarget *ST, SDLoc dl) { 4526 uint64_t Val; 4527 if (!isa<ConstantSDNode>(N)) 4528 return SDValue(); 4529 Val = cast<ConstantSDNode>(N)->getZExtValue(); 4530 4531 if (ST->isThumb1Only()) { 4532 if (Val <= 255 || ~Val <= 255) 4533 return DAG.getConstant(Val, MVT::i32); 4534 } else { 4535 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 4536 return DAG.getConstant(Val, MVT::i32); 4537 } 4538 return SDValue(); 4539} 4540 4541// If this is a case we can't handle, return null and let the default 4542// expansion code take care of it. 4543SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 4544 const ARMSubtarget *ST) const { 4545 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 4546 SDLoc dl(Op); 4547 EVT VT = Op.getValueType(); 4548 4549 APInt SplatBits, SplatUndef; 4550 unsigned SplatBitSize; 4551 bool HasAnyUndefs; 4552 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 4553 if (SplatBitSize <= 64) { 4554 // Check if an immediate VMOV works. 4555 EVT VmovVT; 4556 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 4557 SplatUndef.getZExtValue(), SplatBitSize, 4558 DAG, VmovVT, VT.is128BitVector(), 4559 VMOVModImm); 4560 if (Val.getNode()) { 4561 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 4562 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4563 } 4564 4565 // Try an immediate VMVN. 4566 uint64_t NegatedImm = (~SplatBits).getZExtValue(); 4567 Val = isNEONModifiedImm(NegatedImm, 4568 SplatUndef.getZExtValue(), SplatBitSize, 4569 DAG, VmovVT, VT.is128BitVector(), 4570 VMVNModImm); 4571 if (Val.getNode()) { 4572 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 4573 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 4574 } 4575 4576 // Use vmov.f32 to materialize other v2f32 and v4f32 splats. 4577 if ((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) { 4578 int ImmVal = ARM_AM::getFP32Imm(SplatBits); 4579 if (ImmVal != -1) { 4580 SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); 4581 return DAG.getNode(ARMISD::VMOVFPIMM, dl, VT, Val); 4582 } 4583 } 4584 } 4585 } 4586 4587 // Scan through the operands to see if only one value is used. 4588 // 4589 // As an optimisation, even if more than one value is used it may be more 4590 // profitable to splat with one value then change some lanes. 4591 // 4592 // Heuristically we decide to do this if the vector has a "dominant" value, 4593 // defined as splatted to more than half of the lanes. 4594 unsigned NumElts = VT.getVectorNumElements(); 4595 bool isOnlyLowElement = true; 4596 bool usesOnlyOneValue = true; 4597 bool hasDominantValue = false; 4598 bool isConstant = true; 4599 4600 // Map of the number of times a particular SDValue appears in the 4601 // element list. 4602 DenseMap<SDValue, unsigned> ValueCounts; 4603 SDValue Value; 4604 for (unsigned i = 0; i < NumElts; ++i) { 4605 SDValue V = Op.getOperand(i); 4606 if (V.getOpcode() == ISD::UNDEF) 4607 continue; 4608 if (i > 0) 4609 isOnlyLowElement = false; 4610 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 4611 isConstant = false; 4612 4613 ValueCounts.insert(std::make_pair(V, 0)); 4614 unsigned &Count = ValueCounts[V]; 4615 4616 // Is this value dominant? 
(takes up more than half of the lanes) 4617 if (++Count > (NumElts / 2)) { 4618 hasDominantValue = true; 4619 Value = V; 4620 } 4621 } 4622 if (ValueCounts.size() != 1) 4623 usesOnlyOneValue = false; 4624 if (!Value.getNode() && ValueCounts.size() > 0) 4625 Value = ValueCounts.begin()->first; 4626 4627 if (ValueCounts.size() == 0) 4628 return DAG.getUNDEF(VT); 4629 4630 if (isOnlyLowElement) 4631 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 4632 4633 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4634 4635 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 4636 // i32 and try again. 4637 if (hasDominantValue && EltSize <= 32) { 4638 if (!isConstant) { 4639 SDValue N; 4640 4641 // If we are VDUPing a value that comes directly from a vector, that will 4642 // cause an unnecessary move to and from a GPR, where instead we could 4643 // just use VDUPLANE. We can only do this if the lane being extracted 4644 // is at a constant index, as the VDUP from lane instructions only have 4645 // constant-index forms. 4646 if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && 4647 isa<ConstantSDNode>(Value->getOperand(1))) { 4648 // We need to create a new undef vector to use for the VDUPLANE if the 4649 // size of the vector from which we get the value is different than the 4650 // size of the vector that we need to create. We will insert the element 4651 // such that the register coalescer will remove unnecessary copies. 4652 if (VT != Value->getOperand(0).getValueType()) { 4653 ConstantSDNode *constIndex; 4654 constIndex = dyn_cast<ConstantSDNode>(Value->getOperand(1)); 4655 assert(constIndex && "The index is not a constant!"); 4656 unsigned index = constIndex->getAPIntValue().getLimitedValue() % 4657 VT.getVectorNumElements(); 4658 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4659 DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DAG.getUNDEF(VT), 4660 Value, DAG.getConstant(index, MVT::i32)), 4661 DAG.getConstant(index, MVT::i32)); 4662 } else 4663 N = DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4664 Value->getOperand(0), Value->getOperand(1)); 4665 } else 4666 N = DAG.getNode(ARMISD::VDUP, dl, VT, Value); 4667 4668 if (!usesOnlyOneValue) { 4669 // The dominant value was splatted as 'N', but we now have to insert 4670 // all differing elements. 4671 for (unsigned I = 0; I < NumElts; ++I) { 4672 if (Op.getOperand(I) == Value) 4673 continue; 4674 SmallVector<SDValue, 3> Ops; 4675 Ops.push_back(N); 4676 Ops.push_back(Op.getOperand(I)); 4677 Ops.push_back(DAG.getConstant(I, MVT::i32)); 4678 N = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, &Ops[0], 3); 4679 } 4680 } 4681 return N; 4682 } 4683 if (VT.getVectorElementType().isFloatingPoint()) { 4684 SmallVector<SDValue, 8> Ops; 4685 for (unsigned i = 0; i < NumElts; ++i) 4686 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 4687 Op.getOperand(i))); 4688 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 4689 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 4690 Val = LowerBUILD_VECTOR(Val, DAG, ST); 4691 if (Val.getNode()) 4692 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4693 } 4694 if (usesOnlyOneValue) { 4695 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 4696 if (isConstant && Val.getNode()) 4697 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 4698 } 4699 } 4700 4701 // If all elements are constants and the case above didn't get hit, fall back 4702 // to the default expansion, which will generate a load from the constant 4703 // pool. 
4704 if (isConstant) 4705 return SDValue(); 4706 4707 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 4708 if (NumElts >= 4) { 4709 SDValue shuffle = ReconstructShuffle(Op, DAG); 4710 if (shuffle != SDValue()) 4711 return shuffle; 4712 } 4713 4714 // Vectors with 32- or 64-bit elements can be built by directly assigning 4715 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 4716 // will be legalized. 4717 if (EltSize >= 32) { 4718 // Do the expansion with floating-point types, since that is what the VFP 4719 // registers are defined to use, and since i64 is not legal. 4720 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4721 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4722 SmallVector<SDValue, 8> Ops; 4723 for (unsigned i = 0; i < NumElts; ++i) 4724 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 4725 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4726 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4727 } 4728 4729 return SDValue(); 4730} 4731 4732// Gather data to see if the operation can be modelled as a 4733// shuffle in combination with VEXTs. 4734SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 4735 SelectionDAG &DAG) const { 4736 SDLoc dl(Op); 4737 EVT VT = Op.getValueType(); 4738 unsigned NumElts = VT.getVectorNumElements(); 4739 4740 SmallVector<SDValue, 2> SourceVecs; 4741 SmallVector<unsigned, 2> MinElts; 4742 SmallVector<unsigned, 2> MaxElts; 4743 4744 for (unsigned i = 0; i < NumElts; ++i) { 4745 SDValue V = Op.getOperand(i); 4746 if (V.getOpcode() == ISD::UNDEF) 4747 continue; 4748 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 4749 // A shuffle can only come from building a vector from various 4750 // elements of other vectors. 4751 return SDValue(); 4752 } else if (V.getOperand(0).getValueType().getVectorElementType() != 4753 VT.getVectorElementType()) { 4754 // This code doesn't know how to handle shuffles where the vector 4755 // element types do not match (this happens because type legalization 4756 // promotes the return type of EXTRACT_VECTOR_ELT). 4757 // FIXME: It might be appropriate to extend this code to handle 4758 // mismatched types. 4759 return SDValue(); 4760 } 4761 4762 // Record this extraction against the appropriate vector if possible... 4763 SDValue SourceVec = V.getOperand(0); 4764 // If the element number isn't a constant, we can't effectively 4765 // analyze what's going on. 4766 if (!isa<ConstantSDNode>(V.getOperand(1))) 4767 return SDValue(); 4768 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 4769 bool FoundSource = false; 4770 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 4771 if (SourceVecs[j] == SourceVec) { 4772 if (MinElts[j] > EltNo) 4773 MinElts[j] = EltNo; 4774 if (MaxElts[j] < EltNo) 4775 MaxElts[j] = EltNo; 4776 FoundSource = true; 4777 break; 4778 } 4779 } 4780 4781 // Or record a new source if not... 4782 if (!FoundSource) { 4783 SourceVecs.push_back(SourceVec); 4784 MinElts.push_back(EltNo); 4785 MaxElts.push_back(EltNo); 4786 } 4787 } 4788 4789 // Currently only do something sane when at most two source vectors 4790 // involved. 4791 if (SourceVecs.size() > 2) 4792 return SDValue(); 4793 4794 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; 4795 int VEXTOffsets[2] = {0, 0}; 4796 4797 // This loop extracts the usage patterns of the source vectors 4798 // and prepares appropriate SDValues for a shuffle if possible. 
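  // Illustrative example (assumed values, not taken from a real test case): if
  // a v4i16 build_vector extracts elements <3, 4, 5, 6> from a single v8i16
  // source, then MinElts[0] = 3 and MaxElts[0] = 6.  The span (6 - 3 = 3) fits
  // in four lanes, so the loop below emits one VEXT with offset 3, and the
  // final shuffle mask collapses to the trivial <0, 1, 2, 3>.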
4799 for (unsigned i = 0; i < SourceVecs.size(); ++i) { 4800 if (SourceVecs[i].getValueType() == VT) { 4801 // No VEXT necessary 4802 ShuffleSrcs[i] = SourceVecs[i]; 4803 VEXTOffsets[i] = 0; 4804 continue; 4805 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { 4806 // It probably isn't worth padding out a smaller vector just to 4807 // break it down again in a shuffle. 4808 return SDValue(); 4809 } 4810 4811 // Since only 64-bit and 128-bit vectors are legal on ARM and 4812 // we've eliminated the other cases... 4813 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4814 "unexpected vector sizes in ReconstructShuffle"); 4815 4816 if (MaxElts[i] - MinElts[i] >= NumElts) { 4817 // Span too large for a VEXT to cope 4818 return SDValue(); 4819 } 4820 4821 if (MinElts[i] >= NumElts) { 4822 // The extraction can just take the second half 4823 VEXTOffsets[i] = NumElts; 4824 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4825 SourceVecs[i], 4826 DAG.getIntPtrConstant(NumElts)); 4827 } else if (MaxElts[i] < NumElts) { 4828 // The extraction can just take the first half 4829 VEXTOffsets[i] = 0; 4830 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4831 SourceVecs[i], 4832 DAG.getIntPtrConstant(0)); 4833 } else { 4834 // An actual VEXT is needed 4835 VEXTOffsets[i] = MinElts[i]; 4836 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4837 SourceVecs[i], 4838 DAG.getIntPtrConstant(0)); 4839 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4840 SourceVecs[i], 4841 DAG.getIntPtrConstant(NumElts)); 4842 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4843 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4844 } 4845 } 4846 4847 SmallVector<int, 8> Mask; 4848 4849 for (unsigned i = 0; i < NumElts; ++i) { 4850 SDValue Entry = Op.getOperand(i); 4851 if (Entry.getOpcode() == ISD::UNDEF) { 4852 Mask.push_back(-1); 4853 continue; 4854 } 4855 4856 SDValue ExtractVec = Entry.getOperand(0); 4857 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4858 .getOperand(1))->getSExtValue(); 4859 if (ExtractVec == SourceVecs[0]) { 4860 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4861 } else { 4862 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4863 } 4864 } 4865 4866 // Final check before we try to produce nonsense... 4867 if (isShuffleMaskLegal(Mask, VT)) 4868 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4869 &Mask[0]); 4870 4871 return SDValue(); 4872} 4873 4874/// isShuffleMaskLegal - Targets can use this to indicate that they only 4875/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4876/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4877/// are assumed to be legal. 4878bool 4879ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4880 EVT VT) const { 4881 if (VT.getVectorNumElements() == 4 && 4882 (VT.is128BitVector() || VT.is64BitVector())) { 4883 unsigned PFIndexes[4]; 4884 for (unsigned i = 0; i != 4; ++i) { 4885 if (M[i] < 0) 4886 PFIndexes[i] = 8; 4887 else 4888 PFIndexes[i] = M[i]; 4889 } 4890 4891 // Compute the index in the perfect shuffle table. 
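    // The table is indexed in base 9: each of the four mask entries is either
    // a lane number 0..7 or 8 for undef.  For example (illustrative only), the
    // mask <0, 4, 1, 5> maps to index 0*729 + 4*81 + 1*9 + 5 = 338.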
4892 unsigned PFTableIndex = 4893 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4894 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4895 unsigned Cost = (PFEntry >> 30); 4896 4897 if (Cost <= 4) 4898 return true; 4899 } 4900 4901 bool ReverseVEXT; 4902 unsigned Imm, WhichResult; 4903 4904 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4905 return (EltSize >= 32 || 4906 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4907 isVREVMask(M, VT, 64) || 4908 isVREVMask(M, VT, 32) || 4909 isVREVMask(M, VT, 16) || 4910 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4911 isVTBLMask(M, VT) || 4912 isVTRNMask(M, VT, WhichResult) || 4913 isVUZPMask(M, VT, WhichResult) || 4914 isVZIPMask(M, VT, WhichResult) || 4915 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4916 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4917 isVZIP_v_undef_Mask(M, VT, WhichResult) || 4918 ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(M, VT))); 4919} 4920 4921/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4922/// the specified operations to build the shuffle. 4923static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4924 SDValue RHS, SelectionDAG &DAG, 4925 SDLoc dl) { 4926 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4927 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4928 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4929 4930 enum { 4931 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4932 OP_VREV, 4933 OP_VDUP0, 4934 OP_VDUP1, 4935 OP_VDUP2, 4936 OP_VDUP3, 4937 OP_VEXT1, 4938 OP_VEXT2, 4939 OP_VEXT3, 4940 OP_VUZPL, // VUZP, left result 4941 OP_VUZPR, // VUZP, right result 4942 OP_VZIPL, // VZIP, left result 4943 OP_VZIPR, // VZIP, right result 4944 OP_VTRNL, // VTRN, left result 4945 OP_VTRNR // VTRN, right result 4946 }; 4947 4948 if (OpNum == OP_COPY) { 4949 if (LHSID == (1*9+2)*9+3) return LHS; 4950 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4951 return RHS; 4952 } 4953 4954 SDValue OpLHS, OpRHS; 4955 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4956 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4957 EVT VT = OpLHS.getValueType(); 4958 4959 switch (OpNum) { 4960 default: llvm_unreachable("Unknown shuffle opcode!"); 4961 case OP_VREV: 4962 // VREV divides the vector in half and swaps within the half. 
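    // For example (illustrative), on a v4i32 <a, b, c, d> the VREV64 emitted
    // below yields <b, a, d, c>: each 64-bit half keeps its two elements but
    // swaps their order.  VREV32 on v4i16 and VREV16 on v4i8 are analogous.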
4963 if (VT.getVectorElementType() == MVT::i32 || 4964 VT.getVectorElementType() == MVT::f32) 4965 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 4966 // vrev <4 x i16> -> VREV32 4967 if (VT.getVectorElementType() == MVT::i16) 4968 return DAG.getNode(ARMISD::VREV32, dl, VT, OpLHS); 4969 // vrev <4 x i8> -> VREV16 4970 assert(VT.getVectorElementType() == MVT::i8); 4971 return DAG.getNode(ARMISD::VREV16, dl, VT, OpLHS); 4972 case OP_VDUP0: 4973 case OP_VDUP1: 4974 case OP_VDUP2: 4975 case OP_VDUP3: 4976 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4977 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 4978 case OP_VEXT1: 4979 case OP_VEXT2: 4980 case OP_VEXT3: 4981 return DAG.getNode(ARMISD::VEXT, dl, VT, 4982 OpLHS, OpRHS, 4983 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 4984 case OP_VUZPL: 4985 case OP_VUZPR: 4986 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4987 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 4988 case OP_VZIPL: 4989 case OP_VZIPR: 4990 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4991 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 4992 case OP_VTRNL: 4993 case OP_VTRNR: 4994 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4995 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 4996 } 4997} 4998 4999static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 5000 ArrayRef<int> ShuffleMask, 5001 SelectionDAG &DAG) { 5002 // Check to see if we can use the VTBL instruction. 5003 SDValue V1 = Op.getOperand(0); 5004 SDValue V2 = Op.getOperand(1); 5005 SDLoc DL(Op); 5006 5007 SmallVector<SDValue, 8> VTBLMask; 5008 for (ArrayRef<int>::iterator 5009 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I) 5010 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32)); 5011 5012 if (V2.getNode()->getOpcode() == ISD::UNDEF) 5013 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1, 5014 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5015 &VTBLMask[0], 8)); 5016 5017 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2, 5018 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8, 5019 &VTBLMask[0], 8)); 5020} 5021 5022static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, 5023 SelectionDAG &DAG) { 5024 SDLoc DL(Op); 5025 SDValue OpLHS = Op.getOperand(0); 5026 EVT VT = OpLHS.getValueType(); 5027 5028 assert((VT == MVT::v8i16 || VT == MVT::v16i8) && 5029 "Expect an v8i16/v16i8 type"); 5030 OpLHS = DAG.getNode(ARMISD::VREV64, DL, VT, OpLHS); 5031 // For a v16i8 type: After the VREV, we have got <8, ...15, 8, ..., 0>. Now, 5032 // extract the first 8 bytes into the top double word and the last 8 bytes 5033 // into the bottom double word. The v8i16 case is similar. 5034 unsigned ExtractNum = (VT == MVT::v16i8) ? 8 : 4; 5035 return DAG.getNode(ARMISD::VEXT, DL, VT, OpLHS, OpLHS, 5036 DAG.getConstant(ExtractNum, MVT::i32)); 5037} 5038 5039static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 5040 SDValue V1 = Op.getOperand(0); 5041 SDValue V2 = Op.getOperand(1); 5042 SDLoc dl(Op); 5043 EVT VT = Op.getValueType(); 5044 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 5045 5046 // Convert shuffles that are directly supported on NEON to target-specific 5047 // DAG nodes, instead of keeping them as shuffles and matching them again 5048 // during code selection. This is more efficient and avoids the possibility 5049 // of inconsistencies between legalization and selection. 5050 // FIXME: floating-point vectors should be canonicalized to integer vectors 5051 // of the same time so that they get CSEd properly. 
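  // As a concrete illustration (hypothetical masks): a v4i32 shuffle with mask
  // <1, 0, 3, 2> is matched by isVREVMask(..., 64) below and becomes a single
  // ARMISD::VREV64 node, while a splat mask such as <2, 2, 2, 2> becomes an
  // ARMISD::VDUPLANE of lane 2.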
5052 ArrayRef<int> ShuffleMask = SVN->getMask(); 5053 5054 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5055 if (EltSize <= 32) { 5056 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 5057 int Lane = SVN->getSplatIndex(); 5058 // If this is undef splat, generate it via "just" vdup, if possible. 5059 if (Lane == -1) Lane = 0; 5060 5061 // Test if V1 is a SCALAR_TO_VECTOR. 5062 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 5063 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5064 } 5065 // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR 5066 // (and probably will turn into a SCALAR_TO_VECTOR once legalization 5067 // reaches it). 5068 if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && 5069 !isa<ConstantSDNode>(V1.getOperand(0))) { 5070 bool IsScalarToVector = true; 5071 for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) 5072 if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { 5073 IsScalarToVector = false; 5074 break; 5075 } 5076 if (IsScalarToVector) 5077 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 5078 } 5079 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 5080 DAG.getConstant(Lane, MVT::i32)); 5081 } 5082 5083 bool ReverseVEXT; 5084 unsigned Imm; 5085 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 5086 if (ReverseVEXT) 5087 std::swap(V1, V2); 5088 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 5089 DAG.getConstant(Imm, MVT::i32)); 5090 } 5091 5092 if (isVREVMask(ShuffleMask, VT, 64)) 5093 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 5094 if (isVREVMask(ShuffleMask, VT, 32)) 5095 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 5096 if (isVREVMask(ShuffleMask, VT, 16)) 5097 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 5098 5099 if (V2->getOpcode() == ISD::UNDEF && 5100 isSingletonVEXTMask(ShuffleMask, VT, Imm)) { 5101 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V1, 5102 DAG.getConstant(Imm, MVT::i32)); 5103 } 5104 5105 // Check for Neon shuffles that modify both input vectors in place. 5106 // If both results are used, i.e., if there are two shuffles with the same 5107 // source operands and with masks corresponding to both results of one of 5108 // these operations, DAG memoization will ensure that a single node is 5109 // used for both shuffles. 5110 unsigned WhichResult; 5111 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 5112 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5113 V1, V2).getValue(WhichResult); 5114 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 5115 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5116 V1, V2).getValue(WhichResult); 5117 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 5118 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5119 V1, V2).getValue(WhichResult); 5120 5121 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5122 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 5123 V1, V1).getValue(WhichResult); 5124 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5125 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 5126 V1, V1).getValue(WhichResult); 5127 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 5128 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 5129 V1, V1).getValue(WhichResult); 5130 } 5131 5132 // If the shuffle is not directly supported and it has 4 elements, use 5133 // the PerfectShuffle-generated table to synthesize it from other shuffles. 
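  // Each 32-bit table entry consulted below packs the whole recipe:
  //   Cost  = PFEntry >> 30            (compared against 4 below)
  //   OpNum = (PFEntry >> 26) & 0xF    (OP_VREV, OP_VDUPn, OP_VEXTn, ...)
  //   LHSID = (PFEntry >> 13) & 0x1FFF and RHSID = PFEntry & 0x1FFF
  // name the operand shuffles that GeneratePerfectShuffle above builds
  // recursively before applying OpNum to them.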
5134 unsigned NumElts = VT.getVectorNumElements(); 5135 if (NumElts == 4) { 5136 unsigned PFIndexes[4]; 5137 for (unsigned i = 0; i != 4; ++i) { 5138 if (ShuffleMask[i] < 0) 5139 PFIndexes[i] = 8; 5140 else 5141 PFIndexes[i] = ShuffleMask[i]; 5142 } 5143 5144 // Compute the index in the perfect shuffle table. 5145 unsigned PFTableIndex = 5146 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5147 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5148 unsigned Cost = (PFEntry >> 30); 5149 5150 if (Cost <= 4) 5151 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5152 } 5153 5154 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 5155 if (EltSize >= 32) { 5156 // Do the expansion with floating-point types, since that is what the VFP 5157 // registers are defined to use, and since i64 is not legal. 5158 EVT EltVT = EVT::getFloatingPointVT(EltSize); 5159 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 5160 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 5161 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 5162 SmallVector<SDValue, 8> Ops; 5163 for (unsigned i = 0; i < NumElts; ++i) { 5164 if (ShuffleMask[i] < 0) 5165 Ops.push_back(DAG.getUNDEF(EltVT)); 5166 else 5167 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 5168 ShuffleMask[i] < (int)NumElts ? V1 : V2, 5169 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 5170 MVT::i32))); 5171 } 5172 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 5173 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 5174 } 5175 5176 if ((VT == MVT::v8i16 || VT == MVT::v16i8) && isReverseMask(ShuffleMask, VT)) 5177 return LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(Op, DAG); 5178 5179 if (VT == MVT::v8i8) { 5180 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 5181 if (NewOp.getNode()) 5182 return NewOp; 5183 } 5184 5185 return SDValue(); 5186} 5187 5188static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5189 // INSERT_VECTOR_ELT is legal only for immediate indexes. 5190 SDValue Lane = Op.getOperand(2); 5191 if (!isa<ConstantSDNode>(Lane)) 5192 return SDValue(); 5193 5194 return Op; 5195} 5196 5197static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 5198 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 5199 SDValue Lane = Op.getOperand(1); 5200 if (!isa<ConstantSDNode>(Lane)) 5201 return SDValue(); 5202 5203 SDValue Vec = Op.getOperand(0); 5204 if (Op.getValueType() == MVT::i32 && 5205 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 5206 SDLoc dl(Op); 5207 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 5208 } 5209 5210 return Op; 5211} 5212 5213static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 5214 // The only time a CONCAT_VECTORS operation can have legal types is when 5215 // two 64-bit vectors are concatenated to a 128-bit vector. 
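  // A minimal sketch of the result (assuming neither operand is undef): for
  // concat_vectors(v2f32 A, v2f32 B) -> v4f32 this builds
  //   t0 = insert_vector_elt undef:v2f64, bitcast(A):f64, 0
  //   t1 = insert_vector_elt t0,          bitcast(B):f64, 1
  //   bitcast t1 to v4f32
  // so each 64-bit half lands in one D subregister of the Q result.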
5216 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 5217 "unexpected CONCAT_VECTORS"); 5218 SDLoc dl(Op); 5219 SDValue Val = DAG.getUNDEF(MVT::v2f64); 5220 SDValue Op0 = Op.getOperand(0); 5221 SDValue Op1 = Op.getOperand(1); 5222 if (Op0.getOpcode() != ISD::UNDEF) 5223 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5224 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 5225 DAG.getIntPtrConstant(0)); 5226 if (Op1.getOpcode() != ISD::UNDEF) 5227 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 5228 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 5229 DAG.getIntPtrConstant(1)); 5230 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 5231} 5232 5233/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 5234/// element has been zero/sign-extended, depending on the isSigned parameter, 5235/// from an integer type half its size. 5236static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 5237 bool isSigned) { 5238 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 5239 EVT VT = N->getValueType(0); 5240 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 5241 SDNode *BVN = N->getOperand(0).getNode(); 5242 if (BVN->getValueType(0) != MVT::v4i32 || 5243 BVN->getOpcode() != ISD::BUILD_VECTOR) 5244 return false; 5245 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5246 unsigned HiElt = 1 - LoElt; 5247 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 5248 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 5249 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 5250 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 5251 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 5252 return false; 5253 if (isSigned) { 5254 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 5255 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 5256 return true; 5257 } else { 5258 if (Hi0->isNullValue() && Hi1->isNullValue()) 5259 return true; 5260 } 5261 return false; 5262 } 5263 5264 if (N->getOpcode() != ISD::BUILD_VECTOR) 5265 return false; 5266 5267 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 5268 SDNode *Elt = N->getOperand(i).getNode(); 5269 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 5270 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 5271 unsigned HalfSize = EltSize / 2; 5272 if (isSigned) { 5273 if (!isIntN(HalfSize, C->getSExtValue())) 5274 return false; 5275 } else { 5276 if (!isUIntN(HalfSize, C->getZExtValue())) 5277 return false; 5278 } 5279 continue; 5280 } 5281 return false; 5282 } 5283 5284 return true; 5285} 5286 5287/// isSignExtended - Check if a node is a vector value that is sign-extended 5288/// or a constant BUILD_VECTOR with sign-extended elements. 5289static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 5290 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 5291 return true; 5292 if (isExtendedBUILD_VECTOR(N, DAG, true)) 5293 return true; 5294 return false; 5295} 5296 5297/// isZeroExtended - Check if a node is a vector value that is zero-extended 5298/// or a constant BUILD_VECTOR with zero-extended elements. 
5299static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 5300 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 5301 return true; 5302 if (isExtendedBUILD_VECTOR(N, DAG, false)) 5303 return true; 5304 return false; 5305} 5306 5307static EVT getExtensionTo64Bits(const EVT &OrigVT) { 5308 if (OrigVT.getSizeInBits() >= 64) 5309 return OrigVT; 5310 5311 assert(OrigVT.isSimple() && "Expecting a simple value type"); 5312 5313 MVT::SimpleValueType OrigSimpleTy = OrigVT.getSimpleVT().SimpleTy; 5314 switch (OrigSimpleTy) { 5315 default: llvm_unreachable("Unexpected Vector Type"); 5316 case MVT::v2i8: 5317 case MVT::v2i16: 5318 return MVT::v2i32; 5319 case MVT::v4i8: 5320 return MVT::v4i16; 5321 } 5322} 5323 5324/// AddRequiredExtensionForVMULL - Add a sign/zero extension to extend the total 5325/// value size to 64 bits. We need a 64-bit D register as an operand to VMULL. 5326/// We insert the required extension here to get the vector to fill a D register. 5327static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, 5328 const EVT &OrigTy, 5329 const EVT &ExtTy, 5330 unsigned ExtOpcode) { 5331 // The vector originally had a size of OrigTy. It was then extended to ExtTy. 5332 // We expect the ExtTy to be 128-bits total. If the OrigTy is less than 5333 // 64-bits we need to insert a new extension so that it will be 64-bits. 5334 assert(ExtTy.is128BitVector() && "Unexpected extension size"); 5335 if (OrigTy.getSizeInBits() >= 64) 5336 return N; 5337 5338 // Must extend size to at least 64 bits to be used as an operand for VMULL. 5339 EVT NewVT = getExtensionTo64Bits(OrigTy); 5340 5341 return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); 5342} 5343 5344/// SkipLoadExtensionForVMULL - return a load of the original vector size that 5345/// does not do any sign/zero extension. If the original vector is less 5346/// than 64 bits, an appropriate extension will be added after the load to 5347/// reach a total size of 64 bits. We have to add the extension separately 5348/// because ARM does not have a sign/zero extending load for vectors. 5349static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { 5350 EVT ExtendedTy = getExtensionTo64Bits(LD->getMemoryVT()); 5351 5352 // The load already has the right type. 5353 if (ExtendedTy == LD->getMemoryVT()) 5354 return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), 5355 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 5356 LD->isNonTemporal(), LD->isInvariant(), 5357 LD->getAlignment()); 5358 5359 // We need to create a zextload/sextload. We cannot just create a load 5360 // followed by a zext/zext node because LowerMUL is also run during normal 5361 // operation legalization where we can't create illegal types. 5362 return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, 5363 LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), 5364 LD->getMemoryVT(), LD->isVolatile(), 5365 LD->isNonTemporal(), LD->getAlignment()); 5366} 5367 5368/// SkipExtensionForVMULL - For a node that is a SIGN_EXTEND, ZERO_EXTEND, 5369/// extending load, or BUILD_VECTOR with extended elements, return the 5370/// unextended value. The unextended vector should be 64 bits so that it can 5371/// be used as an operand to a VMULL instruction. If the original vector size 5372/// before extension is less than 64 bits we add a an extension to resize 5373/// the vector to 64 bits. 
5374static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { 5375 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 5376 return AddRequiredExtensionForVMULL(N->getOperand(0), DAG, 5377 N->getOperand(0)->getValueType(0), 5378 N->getValueType(0), 5379 N->getOpcode()); 5380 5381 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 5382 return SkipLoadExtensionForVMULL(LD, DAG); 5383 5384 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 5385 // have been legalized as a BITCAST from v4i32. 5386 if (N->getOpcode() == ISD::BITCAST) { 5387 SDNode *BVN = N->getOperand(0).getNode(); 5388 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 5389 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 5390 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 5391 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, 5392 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 5393 } 5394 // Construct a new BUILD_VECTOR with elements truncated to half the size. 5395 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 5396 EVT VT = N->getValueType(0); 5397 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 5398 unsigned NumElts = VT.getVectorNumElements(); 5399 MVT TruncVT = MVT::getIntegerVT(EltSize); 5400 SmallVector<SDValue, 8> Ops; 5401 for (unsigned i = 0; i != NumElts; ++i) { 5402 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 5403 const APInt &CInt = C->getAPIntValue(); 5404 // Element types smaller than 32 bits are not legal, so use i32 elements. 5405 // The values are implicitly truncated so sext vs. zext doesn't matter. 5406 Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); 5407 } 5408 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), 5409 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 5410} 5411 5412static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 5413 unsigned Opcode = N->getOpcode(); 5414 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5415 SDNode *N0 = N->getOperand(0).getNode(); 5416 SDNode *N1 = N->getOperand(1).getNode(); 5417 return N0->hasOneUse() && N1->hasOneUse() && 5418 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 5419 } 5420 return false; 5421} 5422 5423static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 5424 unsigned Opcode = N->getOpcode(); 5425 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 5426 SDNode *N0 = N->getOperand(0).getNode(); 5427 SDNode *N1 = N->getOperand(1).getNode(); 5428 return N0->hasOneUse() && N1->hasOneUse() && 5429 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 5430 } 5431 return false; 5432} 5433 5434static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 5435 // Multiplications are only custom-lowered for 128-bit vectors so that 5436 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
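  // Illustrative example (assumed IR, not from a test): for
  //   mul (sext v4i16 %a to v4i32), (sext v4i16 %b to v4i32)
  // both operands are recognised as sign-extended, SkipExtensionForVMULL
  // strips the extensions back to v4i16, and the whole multiply becomes a
  // single ARMISD::VMULLs node, i.e. one vmull.s16 instead of two vmovl's
  // plus a vmul.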
5437 EVT VT = Op.getValueType(); 5438 assert(VT.is128BitVector() && VT.isInteger() && 5439 "unexpected type for custom-lowering ISD::MUL"); 5440 SDNode *N0 = Op.getOperand(0).getNode(); 5441 SDNode *N1 = Op.getOperand(1).getNode(); 5442 unsigned NewOpc = 0; 5443 bool isMLA = false; 5444 bool isN0SExt = isSignExtended(N0, DAG); 5445 bool isN1SExt = isSignExtended(N1, DAG); 5446 if (isN0SExt && isN1SExt) 5447 NewOpc = ARMISD::VMULLs; 5448 else { 5449 bool isN0ZExt = isZeroExtended(N0, DAG); 5450 bool isN1ZExt = isZeroExtended(N1, DAG); 5451 if (isN0ZExt && isN1ZExt) 5452 NewOpc = ARMISD::VMULLu; 5453 else if (isN1SExt || isN1ZExt) { 5454 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 5455 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 5456 if (isN1SExt && isAddSubSExt(N0, DAG)) { 5457 NewOpc = ARMISD::VMULLs; 5458 isMLA = true; 5459 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 5460 NewOpc = ARMISD::VMULLu; 5461 isMLA = true; 5462 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 5463 std::swap(N0, N1); 5464 NewOpc = ARMISD::VMULLu; 5465 isMLA = true; 5466 } 5467 } 5468 5469 if (!NewOpc) { 5470 if (VT == MVT::v2i64) 5471 // Fall through to expand this. It is not legal. 5472 return SDValue(); 5473 else 5474 // Other vector multiplications are legal. 5475 return Op; 5476 } 5477 } 5478 5479 // Legalize to a VMULL instruction. 5480 SDLoc DL(Op); 5481 SDValue Op0; 5482 SDValue Op1 = SkipExtensionForVMULL(N1, DAG); 5483 if (!isMLA) { 5484 Op0 = SkipExtensionForVMULL(N0, DAG); 5485 assert(Op0.getValueType().is64BitVector() && 5486 Op1.getValueType().is64BitVector() && 5487 "unexpected types for extended operands to VMULL"); 5488 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 5489 } 5490 5491 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 5492 // isel lowering to take advantage of no-stall back to back vmul + vmla. 5493 // vmull q0, d4, d6 5494 // vmlal q0, d5, d6 5495 // is faster than 5496 // vaddl q0, d4, d5 5497 // vmovl q1, d6 5498 // vmul q0, q0, q1 5499 SDValue N00 = SkipExtensionForVMULL(N0->getOperand(0).getNode(), DAG); 5500 SDValue N01 = SkipExtensionForVMULL(N0->getOperand(1).getNode(), DAG); 5501 EVT Op1VT = Op1.getValueType(); 5502 return DAG.getNode(N0->getOpcode(), DL, VT, 5503 DAG.getNode(NewOpc, DL, VT, 5504 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 5505 DAG.getNode(NewOpc, DL, VT, 5506 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 5507} 5508 5509static SDValue 5510LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { 5511 // Convert to float 5512 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 5513 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 5514 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 5515 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 5516 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 5517 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 5518 // Get reciprocal estimate. 5519 // float4 recip = vrecpeq_f32(yf); 5520 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5521 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 5522 // Because char has a smaller range than uchar, we can actually get away 5523 // without any newton steps. This requires that we use a weird bias 5524 // of 0xb000, however (again, this has been exhaustively tested). 
5525 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 5526 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 5527 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 5528 Y = DAG.getConstant(0xb000, MVT::i32); 5529 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 5530 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 5531 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 5532 // Convert back to short. 5533 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 5534 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 5535 return X; 5536} 5537 5538static SDValue 5539LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { 5540 SDValue N2; 5541 // Convert to float. 5542 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 5543 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 5544 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 5545 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 5546 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 5547 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 5548 5549 // Use reciprocal estimate and one refinement step. 5550 // float4 recip = vrecpeq_f32(yf); 5551 // recip *= vrecpsq_f32(yf, recip); 5552 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5553 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 5554 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5555 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5556 N1, N2); 5557 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5558 // Because short has a smaller range than ushort, we can actually get away 5559 // with only a single newton step. This requires that we use a weird bias 5560 // of 89, however (again, this has been exhaustively tested). 5561 // float4 result = as_float4(as_int4(xf*recip) + 0x89); 5562 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 5563 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 5564 N1 = DAG.getConstant(0x89, MVT::i32); 5565 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 5566 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 5567 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 5568 // Convert back to integer and return. 
5569 // return vmovn_s32(vcvt_s32_f32(result)); 5570 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 5571 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 5572 return N0; 5573} 5574 5575static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 5576 EVT VT = Op.getValueType(); 5577 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 5578 "unexpected type for custom-lowering ISD::SDIV"); 5579 5580 SDLoc dl(Op); 5581 SDValue N0 = Op.getOperand(0); 5582 SDValue N1 = Op.getOperand(1); 5583 SDValue N2, N3; 5584 5585 if (VT == MVT::v8i8) { 5586 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 5587 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 5588 5589 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5590 DAG.getIntPtrConstant(4)); 5591 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5592 DAG.getIntPtrConstant(4)); 5593 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5594 DAG.getIntPtrConstant(0)); 5595 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5596 DAG.getIntPtrConstant(0)); 5597 5598 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 5599 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 5600 5601 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5602 N0 = LowerCONCAT_VECTORS(N0, DAG); 5603 5604 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 5605 return N0; 5606 } 5607 return LowerSDIV_v4i16(N0, N1, dl, DAG); 5608} 5609 5610static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 5611 EVT VT = Op.getValueType(); 5612 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 5613 "unexpected type for custom-lowering ISD::UDIV"); 5614 5615 SDLoc dl(Op); 5616 SDValue N0 = Op.getOperand(0); 5617 SDValue N1 = Op.getOperand(1); 5618 SDValue N2, N3; 5619 5620 if (VT == MVT::v8i8) { 5621 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 5622 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 5623 5624 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5625 DAG.getIntPtrConstant(4)); 5626 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5627 DAG.getIntPtrConstant(4)); 5628 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 5629 DAG.getIntPtrConstant(0)); 5630 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 5631 DAG.getIntPtrConstant(0)); 5632 5633 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 5634 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 5635 5636 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 5637 N0 = LowerCONCAT_VECTORS(N0, DAG); 5638 5639 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 5640 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 5641 N0); 5642 return N0; 5643 } 5644 5645 // v4i16 sdiv ... Convert to float. 5646 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 5647 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 5648 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 5649 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 5650 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 5651 SDValue BN1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 5652 5653 // Use reciprocal estimate and two refinement steps. 
5654 // float4 recip = vrecpeq_f32(yf); 5655 // recip *= vrecpsq_f32(yf, recip); 5656 // recip *= vrecpsq_f32(yf, recip); 5657 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5658 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), BN1); 5659 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5660 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5661 BN1, N2); 5662 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5663 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 5664 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 5665 BN1, N2); 5666 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 5667 // Simply multiplying by the reciprocal estimate can leave us a few ulps 5668 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 5669 // and that it will never cause us to return an answer too large). 5670 // float4 result = as_float4(as_int4(xf*recip) + 2); 5671 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 5672 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 5673 N1 = DAG.getConstant(2, MVT::i32); 5674 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 5675 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 5676 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 5677 // Convert back to integer and return. 5678 // return vmovn_u32(vcvt_s32_f32(result)); 5679 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 5680 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 5681 return N0; 5682} 5683 5684static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { 5685 EVT VT = Op.getNode()->getValueType(0); 5686 SDVTList VTs = DAG.getVTList(VT, MVT::i32); 5687 5688 unsigned Opc; 5689 bool ExtraOp = false; 5690 switch (Op.getOpcode()) { 5691 default: llvm_unreachable("Invalid code"); 5692 case ISD::ADDC: Opc = ARMISD::ADDC; break; 5693 case ISD::ADDE: Opc = ARMISD::ADDE; ExtraOp = true; break; 5694 case ISD::SUBC: Opc = ARMISD::SUBC; break; 5695 case ISD::SUBE: Opc = ARMISD::SUBE; ExtraOp = true; break; 5696 } 5697 5698 if (!ExtraOp) 5699 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 5700 Op.getOperand(1)); 5701 return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), 5702 Op.getOperand(1), Op.getOperand(2)); 5703} 5704 5705static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { 5706 // Monotonic load/store is legal for all targets 5707 if (cast<AtomicSDNode>(Op)->getOrdering() <= Monotonic) 5708 return Op; 5709 5710 // Aquire/Release load/store is not legal for targets without a 5711 // dmb or equivalent available. 
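  // As an illustration of why the cut-off is Monotonic (hedged, since the
  // exact fallback is chosen by the generic legalizer): a monotonic i32 load
  // needs nothing beyond a plain ldr, whereas an acquire load would also
  // require a dmb-style barrier after it, which such targets cannot provide.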
5712 return SDValue(); 5713} 5714 5715static void 5716ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl<SDValue>& Results, 5717 SelectionDAG &DAG, unsigned NewOp) { 5718 SDLoc dl(Node); 5719 assert (Node->getValueType(0) == MVT::i64 && 5720 "Only know how to expand i64 atomics"); 5721 5722 SmallVector<SDValue, 6> Ops; 5723 Ops.push_back(Node->getOperand(0)); // Chain 5724 Ops.push_back(Node->getOperand(1)); // Ptr 5725 // Low part of Val1 5726 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5727 Node->getOperand(2), DAG.getIntPtrConstant(0))); 5728 // High part of Val1 5729 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5730 Node->getOperand(2), DAG.getIntPtrConstant(1))); 5731 if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { 5732 // High part of Val1 5733 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5734 Node->getOperand(3), DAG.getIntPtrConstant(0))); 5735 // High part of Val2 5736 Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, 5737 Node->getOperand(3), DAG.getIntPtrConstant(1))); 5738 } 5739 SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); 5740 SDValue Result = 5741 DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, 5742 cast<MemSDNode>(Node)->getMemOperand()); 5743 SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; 5744 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); 5745 Results.push_back(Result.getValue(2)); 5746} 5747 5748static void ReplaceREADCYCLECOUNTER(SDNode *N, 5749 SmallVectorImpl<SDValue> &Results, 5750 SelectionDAG &DAG, 5751 const ARMSubtarget *Subtarget) { 5752 SDLoc DL(N); 5753 SDValue Cycles32, OutChain; 5754 5755 if (Subtarget->hasPerfMon()) { 5756 // Under Power Management extensions, the cycle-count is: 5757 // mrc p15, #0, <Rt>, c9, c13, #0 5758 SDValue Ops[] = { N->getOperand(0), // Chain 5759 DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), 5760 DAG.getConstant(15, MVT::i32), 5761 DAG.getConstant(0, MVT::i32), 5762 DAG.getConstant(9, MVT::i32), 5763 DAG.getConstant(13, MVT::i32), 5764 DAG.getConstant(0, MVT::i32) 5765 }; 5766 5767 Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, 5768 DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], 5769 array_lengthof(Ops)); 5770 OutChain = Cycles32.getValue(1); 5771 } else { 5772 // Intrinsic is defined to return 0 on unsupported platforms. Technically 5773 // there are older ARM CPUs that have implementation-specific ways of 5774 // obtaining this information (FIXME!). 5775 Cycles32 = DAG.getConstant(0, MVT::i32); 5776 OutChain = DAG.getEntryNode(); 5777 } 5778 5779 5780 SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, 5781 Cycles32, DAG.getConstant(0, MVT::i32)); 5782 Results.push_back(Cycles64); 5783 Results.push_back(OutChain); 5784} 5785 5786SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5787 switch (Op.getOpcode()) { 5788 default: llvm_unreachable("Don't know how to custom lower this!"); 5789 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5790 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5791 case ISD::GlobalAddress: 5792 return Subtarget->isTargetDarwin() ? 
LowerGlobalAddressDarwin(Op, DAG) : 5793 LowerGlobalAddressELF(Op, DAG); 5794 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5795 case ISD::SELECT: return LowerSELECT(Op, DAG); 5796 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5797 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 5798 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 5799 case ISD::VASTART: return LowerVASTART(Op, DAG); 5800 case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, DAG, Subtarget); 5801 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 5802 case ISD::SINT_TO_FP: 5803 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5804 case ISD::FP_TO_SINT: 5805 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 5806 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 5807 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5808 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5809 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 5810 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 5811 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 5812 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 5813 Subtarget); 5814 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 5815 case ISD::SHL: 5816 case ISD::SRL: 5817 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 5818 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 5819 case ISD::SRL_PARTS: 5820 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 5821 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 5822 case ISD::CTPOP: return LowerCTPOP(Op.getNode(), DAG, Subtarget); 5823 case ISD::SETCC: return LowerVSETCC(Op, DAG); 5824 case ISD::ConstantFP: return LowerConstantFP(Op, DAG, Subtarget); 5825 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 5826 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5827 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); 5828 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 5829 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 5830 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5831 case ISD::MUL: return LowerMUL(Op, DAG); 5832 case ISD::SDIV: return LowerSDIV(Op, DAG); 5833 case ISD::UDIV: return LowerUDIV(Op, DAG); 5834 case ISD::ADDC: 5835 case ISD::ADDE: 5836 case ISD::SUBC: 5837 case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); 5838 case ISD::ATOMIC_LOAD: 5839 case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); 5840 } 5841} 5842 5843/// ReplaceNodeResults - Replace the results of node with an illegal result 5844/// type with new values built out of custom code. 
5845void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 5846 SmallVectorImpl<SDValue>&Results, 5847 SelectionDAG &DAG) const { 5848 SDValue Res; 5849 switch (N->getOpcode()) { 5850 default: 5851 llvm_unreachable("Don't know how to custom expand this!"); 5852 case ISD::BITCAST: 5853 Res = ExpandBITCAST(N, DAG); 5854 break; 5855 case ISD::SIGN_EXTEND: 5856 case ISD::ZERO_EXTEND: 5857 Res = ExpandVectorExtension(N, DAG); 5858 break; 5859 case ISD::SRL: 5860 case ISD::SRA: 5861 Res = Expand64BitShift(N, DAG, Subtarget); 5862 break; 5863 case ISD::READCYCLECOUNTER: 5864 ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); 5865 return; 5866 case ISD::ATOMIC_LOAD_ADD: 5867 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); 5868 return; 5869 case ISD::ATOMIC_LOAD_AND: 5870 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); 5871 return; 5872 case ISD::ATOMIC_LOAD_NAND: 5873 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); 5874 return; 5875 case ISD::ATOMIC_LOAD_OR: 5876 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); 5877 return; 5878 case ISD::ATOMIC_LOAD_SUB: 5879 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); 5880 return; 5881 case ISD::ATOMIC_LOAD_XOR: 5882 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); 5883 return; 5884 case ISD::ATOMIC_SWAP: 5885 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); 5886 return; 5887 case ISD::ATOMIC_CMP_SWAP: 5888 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); 5889 return; 5890 case ISD::ATOMIC_LOAD_MIN: 5891 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); 5892 return; 5893 case ISD::ATOMIC_LOAD_UMIN: 5894 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); 5895 return; 5896 case ISD::ATOMIC_LOAD_MAX: 5897 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); 5898 return; 5899 case ISD::ATOMIC_LOAD_UMAX: 5900 ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); 5901 return; 5902 } 5903 if (Res.getNode()) 5904 Results.push_back(Res); 5905} 5906 5907//===----------------------------------------------------------------------===// 5908// ARM Scheduler Hooks 5909//===----------------------------------------------------------------------===// 5910 5911MachineBasicBlock * 5912ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 5913 MachineBasicBlock *BB, 5914 unsigned Size) const { 5915 unsigned dest = MI->getOperand(0).getReg(); 5916 unsigned ptr = MI->getOperand(1).getReg(); 5917 unsigned oldval = MI->getOperand(2).getReg(); 5918 unsigned newval = MI->getOperand(3).getReg(); 5919 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5920 DebugLoc dl = MI->getDebugLoc(); 5921 bool isThumb2 = Subtarget->isThumb2(); 5922 5923 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 5924 unsigned scratch = MRI.createVirtualRegister(isThumb2 ? 5925 (const TargetRegisterClass*)&ARM::rGPRRegClass : 5926 (const TargetRegisterClass*)&ARM::GPRRegClass); 5927 5928 if (isThumb2) { 5929 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 5930 MRI.constrainRegClass(oldval, &ARM::rGPRRegClass); 5931 MRI.constrainRegClass(newval, &ARM::rGPRRegClass); 5932 } 5933 5934 unsigned ldrOpc, strOpc; 5935 switch (Size) { 5936 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5937 case 1: 5938 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5939 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5940 break; 5941 case 2: 5942 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5943 strOpc = isThumb2 ? 
ARM::t2STREXH : ARM::STREXH; 5944 break; 5945 case 4: 5946 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5947 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5948 break; 5949 } 5950 5951 MachineFunction *MF = BB->getParent(); 5952 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5953 MachineFunction::iterator It = BB; 5954 ++It; // insert the new blocks after the current block 5955 5956 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5957 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 5958 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5959 MF->insert(It, loop1MBB); 5960 MF->insert(It, loop2MBB); 5961 MF->insert(It, exitMBB); 5962 5963 // Transfer the remainder of BB and its successor edges to exitMBB. 5964 exitMBB->splice(exitMBB->begin(), BB, 5965 llvm::next(MachineBasicBlock::iterator(MI)), 5966 BB->end()); 5967 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5968 5969 // thisMBB: 5970 // ... 5971 // fallthrough --> loop1MBB 5972 BB->addSuccessor(loop1MBB); 5973 5974 // loop1MBB: 5975 // ldrex dest, [ptr] 5976 // cmp dest, oldval 5977 // bne exitMBB 5978 BB = loop1MBB; 5979 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 5980 if (ldrOpc == ARM::t2LDREX) 5981 MIB.addImm(0); 5982 AddDefaultPred(MIB); 5983 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5984 .addReg(dest).addReg(oldval)); 5985 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5986 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5987 BB->addSuccessor(loop2MBB); 5988 BB->addSuccessor(exitMBB); 5989 5990 // loop2MBB: 5991 // strex scratch, newval, [ptr] 5992 // cmp scratch, #0 5993 // bne loop1MBB 5994 BB = loop2MBB; 5995 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval).addReg(ptr); 5996 if (strOpc == ARM::t2STREX) 5997 MIB.addImm(0); 5998 AddDefaultPred(MIB); 5999 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6000 .addReg(scratch).addImm(0)); 6001 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6002 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6003 BB->addSuccessor(loop1MBB); 6004 BB->addSuccessor(exitMBB); 6005 6006 // exitMBB: 6007 // ... 6008 BB = exitMBB; 6009 6010 MI->eraseFromParent(); // The instruction is gone now. 6011 6012 return BB; 6013} 6014 6015MachineBasicBlock * 6016ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 6017 unsigned Size, unsigned BinOpcode) const { 6018 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 6019 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6020 6021 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6022 MachineFunction *MF = BB->getParent(); 6023 MachineFunction::iterator It = BB; 6024 ++It; 6025 6026 unsigned dest = MI->getOperand(0).getReg(); 6027 unsigned ptr = MI->getOperand(1).getReg(); 6028 unsigned incr = MI->getOperand(2).getReg(); 6029 DebugLoc dl = MI->getDebugLoc(); 6030 bool isThumb2 = Subtarget->isThumb2(); 6031 6032 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6033 if (isThumb2) { 6034 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6035 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6036 } 6037 6038 unsigned ldrOpc, strOpc; 6039 switch (Size) { 6040 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 6041 case 1: 6042 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 6043 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 6044 break; 6045 case 2: 6046 ldrOpc = isThumb2 ? 
ARM::t2LDREXH : ARM::LDREXH; 6047 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 6048 break; 6049 case 4: 6050 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 6051 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 6052 break; 6053 } 6054 6055 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6056 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6057 MF->insert(It, loopMBB); 6058 MF->insert(It, exitMBB); 6059 6060 // Transfer the remainder of BB and its successor edges to exitMBB. 6061 exitMBB->splice(exitMBB->begin(), BB, 6062 llvm::next(MachineBasicBlock::iterator(MI)), 6063 BB->end()); 6064 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6065 6066 const TargetRegisterClass *TRC = isThumb2 ? 6067 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6068 (const TargetRegisterClass*)&ARM::GPRRegClass; 6069 unsigned scratch = MRI.createVirtualRegister(TRC); 6070 unsigned scratch2 = (!BinOpcode) ? incr : MRI.createVirtualRegister(TRC); 6071 6072 // thisMBB: 6073 // ... 6074 // fallthrough --> loopMBB 6075 BB->addSuccessor(loopMBB); 6076 6077 // loopMBB: 6078 // ldrex dest, ptr 6079 // <binop> scratch2, dest, incr 6080 // strex scratch, scratch2, ptr 6081 // cmp scratch, #0 6082 // bne- loopMBB 6083 // fallthrough --> exitMBB 6084 BB = loopMBB; 6085 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6086 if (ldrOpc == ARM::t2LDREX) 6087 MIB.addImm(0); 6088 AddDefaultPred(MIB); 6089 if (BinOpcode) { 6090 // operand order needs to go the other way for NAND 6091 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 6092 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6093 addReg(incr).addReg(dest)).addReg(0); 6094 else 6095 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 6096 addReg(dest).addReg(incr)).addReg(0); 6097 } 6098 6099 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6100 if (strOpc == ARM::t2STREX) 6101 MIB.addImm(0); 6102 AddDefaultPred(MIB); 6103 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6104 .addReg(scratch).addImm(0)); 6105 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6106 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6107 6108 BB->addSuccessor(loopMBB); 6109 BB->addSuccessor(exitMBB); 6110 6111 // exitMBB: 6112 // ... 6113 BB = exitMBB; 6114 6115 MI->eraseFromParent(); // The instruction is gone now. 6116 6117 return BB; 6118} 6119 6120MachineBasicBlock * 6121ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 6122 MachineBasicBlock *BB, 6123 unsigned Size, 6124 bool signExtend, 6125 ARMCC::CondCodes Cond) const { 6126 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6127 6128 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6129 MachineFunction *MF = BB->getParent(); 6130 MachineFunction::iterator It = BB; 6131 ++It; 6132 6133 unsigned dest = MI->getOperand(0).getReg(); 6134 unsigned ptr = MI->getOperand(1).getReg(); 6135 unsigned incr = MI->getOperand(2).getReg(); 6136 unsigned oldval = dest; 6137 DebugLoc dl = MI->getDebugLoc(); 6138 bool isThumb2 = Subtarget->isThumb2(); 6139 6140 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6141 if (isThumb2) { 6142 MRI.constrainRegClass(dest, &ARM::rGPRRegClass); 6143 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6144 } 6145 6146 unsigned ldrOpc, strOpc, extendOpc; 6147 switch (Size) { 6148 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 6149 case 1: 6150 ldrOpc = isThumb2 ? 
ARM::t2LDREXB : ARM::LDREXB; 6151 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 6152 extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; 6153 break; 6154 case 2: 6155 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 6156 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 6157 extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; 6158 break; 6159 case 4: 6160 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 6161 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 6162 extendOpc = 0; 6163 break; 6164 } 6165 6166 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6167 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6168 MF->insert(It, loopMBB); 6169 MF->insert(It, exitMBB); 6170 6171 // Transfer the remainder of BB and its successor edges to exitMBB. 6172 exitMBB->splice(exitMBB->begin(), BB, 6173 llvm::next(MachineBasicBlock::iterator(MI)), 6174 BB->end()); 6175 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6176 6177 const TargetRegisterClass *TRC = isThumb2 ? 6178 (const TargetRegisterClass*)&ARM::rGPRRegClass : 6179 (const TargetRegisterClass*)&ARM::GPRRegClass; 6180 unsigned scratch = MRI.createVirtualRegister(TRC); 6181 unsigned scratch2 = MRI.createVirtualRegister(TRC); 6182 6183 // thisMBB: 6184 // ... 6185 // fallthrough --> loopMBB 6186 BB->addSuccessor(loopMBB); 6187 6188 // loopMBB: 6189 // ldrex dest, ptr 6190 // (sign extend dest, if required) 6191 // cmp dest, incr 6192 // cmov.cond scratch2, incr, dest 6193 // strex scratch, scratch2, ptr 6194 // cmp scratch, #0 6195 // bne- loopMBB 6196 // fallthrough --> exitMBB 6197 BB = loopMBB; 6198 MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr); 6199 if (ldrOpc == ARM::t2LDREX) 6200 MIB.addImm(0); 6201 AddDefaultPred(MIB); 6202 6203 // Sign extend the value, if necessary. 6204 if (signExtend && extendOpc) { 6205 oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); 6206 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) 6207 .addReg(dest) 6208 .addImm(0)); 6209 } 6210 6211 // Build compare and cmov instructions. 6212 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 6213 .addReg(oldval).addReg(incr)); 6214 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 6215 .addReg(incr).addReg(oldval).addImm(Cond).addReg(ARM::CPSR); 6216 6217 MIB = BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2).addReg(ptr); 6218 if (strOpc == ARM::t2STREX) 6219 MIB.addImm(0); 6220 AddDefaultPred(MIB); 6221 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6222 .addReg(scratch).addImm(0)); 6223 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6224 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6225 6226 BB->addSuccessor(loopMBB); 6227 BB->addSuccessor(exitMBB); 6228 6229 // exitMBB: 6230 // ... 6231 BB = exitMBB; 6232 6233 MI->eraseFromParent(); // The instruction is gone now. 6234 6235 return BB; 6236} 6237 6238MachineBasicBlock * 6239ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, 6240 unsigned Op1, unsigned Op2, 6241 bool NeedsCarry, bool IsCmpxchg, 6242 bool IsMinMax, ARMCC::CondCodes CC) const { 6243 // This also handles ATOMIC_SWAP, indicated by Op1==0. 
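  // For example (illustrative): a 64-bit atomic swap arrives with Op1 == 0, so
  // the loop built below feeds the incoming pair (vallo, valhi) straight into
  // strexd with no intervening arithmetic, whereas an atomic add would
  // typically be given ARM::ADDrr / ARM::ADCrr (with NeedsCarry set) so the
  // low-half result produces the carry consumed by the high half.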
6244 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6245 6246 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6247 MachineFunction *MF = BB->getParent(); 6248 MachineFunction::iterator It = BB; 6249 ++It; 6250 6251 unsigned destlo = MI->getOperand(0).getReg(); 6252 unsigned desthi = MI->getOperand(1).getReg(); 6253 unsigned ptr = MI->getOperand(2).getReg(); 6254 unsigned vallo = MI->getOperand(3).getReg(); 6255 unsigned valhi = MI->getOperand(4).getReg(); 6256 DebugLoc dl = MI->getDebugLoc(); 6257 bool isThumb2 = Subtarget->isThumb2(); 6258 6259 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); 6260 if (isThumb2) { 6261 MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); 6262 MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); 6263 MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); 6264 } 6265 6266 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6267 MachineBasicBlock *contBB = 0, *cont2BB = 0; 6268 if (IsCmpxchg || IsMinMax) 6269 contBB = MF->CreateMachineBasicBlock(LLVM_BB); 6270 if (IsCmpxchg) 6271 cont2BB = MF->CreateMachineBasicBlock(LLVM_BB); 6272 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 6273 6274 MF->insert(It, loopMBB); 6275 if (IsCmpxchg || IsMinMax) MF->insert(It, contBB); 6276 if (IsCmpxchg) MF->insert(It, cont2BB); 6277 MF->insert(It, exitMBB); 6278 6279 // Transfer the remainder of BB and its successor edges to exitMBB. 6280 exitMBB->splice(exitMBB->begin(), BB, 6281 llvm::next(MachineBasicBlock::iterator(MI)), 6282 BB->end()); 6283 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6284 6285 const TargetRegisterClass *TRC = isThumb2 ? 6286 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6287 (const TargetRegisterClass*)&ARM::GPRRegClass; 6288 unsigned storesuccess = MRI.createVirtualRegister(TRC); 6289 6290 // thisMBB: 6291 // ... 6292 // fallthrough --> loopMBB 6293 BB->addSuccessor(loopMBB); 6294 6295 // loopMBB: 6296 // ldrexd r2, r3, ptr 6297 // <binopa> r0, r2, incr 6298 // <binopb> r1, r3, incr 6299 // strexd storesuccess, r0, r1, ptr 6300 // cmp storesuccess, #0 6301 // bne- loopMBB 6302 // fallthrough --> exitMBB 6303 BB = loopMBB; 6304 6305 // Load 6306 if (isThumb2) { 6307 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) 6308 .addReg(destlo, RegState::Define) 6309 .addReg(desthi, RegState::Define) 6310 .addReg(ptr)); 6311 } else { 6312 unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6313 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) 6314 .addReg(GPRPair0, RegState::Define).addReg(ptr)); 6315 // Copy r2/r3 into dest. (This copy will normally be coalesced.) 6316 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) 6317 .addReg(GPRPair0, 0, ARM::gsub_0); 6318 BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) 6319 .addReg(GPRPair0, 0, ARM::gsub_1); 6320 } 6321 6322 unsigned StoreLo, StoreHi; 6323 if (IsCmpxchg) { 6324 // Add early exit 6325 for (unsigned i = 0; i < 2; i++) { 6326 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : 6327 ARM::CMPrr)) 6328 .addReg(i == 0 ? destlo : desthi) 6329 .addReg(i == 0 ? vallo : valhi)); 6330 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6331 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6332 BB->addSuccessor(exitMBB); 6333 BB->addSuccessor(i == 0 ? contBB : cont2BB); 6334 BB = (i == 0 ? 
contBB : cont2BB); 6335 } 6336 6337 // Copy to physregs for strexd 6338 StoreLo = MI->getOperand(5).getReg(); 6339 StoreHi = MI->getOperand(6).getReg(); 6340 } else if (Op1) { 6341 // Perform binary operation 6342 unsigned tmpRegLo = MRI.createVirtualRegister(TRC); 6343 AddDefaultPred(BuildMI(BB, dl, TII->get(Op1), tmpRegLo) 6344 .addReg(destlo).addReg(vallo)) 6345 .addReg(NeedsCarry ? ARM::CPSR : 0, getDefRegState(NeedsCarry)); 6346 unsigned tmpRegHi = MRI.createVirtualRegister(TRC); 6347 AddDefaultPred(BuildMI(BB, dl, TII->get(Op2), tmpRegHi) 6348 .addReg(desthi).addReg(valhi)) 6349 .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax)); 6350 6351 StoreLo = tmpRegLo; 6352 StoreHi = tmpRegHi; 6353 } else { 6354 // Copy to physregs for strexd 6355 StoreLo = vallo; 6356 StoreHi = valhi; 6357 } 6358 if (IsMinMax) { 6359 // Compare and branch to exit block. 6360 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6361 .addMBB(exitMBB).addImm(CC).addReg(ARM::CPSR); 6362 BB->addSuccessor(exitMBB); 6363 BB->addSuccessor(contBB); 6364 BB = contBB; 6365 StoreLo = vallo; 6366 StoreHi = valhi; 6367 } 6368 6369 // Store 6370 if (isThumb2) { 6371 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) 6372 .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); 6373 } else { 6374 // Marshal a pair... 6375 unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6376 unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6377 unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 6378 BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair); 6379 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1) 6380 .addReg(UndefPair) 6381 .addReg(StoreLo) 6382 .addImm(ARM::gsub_0); 6383 BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair) 6384 .addReg(r1) 6385 .addReg(StoreHi) 6386 .addImm(ARM::gsub_1); 6387 6388 // ...and store it 6389 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) 6390 .addReg(StorePair).addReg(ptr)); 6391 } 6392 // Cmp+jump 6393 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 6394 .addReg(storesuccess).addImm(0)); 6395 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 6396 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 6397 6398 BB->addSuccessor(loopMBB); 6399 BB->addSuccessor(exitMBB); 6400 6401 // exitMBB: 6402 // ... 6403 BB = exitMBB; 6404 6405 MI->eraseFromParent(); // The instruction is gone now. 6406 6407 return BB; 6408} 6409 6410/// SetupEntryBlockForSjLj - Insert code into the entry block that creates and 6411/// registers the function context. 6412void ARMTargetLowering:: 6413SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, 6414 MachineBasicBlock *DispatchBB, int FI) const { 6415 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6416 DebugLoc dl = MI->getDebugLoc(); 6417 MachineFunction *MF = MBB->getParent(); 6418 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6419 MachineConstantPool *MCP = MF->getConstantPool(); 6420 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6421 const Function *F = MF->getFunction(); 6422 6423 bool isThumb = Subtarget->isThumb(); 6424 bool isThumb2 = Subtarget->isThumb2(); 6425 6426 unsigned PCLabelId = AFI->createPICLabelUId(); 6427 unsigned PCAdj = (isThumb || isThumb2) ? 
4 : 8; 6428 ARMConstantPoolValue *CPV = 6429 ARMConstantPoolMBB::Create(F->getContext(), DispatchBB, PCLabelId, PCAdj); 6430 unsigned CPI = MCP->getConstantPoolIndex(CPV, 4); 6431 6432 const TargetRegisterClass *TRC = isThumb ? 6433 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6434 (const TargetRegisterClass*)&ARM::GPRRegClass; 6435 6436 // Grab constant pool and fixed stack memory operands. 6437 MachineMemOperand *CPMMO = 6438 MF->getMachineMemOperand(MachinePointerInfo::getConstantPool(), 6439 MachineMemOperand::MOLoad, 4, 4); 6440 6441 MachineMemOperand *FIMMOSt = 6442 MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), 6443 MachineMemOperand::MOStore, 4, 4); 6444 6445 // Load the address of the dispatch MBB into the jump buffer. 6446 if (isThumb2) { 6447 // Incoming value: jbuf 6448 // ldr.n r5, LCPI1_1 6449 // orr r5, r5, #1 6450 // add r5, pc 6451 // str r5, [$jbuf, #+4] ; &jbuf[1] 6452 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6453 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2LDRpci), NewVReg1) 6454 .addConstantPoolIndex(CPI) 6455 .addMemOperand(CPMMO)); 6456 // Set the low bit because of thumb mode. 6457 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6458 AddDefaultCC( 6459 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2ORRri), NewVReg2) 6460 .addReg(NewVReg1, RegState::Kill) 6461 .addImm(0x01))); 6462 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6463 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg3) 6464 .addReg(NewVReg2, RegState::Kill) 6465 .addImm(PCLabelId); 6466 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::t2STRi12)) 6467 .addReg(NewVReg3, RegState::Kill) 6468 .addFrameIndex(FI) 6469 .addImm(36) // &jbuf[1] :: pc 6470 .addMemOperand(FIMMOSt)); 6471 } else if (isThumb) { 6472 // Incoming value: jbuf 6473 // ldr.n r1, LCPI1_4 6474 // add r1, pc 6475 // mov r2, #1 6476 // orrs r1, r2 6477 // add r2, $jbuf, #+4 ; &jbuf[1] 6478 // str r1, [r2] 6479 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6480 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tLDRpci), NewVReg1) 6481 .addConstantPoolIndex(CPI) 6482 .addMemOperand(CPMMO)); 6483 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6484 BuildMI(*MBB, MI, dl, TII->get(ARM::tPICADD), NewVReg2) 6485 .addReg(NewVReg1, RegState::Kill) 6486 .addImm(PCLabelId); 6487 // Set the low bit because of thumb mode. 
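// (On ARM, bit 0 of a branch target address selects Thumb state on an
// interworking branch, so the dispatch address stored into the jump buffer
// must have it set when the function is compiled as Thumb code.)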
6488 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6489 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tMOVi8), NewVReg3) 6490 .addReg(ARM::CPSR, RegState::Define) 6491 .addImm(1)); 6492 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6493 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tORR), NewVReg4) 6494 .addReg(ARM::CPSR, RegState::Define) 6495 .addReg(NewVReg2, RegState::Kill) 6496 .addReg(NewVReg3, RegState::Kill)); 6497 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6498 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tADDrSPi), NewVReg5) 6499 .addFrameIndex(FI) 6500 .addImm(36)); // &jbuf[1] :: pc 6501 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::tSTRi)) 6502 .addReg(NewVReg4, RegState::Kill) 6503 .addReg(NewVReg5, RegState::Kill) 6504 .addImm(0) 6505 .addMemOperand(FIMMOSt)); 6506 } else { 6507 // Incoming value: jbuf 6508 // ldr r1, LCPI1_1 6509 // add r1, pc, r1 6510 // str r1, [$jbuf, #+4] ; &jbuf[1] 6511 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6512 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::LDRi12), NewVReg1) 6513 .addConstantPoolIndex(CPI) 6514 .addImm(0) 6515 .addMemOperand(CPMMO)); 6516 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6517 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::PICADD), NewVReg2) 6518 .addReg(NewVReg1, RegState::Kill) 6519 .addImm(PCLabelId)); 6520 AddDefaultPred(BuildMI(*MBB, MI, dl, TII->get(ARM::STRi12)) 6521 .addReg(NewVReg2, RegState::Kill) 6522 .addFrameIndex(FI) 6523 .addImm(36) // &jbuf[1] :: pc 6524 .addMemOperand(FIMMOSt)); 6525 } 6526} 6527 6528MachineBasicBlock *ARMTargetLowering:: 6529EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { 6530 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6531 DebugLoc dl = MI->getDebugLoc(); 6532 MachineFunction *MF = MBB->getParent(); 6533 MachineRegisterInfo *MRI = &MF->getRegInfo(); 6534 ARMFunctionInfo *AFI = MF->getInfo<ARMFunctionInfo>(); 6535 MachineFrameInfo *MFI = MF->getFrameInfo(); 6536 int FI = MFI->getFunctionContextIndex(); 6537 6538 const TargetRegisterClass *TRC = Subtarget->isThumb() ? 6539 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6540 (const TargetRegisterClass*)&ARM::GPRnopcRegClass; 6541 6542 // Get a mapping of the call site numbers to all of the landing pads they're 6543 // associated with. 6544 DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad; 6545 unsigned MaxCSNum = 0; 6546 MachineModuleInfo &MMI = MF->getMMI(); 6547 for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E; 6548 ++BB) { 6549 if (!BB->isLandingPad()) continue; 6550 6551 // FIXME: We should assert that the EH_LABEL is the first MI in the landing 6552 // pad. 6553 for (MachineBasicBlock::iterator 6554 II = BB->begin(), IE = BB->end(); II != IE; ++II) { 6555 if (!II->isEHLabel()) continue; 6556 6557 MCSymbol *Sym = II->getOperand(0).getMCSymbol(); 6558 if (!MMI.hasCallSiteLandingPad(Sym)) continue; 6559 6560 SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym); 6561 for (SmallVectorImpl<unsigned>::iterator 6562 CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end(); 6563 CSI != CSE; ++CSI) { 6564 CallSiteNumToLPad[*CSI].push_back(BB); 6565 MaxCSNum = std::max(MaxCSNum, *CSI); 6566 } 6567 break; 6568 } 6569 } 6570 6571 // Get an ordered list of the machine basic blocks for the jump table. 
  std::vector<MachineBasicBlock*> LPadList;
  SmallPtrSet<MachineBasicBlock*, 64> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());
  for (unsigned I = 1; I <= MaxCSNum; ++I) {
    SmallVectorImpl<MachineBasicBlock*> &MBBList = CallSiteNumToLPad[I];
    for (SmallVectorImpl<MachineBasicBlock*>::iterator
           II = MBBList.begin(), IE = MBBList.end(); II != IE; ++II) {
      LPadList.push_back(*II);
      InvokeBBs.insert((*II)->pred_begin(), (*II)->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the jump table and associated information.
  MachineJumpTableInfo *JTI =
    MF->getOrCreateJumpTableInfo(MachineJumpTableInfo::EK_Inline);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);
  unsigned UId = AFI->createJumpTableUId();
  Reloc::Model RelocM = getTargetMachine().getRelocationModel();

  // Create the MBBs for the dispatch code.

  // Shove the dispatch's address into the return slot in the function context.
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsLandingPad();

  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  unsigned trap_opcode;
  if (Subtarget->isThumb())
    trap_opcode = ARM::tTRAP;
  else
    trap_opcode = Subtarget->useNaClTrap() ? ARM::TRAPNaCl : ARM::TRAP;

  BuildMI(TrapBB, dl, TII->get(trap_opcode));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the MBBs into the function.
  MF->insert(MF->end(), DispatchBB);
  MF->insert(MF->end(), DispContBB);
  MF->insert(MF->end(), TrapBB);

  // Insert code into the entry block that creates and registers the function
  // context.
  SetupEntryBlockForSjLj(MI, MBB, DispatchBB, FI);

  MachineMemOperand *FIMMOLd =
    MF->getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
                             MachineMemOperand::MOLoad |
                             MachineMemOperand::MOVolatile, 4, 4);

  MachineInstrBuilder MIB;
  MIB = BuildMI(DispatchBB, dl, TII->get(ARM::Int_eh_sjlj_dispatchsetup));

  const ARMBaseInstrInfo *AII = static_cast<const ARMBaseInstrInfo*>(TII);
  const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();

  // Add a register mask with no preserved registers. This results in all
  // registers being marked as clobbered.
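  // (Control reaches the dispatch block from the SjLj exception-handling
  // runtime rather than through a normal call, so no callee-saved register
  // can be assumed to have kept its value; clobbering everything forces
  // values that are live across the invoke to be spilled.)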
6635 MIB.addRegMask(RI.getNoPreservedMask()); 6636 6637 unsigned NumLPads = LPadList.size(); 6638 if (Subtarget->isThumb2()) { 6639 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6640 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2LDRi12), NewVReg1) 6641 .addFrameIndex(FI) 6642 .addImm(4) 6643 .addMemOperand(FIMMOLd)); 6644 6645 if (NumLPads < 256) { 6646 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPri)) 6647 .addReg(NewVReg1) 6648 .addImm(LPadList.size())); 6649 } else { 6650 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6651 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVi16), VReg1) 6652 .addImm(NumLPads & 0xFFFF)); 6653 6654 unsigned VReg2 = VReg1; 6655 if ((NumLPads & 0xFFFF0000) != 0) { 6656 VReg2 = MRI->createVirtualRegister(TRC); 6657 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2MOVTi16), VReg2) 6658 .addReg(VReg1) 6659 .addImm(NumLPads >> 16)); 6660 } 6661 6662 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::t2CMPrr)) 6663 .addReg(NewVReg1) 6664 .addReg(VReg2)); 6665 } 6666 6667 BuildMI(DispatchBB, dl, TII->get(ARM::t2Bcc)) 6668 .addMBB(TrapBB) 6669 .addImm(ARMCC::HI) 6670 .addReg(ARM::CPSR); 6671 6672 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6673 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::t2LEApcrelJT),NewVReg3) 6674 .addJumpTableIndex(MJTI) 6675 .addImm(UId)); 6676 6677 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6678 AddDefaultCC( 6679 AddDefaultPred( 6680 BuildMI(DispContBB, dl, TII->get(ARM::t2ADDrs), NewVReg4) 6681 .addReg(NewVReg3, RegState::Kill) 6682 .addReg(NewVReg1) 6683 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6684 6685 BuildMI(DispContBB, dl, TII->get(ARM::t2BR_JT)) 6686 .addReg(NewVReg4, RegState::Kill) 6687 .addReg(NewVReg1) 6688 .addJumpTableIndex(MJTI) 6689 .addImm(UId); 6690 } else if (Subtarget->isThumb()) { 6691 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6692 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRspi), NewVReg1) 6693 .addFrameIndex(FI) 6694 .addImm(1) 6695 .addMemOperand(FIMMOLd)); 6696 6697 if (NumLPads < 256) { 6698 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPi8)) 6699 .addReg(NewVReg1) 6700 .addImm(NumLPads)); 6701 } else { 6702 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6703 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6704 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6705 6706 // MachineConstantPool wants an explicit alignment. 
6707 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 6708 if (Align == 0) 6709 Align = getDataLayout()->getTypeAllocSize(C->getType()); 6710 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6711 6712 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6713 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tLDRpci)) 6714 .addReg(VReg1, RegState::Define) 6715 .addConstantPoolIndex(Idx)); 6716 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::tCMPr)) 6717 .addReg(NewVReg1) 6718 .addReg(VReg1)); 6719 } 6720 6721 BuildMI(DispatchBB, dl, TII->get(ARM::tBcc)) 6722 .addMBB(TrapBB) 6723 .addImm(ARMCC::HI) 6724 .addReg(ARM::CPSR); 6725 6726 unsigned NewVReg2 = MRI->createVirtualRegister(TRC); 6727 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLSLri), NewVReg2) 6728 .addReg(ARM::CPSR, RegState::Define) 6729 .addReg(NewVReg1) 6730 .addImm(2)); 6731 6732 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6733 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLEApcrelJT), NewVReg3) 6734 .addJumpTableIndex(MJTI) 6735 .addImm(UId)); 6736 6737 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6738 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg4) 6739 .addReg(ARM::CPSR, RegState::Define) 6740 .addReg(NewVReg2, RegState::Kill) 6741 .addReg(NewVReg3)); 6742 6743 MachineMemOperand *JTMMOLd = 6744 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6745 MachineMemOperand::MOLoad, 4, 4); 6746 6747 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6748 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tLDRi), NewVReg5) 6749 .addReg(NewVReg4, RegState::Kill) 6750 .addImm(0) 6751 .addMemOperand(JTMMOLd)); 6752 6753 unsigned NewVReg6 = NewVReg5; 6754 if (RelocM == Reloc::PIC_) { 6755 NewVReg6 = MRI->createVirtualRegister(TRC); 6756 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::tADDrr), NewVReg6) 6757 .addReg(ARM::CPSR, RegState::Define) 6758 .addReg(NewVReg5, RegState::Kill) 6759 .addReg(NewVReg3)); 6760 } 6761 6762 BuildMI(DispContBB, dl, TII->get(ARM::tBR_JTr)) 6763 .addReg(NewVReg6, RegState::Kill) 6764 .addJumpTableIndex(MJTI) 6765 .addImm(UId); 6766 } else { 6767 unsigned NewVReg1 = MRI->createVirtualRegister(TRC); 6768 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRi12), NewVReg1) 6769 .addFrameIndex(FI) 6770 .addImm(4) 6771 .addMemOperand(FIMMOLd)); 6772 6773 if (NumLPads < 256) { 6774 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPri)) 6775 .addReg(NewVReg1) 6776 .addImm(NumLPads)); 6777 } else if (Subtarget->hasV6T2Ops() && isUInt<16>(NumLPads)) { 6778 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6779 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVi16), VReg1) 6780 .addImm(NumLPads & 0xFFFF)); 6781 6782 unsigned VReg2 = VReg1; 6783 if ((NumLPads & 0xFFFF0000) != 0) { 6784 VReg2 = MRI->createVirtualRegister(TRC); 6785 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::MOVTi16), VReg2) 6786 .addReg(VReg1) 6787 .addImm(NumLPads >> 16)); 6788 } 6789 6790 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6791 .addReg(NewVReg1) 6792 .addReg(VReg2)); 6793 } else { 6794 MachineConstantPool *ConstantPool = MF->getConstantPool(); 6795 Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); 6796 const Constant *C = ConstantInt::get(Int32Ty, NumLPads); 6797 6798 // MachineConstantPool wants an explicit alignment. 
6799 unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty); 6800 if (Align == 0) 6801 Align = getDataLayout()->getTypeAllocSize(C->getType()); 6802 unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); 6803 6804 unsigned VReg1 = MRI->createVirtualRegister(TRC); 6805 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) 6806 .addReg(VReg1, RegState::Define) 6807 .addConstantPoolIndex(Idx) 6808 .addImm(0)); 6809 AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) 6810 .addReg(NewVReg1) 6811 .addReg(VReg1, RegState::Kill)); 6812 } 6813 6814 BuildMI(DispatchBB, dl, TII->get(ARM::Bcc)) 6815 .addMBB(TrapBB) 6816 .addImm(ARMCC::HI) 6817 .addReg(ARM::CPSR); 6818 6819 unsigned NewVReg3 = MRI->createVirtualRegister(TRC); 6820 AddDefaultCC( 6821 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::MOVsi), NewVReg3) 6822 .addReg(NewVReg1) 6823 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, 2)))); 6824 unsigned NewVReg4 = MRI->createVirtualRegister(TRC); 6825 AddDefaultPred(BuildMI(DispContBB, dl, TII->get(ARM::LEApcrelJT), NewVReg4) 6826 .addJumpTableIndex(MJTI) 6827 .addImm(UId)); 6828 6829 MachineMemOperand *JTMMOLd = 6830 MF->getMachineMemOperand(MachinePointerInfo::getJumpTable(), 6831 MachineMemOperand::MOLoad, 4, 4); 6832 unsigned NewVReg5 = MRI->createVirtualRegister(TRC); 6833 AddDefaultPred( 6834 BuildMI(DispContBB, dl, TII->get(ARM::LDRrs), NewVReg5) 6835 .addReg(NewVReg3, RegState::Kill) 6836 .addReg(NewVReg4) 6837 .addImm(0) 6838 .addMemOperand(JTMMOLd)); 6839 6840 if (RelocM == Reloc::PIC_) { 6841 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTadd)) 6842 .addReg(NewVReg5, RegState::Kill) 6843 .addReg(NewVReg4) 6844 .addJumpTableIndex(MJTI) 6845 .addImm(UId); 6846 } else { 6847 BuildMI(DispContBB, dl, TII->get(ARM::BR_JTr)) 6848 .addReg(NewVReg5, RegState::Kill) 6849 .addJumpTableIndex(MJTI) 6850 .addImm(UId); 6851 } 6852 } 6853 6854 // Add the jump table entries as successors to the MBB. 6855 SmallPtrSet<MachineBasicBlock*, 8> SeenMBBs; 6856 for (std::vector<MachineBasicBlock*>::iterator 6857 I = LPadList.begin(), E = LPadList.end(); I != E; ++I) { 6858 MachineBasicBlock *CurMBB = *I; 6859 if (SeenMBBs.insert(CurMBB)) 6860 DispContBB->addSuccessor(CurMBB); 6861 } 6862 6863 // N.B. the order the invoke BBs are processed in doesn't matter here. 6864 const uint16_t *SavedRegs = RI.getCalleeSavedRegs(MF); 6865 SmallVector<MachineBasicBlock*, 64> MBBLPads; 6866 for (SmallPtrSet<MachineBasicBlock*, 64>::iterator 6867 I = InvokeBBs.begin(), E = InvokeBBs.end(); I != E; ++I) { 6868 MachineBasicBlock *BB = *I; 6869 6870 // Remove the landing pad successor from the invoke block and replace it 6871 // with the new dispatch block. 6872 SmallVector<MachineBasicBlock*, 4> Successors(BB->succ_begin(), 6873 BB->succ_end()); 6874 while (!Successors.empty()) { 6875 MachineBasicBlock *SMBB = Successors.pop_back_val(); 6876 if (SMBB->isLandingPad()) { 6877 BB->removeSuccessor(SMBB); 6878 MBBLPads.push_back(SMBB); 6879 } 6880 } 6881 6882 BB->addSuccessor(DispatchBB); 6883 6884 // Find the invoke call and mark all of the callee-saved registers as 6885 // 'implicit defined' so that they're spilled. This prevents code from 6886 // moving instructions to before the EH block, where they will never be 6887 // executed. 
6888 for (MachineBasicBlock::reverse_iterator 6889 II = BB->rbegin(), IE = BB->rend(); II != IE; ++II) { 6890 if (!II->isCall()) continue; 6891 6892 DenseMap<unsigned, bool> DefRegs; 6893 for (MachineInstr::mop_iterator 6894 OI = II->operands_begin(), OE = II->operands_end(); 6895 OI != OE; ++OI) { 6896 if (!OI->isReg()) continue; 6897 DefRegs[OI->getReg()] = true; 6898 } 6899 6900 MachineInstrBuilder MIB(*MF, &*II); 6901 6902 for (unsigned i = 0; SavedRegs[i] != 0; ++i) { 6903 unsigned Reg = SavedRegs[i]; 6904 if (Subtarget->isThumb2() && 6905 !ARM::tGPRRegClass.contains(Reg) && 6906 !ARM::hGPRRegClass.contains(Reg)) 6907 continue; 6908 if (Subtarget->isThumb1Only() && !ARM::tGPRRegClass.contains(Reg)) 6909 continue; 6910 if (!Subtarget->isThumb() && !ARM::GPRRegClass.contains(Reg)) 6911 continue; 6912 if (!DefRegs[Reg]) 6913 MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); 6914 } 6915 6916 break; 6917 } 6918 } 6919 6920 // Mark all former landing pads as non-landing pads. The dispatch is the only 6921 // landing pad now. 6922 for (SmallVectorImpl<MachineBasicBlock*>::iterator 6923 I = MBBLPads.begin(), E = MBBLPads.end(); I != E; ++I) 6924 (*I)->setIsLandingPad(false); 6925 6926 // The instruction is gone now. 6927 MI->eraseFromParent(); 6928 6929 return MBB; 6930} 6931 6932static 6933MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 6934 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 6935 E = MBB->succ_end(); I != E; ++I) 6936 if (*I != Succ) 6937 return *I; 6938 llvm_unreachable("Expecting a BB with two successors!"); 6939} 6940 6941MachineBasicBlock *ARMTargetLowering:: 6942EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { 6943 // This pseudo instruction has 3 operands: dst, src, size 6944 // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). 6945 // Otherwise, we will generate unrolled scalar copies. 6946 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6947 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6948 MachineFunction::iterator It = BB; 6949 ++It; 6950 6951 unsigned dest = MI->getOperand(0).getReg(); 6952 unsigned src = MI->getOperand(1).getReg(); 6953 unsigned SizeVal = MI->getOperand(2).getImm(); 6954 unsigned Align = MI->getOperand(3).getImm(); 6955 DebugLoc dl = MI->getDebugLoc(); 6956 6957 bool isThumb2 = Subtarget->isThumb2(); 6958 MachineFunction *MF = BB->getParent(); 6959 MachineRegisterInfo &MRI = MF->getRegInfo(); 6960 unsigned ldrOpc, strOpc, UnitSize = 0; 6961 6962 const TargetRegisterClass *TRC = isThumb2 ? 6963 (const TargetRegisterClass*)&ARM::tGPRRegClass : 6964 (const TargetRegisterClass*)&ARM::GPRRegClass; 6965 const TargetRegisterClass *TRC_Vec = 0; 6966 6967 if (Align & 1) { 6968 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 6969 strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; 6970 UnitSize = 1; 6971 } else if (Align & 2) { 6972 ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; 6973 strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; 6974 UnitSize = 2; 6975 } else { 6976 // Check whether we can use NEON instructions. 6977 if (!MF->getFunction()->getAttributes(). 
6978 hasAttribute(AttributeSet::FunctionIndex, 6979 Attribute::NoImplicitFloat) && 6980 Subtarget->hasNEON()) { 6981 if ((Align % 16 == 0) && SizeVal >= 16) { 6982 ldrOpc = ARM::VLD1q32wb_fixed; 6983 strOpc = ARM::VST1q32wb_fixed; 6984 UnitSize = 16; 6985 TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; 6986 } 6987 else if ((Align % 8 == 0) && SizeVal >= 8) { 6988 ldrOpc = ARM::VLD1d32wb_fixed; 6989 strOpc = ARM::VST1d32wb_fixed; 6990 UnitSize = 8; 6991 TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; 6992 } 6993 } 6994 // Can't use NEON instructions. 6995 if (UnitSize == 0) { 6996 ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; 6997 strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; 6998 UnitSize = 4; 6999 } 7000 } 7001 7002 unsigned BytesLeft = SizeVal % UnitSize; 7003 unsigned LoopSize = SizeVal - BytesLeft; 7004 7005 if (SizeVal <= Subtarget->getMaxInlineSizeThreshold()) { 7006 // Use LDR and STR to copy. 7007 // [scratch, srcOut] = LDR_POST(srcIn, UnitSize) 7008 // [destOut] = STR_POST(scratch, destIn, UnitSize) 7009 unsigned srcIn = src; 7010 unsigned destIn = dest; 7011 for (unsigned i = 0; i < LoopSize; i+=UnitSize) { 7012 unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); 7013 unsigned srcOut = MRI.createVirtualRegister(TRC); 7014 unsigned destOut = MRI.createVirtualRegister(TRC); 7015 if (UnitSize >= 8) { 7016 AddDefaultPred(BuildMI(*BB, MI, dl, 7017 TII->get(ldrOpc), scratch) 7018 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); 7019 7020 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 7021 .addReg(destIn).addImm(0).addReg(scratch)); 7022 } else if (isThumb2) { 7023 AddDefaultPred(BuildMI(*BB, MI, dl, 7024 TII->get(ldrOpc), scratch) 7025 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); 7026 7027 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 7028 .addReg(scratch).addReg(destIn) 7029 .addImm(UnitSize)); 7030 } else { 7031 AddDefaultPred(BuildMI(*BB, MI, dl, 7032 TII->get(ldrOpc), scratch) 7033 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) 7034 .addImm(UnitSize)); 7035 7036 AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) 7037 .addReg(scratch).addReg(destIn) 7038 .addReg(0).addImm(UnitSize)); 7039 } 7040 srcIn = srcOut; 7041 destIn = destOut; 7042 } 7043 7044 // Handle the leftover bytes with LDRB and STRB. 7045 // [scratch, srcOut] = LDRB_POST(srcIn, 1) 7046 // [destOut] = STRB_POST(scratch, destIn, 1) 7047 ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; 7048 strOpc = isThumb2 ? 
        ARM::t2STRB_POST : ARM::STRB_POST_IMM;
    for (unsigned i = 0; i < BytesLeft; i++) {
      unsigned scratch = MRI.createVirtualRegister(TRC);
      unsigned srcOut = MRI.createVirtualRegister(TRC);
      unsigned destOut = MRI.createVirtualRegister(TRC);
      if (isThumb2) {
        AddDefaultPred(BuildMI(*BB, MI, dl,
                               TII->get(ldrOpc), scratch)
          .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1));

        // t2STRB_POST takes an immediate offset only; no register-offset
        // operand is added here (unlike the ARM-mode STRB_POST_IMM below).
        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
                       .addReg(scratch).addReg(destIn)
                       .addImm(1));
      } else {
        AddDefaultPred(BuildMI(*BB, MI, dl,
                               TII->get(ldrOpc), scratch)
          .addReg(srcOut, RegState::Define).addReg(srcIn)
          .addReg(0).addImm(1));

        AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut)
                       .addReg(scratch).addReg(destIn)
                       .addReg(0).addImm(1));
      }
      srcIn = srcOut;
      destIn = destOut;
    }
    MI->eraseFromParent();   // The instruction is gone now.
    return BB;
  }

  // Expand the pseudo op to a loop.
  // thisMBB:
  //   ...
  //   movw varEnd, #          --> with thumb2
  //   movt varEnd, #
  //   ldrcp varEnd, idx       --> without thumb2
  //   fallthrough --> loopMBB
  // loopMBB:
  //   PHI varPhi, varEnd, varLoop
  //   PHI srcPhi, src, srcLoop
  //   PHI destPhi, dst, destLoop
  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  //   subs varLoop, varPhi, #UnitSize
  //   bne loopMBB
  //   fallthrough --> exitMBB
  // exitMBB:
  //   epilogue to handle left-over bytes
  //   [scratch, srcOut] = LDRB_POST(srcLoop, 1)
  //   [destOut] = STRB_POST(scratch, destLoop, 1)
  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loopMBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // Load an immediate to varEnd.
  unsigned varEnd = MRI.createVirtualRegister(TRC);
  if (isThumb2) {
    unsigned VReg1 = varEnd;
    if ((LoopSize & 0xFFFF0000) != 0)
      VReg1 = MRI.createVirtualRegister(TRC);
    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1)
                   .addImm(LoopSize & 0xFFFF));

    if ((LoopSize & 0xFFFF0000) != 0)
      AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd)
                     .addReg(VReg1)
                     .addImm(LoopSize >> 16));
  } else {
    MachineConstantPool *ConstantPool = MF->getConstantPool();
    Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext());
    const Constant *C = ConstantInt::get(Int32Ty, LoopSize);

    // MachineConstantPool wants an explicit alignment.
    unsigned Align = getDataLayout()->getPrefTypeAlignment(Int32Ty);
    if (Align == 0)
      Align = getDataLayout()->getTypeAllocSize(C->getType());
    unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);

    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp))
                   .addReg(varEnd, RegState::Define)
                   .addConstantPoolIndex(Idx)
                   .addImm(0));
  }
  BB->addSuccessor(loopMBB);

  // Generate the loop body:
  //   varPhi = PHI(varLoop, varEnd)
  //   srcPhi = PHI(srcLoop, src)
  //   destPhi = PHI(destLoop, dst)
  MachineBasicBlock *entryBB = BB;
  BB = loopMBB;
  unsigned varLoop = MRI.createVirtualRegister(TRC);
  unsigned varPhi = MRI.createVirtualRegister(TRC);
  unsigned srcLoop = MRI.createVirtualRegister(TRC);
  unsigned srcPhi = MRI.createVirtualRegister(TRC);
  unsigned destLoop = MRI.createVirtualRegister(TRC);
  unsigned destPhi = MRI.createVirtualRegister(TRC);

  BuildMI(*BB, BB->begin(), dl, TII->get(ARM::PHI), varPhi)
    .addReg(varLoop).addMBB(loopMBB)
    .addReg(varEnd).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), srcPhi)
    .addReg(srcLoop).addMBB(loopMBB)
    .addReg(src).addMBB(entryBB);
  BuildMI(BB, dl, TII->get(ARM::PHI), destPhi)
    .addReg(destLoop).addMBB(loopMBB)
    .addReg(dest).addMBB(entryBB);

  //   [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize)
  //   [destLoop] = STR_POST(scratch, destPhi, UnitSize)
  unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec : TRC);
  if (UnitSize >= 8) {
    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0));

    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
      .addReg(destPhi).addImm(0).addReg(scratch));
  } else if (isThumb2) {
    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize));

    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
      .addReg(scratch).addReg(destPhi)
      .addImm(UnitSize));
  } else {
    AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch)
      .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0)
      .addImm(UnitSize));

    AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop)
      .addReg(scratch).addReg(destPhi)
      .addReg(0).addImm(UnitSize));
  }

  // Decrement loop variable by UnitSize.
  MachineInstrBuilder MIB = BuildMI(BB, dl,
    TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop);
  AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize)));
  MIB->getOperand(5).setReg(ARM::CPSR);
  MIB->getOperand(5).setIsDef(true);

  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);

  // loopMBB can loop back to loopMBB or fall through to exitMBB.
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  // Add epilogue to handle BytesLeft.
  BB = exitMBB;
  MachineInstr *StartOfExit = exitMBB->begin();
  ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM;
  strOpc = isThumb2 ?
ARM::t2STRB_POST : ARM::STRB_POST_IMM; 7208 7209 // [scratch, srcOut] = LDRB_POST(srcLoop, 1) 7210 // [destOut] = STRB_POST(scratch, destLoop, 1) 7211 unsigned srcIn = srcLoop; 7212 unsigned destIn = destLoop; 7213 for (unsigned i = 0; i < BytesLeft; i++) { 7214 unsigned scratch = MRI.createVirtualRegister(TRC); 7215 unsigned srcOut = MRI.createVirtualRegister(TRC); 7216 unsigned destOut = MRI.createVirtualRegister(TRC); 7217 if (isThumb2) { 7218 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 7219 TII->get(ldrOpc),scratch) 7220 .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); 7221 7222 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 7223 .addReg(scratch).addReg(destIn) 7224 .addImm(1)); 7225 } else { 7226 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, 7227 TII->get(ldrOpc),scratch) 7228 .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); 7229 7230 AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) 7231 .addReg(scratch).addReg(destIn) 7232 .addReg(0).addImm(1)); 7233 } 7234 srcIn = srcOut; 7235 destIn = destOut; 7236 } 7237 7238 MI->eraseFromParent(); // The instruction is gone now. 7239 return BB; 7240} 7241 7242MachineBasicBlock * 7243ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 7244 MachineBasicBlock *BB) const { 7245 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 7246 DebugLoc dl = MI->getDebugLoc(); 7247 bool isThumb2 = Subtarget->isThumb2(); 7248 switch (MI->getOpcode()) { 7249 default: { 7250 MI->dump(); 7251 llvm_unreachable("Unexpected instr type to insert"); 7252 } 7253 // The Thumb2 pre-indexed stores have the same MI operands, they just 7254 // define them differently in the .td files from the isel patterns, so 7255 // they need pseudos. 7256 case ARM::t2STR_preidx: 7257 MI->setDesc(TII->get(ARM::t2STR_PRE)); 7258 return BB; 7259 case ARM::t2STRB_preidx: 7260 MI->setDesc(TII->get(ARM::t2STRB_PRE)); 7261 return BB; 7262 case ARM::t2STRH_preidx: 7263 MI->setDesc(TII->get(ARM::t2STRH_PRE)); 7264 return BB; 7265 7266 case ARM::STRi_preidx: 7267 case ARM::STRBi_preidx: { 7268 unsigned NewOpc = MI->getOpcode() == ARM::STRi_preidx ? 7269 ARM::STR_PRE_IMM : ARM::STRB_PRE_IMM; 7270 // Decode the offset. 
7271 unsigned Offset = MI->getOperand(4).getImm(); 7272 bool isSub = ARM_AM::getAM2Op(Offset) == ARM_AM::sub; 7273 Offset = ARM_AM::getAM2Offset(Offset); 7274 if (isSub) 7275 Offset = -Offset; 7276 7277 MachineMemOperand *MMO = *MI->memoperands_begin(); 7278 BuildMI(*BB, MI, dl, TII->get(NewOpc)) 7279 .addOperand(MI->getOperand(0)) // Rn_wb 7280 .addOperand(MI->getOperand(1)) // Rt 7281 .addOperand(MI->getOperand(2)) // Rn 7282 .addImm(Offset) // offset (skip GPR==zero_reg) 7283 .addOperand(MI->getOperand(5)) // pred 7284 .addOperand(MI->getOperand(6)) 7285 .addMemOperand(MMO); 7286 MI->eraseFromParent(); 7287 return BB; 7288 } 7289 case ARM::STRr_preidx: 7290 case ARM::STRBr_preidx: 7291 case ARM::STRH_preidx: { 7292 unsigned NewOpc; 7293 switch (MI->getOpcode()) { 7294 default: llvm_unreachable("unexpected opcode!"); 7295 case ARM::STRr_preidx: NewOpc = ARM::STR_PRE_REG; break; 7296 case ARM::STRBr_preidx: NewOpc = ARM::STRB_PRE_REG; break; 7297 case ARM::STRH_preidx: NewOpc = ARM::STRH_PRE; break; 7298 } 7299 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 7300 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 7301 MIB.addOperand(MI->getOperand(i)); 7302 MI->eraseFromParent(); 7303 return BB; 7304 } 7305 case ARM::ATOMIC_LOAD_ADD_I8: 7306 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7307 case ARM::ATOMIC_LOAD_ADD_I16: 7308 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7309 case ARM::ATOMIC_LOAD_ADD_I32: 7310 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 7311 7312 case ARM::ATOMIC_LOAD_AND_I8: 7313 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7314 case ARM::ATOMIC_LOAD_AND_I16: 7315 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7316 case ARM::ATOMIC_LOAD_AND_I32: 7317 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7318 7319 case ARM::ATOMIC_LOAD_OR_I8: 7320 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7321 case ARM::ATOMIC_LOAD_OR_I16: 7322 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7323 case ARM::ATOMIC_LOAD_OR_I32: 7324 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7325 7326 case ARM::ATOMIC_LOAD_XOR_I8: 7327 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7328 case ARM::ATOMIC_LOAD_XOR_I16: 7329 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7330 case ARM::ATOMIC_LOAD_XOR_I32: 7331 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7332 7333 case ARM::ATOMIC_LOAD_NAND_I8: 7334 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7335 case ARM::ATOMIC_LOAD_NAND_I16: 7336 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7337 case ARM::ATOMIC_LOAD_NAND_I32: 7338 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 7339 7340 case ARM::ATOMIC_LOAD_SUB_I8: 7341 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7342 case ARM::ATOMIC_LOAD_SUB_I16: 7343 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 7344 case ARM::ATOMIC_LOAD_SUB_I32: 7345 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 7346 7347 case ARM::ATOMIC_LOAD_MIN_I8: 7348 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 7349 case ARM::ATOMIC_LOAD_MIN_I16: 7350 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 7351 case ARM::ATOMIC_LOAD_MIN_I32: 7352 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 7353 7354 case ARM::ATOMIC_LOAD_MAX_I8: 7355 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 7356 case ARM::ATOMIC_LOAD_MAX_I16: 7357 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 7358 case ARM::ATOMIC_LOAD_MAX_I32: 7359 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 7360 7361 case ARM::ATOMIC_LOAD_UMIN_I8: 7362 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 7363 case ARM::ATOMIC_LOAD_UMIN_I16: 7364 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 7365 case ARM::ATOMIC_LOAD_UMIN_I32: 7366 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 7367 7368 case ARM::ATOMIC_LOAD_UMAX_I8: 7369 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 7370 case ARM::ATOMIC_LOAD_UMAX_I16: 7371 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 7372 case ARM::ATOMIC_LOAD_UMAX_I32: 7373 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 7374 7375 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 7376 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 7377 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 7378 7379 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 7380 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 7381 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 7382 7383 7384 case ARM::ATOMADD6432: 7385 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, 7386 isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, 7387 /*NeedsCarry*/ true); 7388 case ARM::ATOMSUB6432: 7389 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7390 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7391 /*NeedsCarry*/ true); 7392 case ARM::ATOMOR6432: 7393 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, 7394 isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 7395 case ARM::ATOMXOR6432: 7396 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, 7397 isThumb2 ? ARM::t2EORrr : ARM::EORrr); 7398 case ARM::ATOMAND6432: 7399 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, 7400 isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 7401 case ARM::ATOMSWAP6432: 7402 return EmitAtomicBinary64(MI, BB, 0, 0, false); 7403 case ARM::ATOMCMPXCHG6432: 7404 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7405 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7406 /*NeedsCarry*/ false, /*IsCmpxchg*/true); 7407 case ARM::ATOMMIN6432: 7408 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7409 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7410 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7411 /*IsMinMax*/ true, ARMCC::LT); 7412 case ARM::ATOMMAX6432: 7413 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7414 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7415 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7416 /*IsMinMax*/ true, ARMCC::GE); 7417 case ARM::ATOMUMIN6432: 7418 return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, 7419 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7420 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7421 /*IsMinMax*/ true, ARMCC::LO); 7422 case ARM::ATOMUMAX6432: 7423 return EmitAtomicBinary64(MI, BB, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr, 7424 isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, 7425 /*NeedsCarry*/ true, /*IsCmpxchg*/false, 7426 /*IsMinMax*/ true, ARMCC::HS); 7427 7428 case ARM::tMOVCCr_pseudo: { 7429 // To "insert" a SELECT_CC instruction, we actually have to insert the 7430 // diamond control-flow pattern. The incoming instruction knows the 7431 // destination vreg to set, the condition code register to branch on, the 7432 // true/false values to select between, and a branch opcode to use. 7433 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7434 MachineFunction::iterator It = BB; 7435 ++It; 7436 7437 // thisMBB: 7438 // ... 7439 // TrueVal = ... 7440 // cmpTY ccX, r1, r2 7441 // bCC copy1MBB 7442 // fallthrough --> copy0MBB 7443 MachineBasicBlock *thisMBB = BB; 7444 MachineFunction *F = BB->getParent(); 7445 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 7446 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 7447 F->insert(It, copy0MBB); 7448 F->insert(It, sinkMBB); 7449 7450 // Transfer the remainder of BB and its successor edges to sinkMBB. 7451 sinkMBB->splice(sinkMBB->begin(), BB, 7452 llvm::next(MachineBasicBlock::iterator(MI)), 7453 BB->end()); 7454 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 7455 7456 BB->addSuccessor(copy0MBB); 7457 BB->addSuccessor(sinkMBB); 7458 7459 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 7460 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 7461 7462 // copy0MBB: 7463 // %FalseValue = ... 7464 // # fallthrough to sinkMBB 7465 BB = copy0MBB; 7466 7467 // Update machine-CFG edges 7468 BB->addSuccessor(sinkMBB); 7469 7470 // sinkMBB: 7471 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 7472 // ... 7473 BB = sinkMBB; 7474 BuildMI(*BB, BB->begin(), dl, 7475 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 7476 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 7477 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 7478 7479 MI->eraseFromParent(); // The pseudo instruction is gone now. 7480 return BB; 7481 } 7482 7483 case ARM::BCCi64: 7484 case ARM::BCCZi64: { 7485 // If there is an unconditional branch to the other successor, remove it. 7486 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 7487 7488 // Compare both parts that make up the double comparison separately for 7489 // equality. 7490 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 7491 7492 unsigned LHS1 = MI->getOperand(1).getReg(); 7493 unsigned LHS2 = MI->getOperand(2).getReg(); 7494 if (RHSisZero) { 7495 AddDefaultPred(BuildMI(BB, dl, 7496 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7497 .addReg(LHS1).addImm(0)); 7498 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7499 .addReg(LHS2).addImm(0) 7500 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7501 } else { 7502 unsigned RHS1 = MI->getOperand(3).getReg(); 7503 unsigned RHS2 = MI->getOperand(4).getReg(); 7504 AddDefaultPred(BuildMI(BB, dl, 7505 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7506 .addReg(LHS1).addReg(RHS1)); 7507 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 7508 .addReg(LHS2).addReg(RHS2) 7509 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 7510 } 7511 7512 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 7513 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 7514 if (MI->getOperand(0).getImm() == ARMCC::NE) 7515 std::swap(destMBB, exitMBB); 7516 7517 BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) 7518 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 7519 if (isThumb2) 7520 AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2B)).addMBB(exitMBB)); 7521 else 7522 BuildMI(BB, dl, TII->get(ARM::B)) .addMBB(exitMBB); 7523 7524 MI->eraseFromParent(); // The pseudo instruction is gone now. 7525 return BB; 7526 } 7527 7528 case ARM::Int_eh_sjlj_setjmp: 7529 case ARM::Int_eh_sjlj_setjmp_nofp: 7530 case ARM::tInt_eh_sjlj_setjmp: 7531 case ARM::t2Int_eh_sjlj_setjmp: 7532 case ARM::t2Int_eh_sjlj_setjmp_nofp: 7533 EmitSjLjDispatchBlock(MI, BB); 7534 return BB; 7535 7536 case ARM::ABS: 7537 case ARM::t2ABS: { 7538 // To insert an ABS instruction, we have to insert the 7539 // diamond control-flow pattern. The incoming instruction knows the 7540 // source vreg to test against 0, the destination vreg to set, 7541 // the condition code register to branch on, the 7542 // true/false values to select between, and a branch opcode to use. 7543 // It transforms 7544 // V1 = ABS V0 7545 // into 7546 // V2 = MOVS V0 7547 // BCC (branch to SinkBB if V0 >= 0) 7548 // RSBBB: V3 = RSBri V2, 0 (compute ABS if V2 < 0) 7549 // SinkBB: V1 = PHI(V2, V3) 7550 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 7551 MachineFunction::iterator BBI = BB; 7552 ++BBI; 7553 MachineFunction *Fn = BB->getParent(); 7554 MachineBasicBlock *RSBBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7555 MachineBasicBlock *SinkBB = Fn->CreateMachineBasicBlock(LLVM_BB); 7556 Fn->insert(BBI, RSBBB); 7557 Fn->insert(BBI, SinkBB); 7558 7559 unsigned int ABSSrcReg = MI->getOperand(1).getReg(); 7560 unsigned int ABSDstReg = MI->getOperand(0).getReg(); 7561 bool isThumb2 = Subtarget->isThumb2(); 7562 MachineRegisterInfo &MRI = Fn->getRegInfo(); 7563 // In Thumb mode S must not be specified if source register is the SP or 7564 // PC and if destination register is the SP, so restrict register class 7565 unsigned NewRsbDstReg = MRI.createVirtualRegister(isThumb2 ? 7566 (const TargetRegisterClass*)&ARM::rGPRRegClass : 7567 (const TargetRegisterClass*)&ARM::GPRRegClass); 7568 7569 // Transfer the remainder of BB and its successor edges to sinkMBB. 7570 SinkBB->splice(SinkBB->begin(), BB, 7571 llvm::next(MachineBasicBlock::iterator(MI)), 7572 BB->end()); 7573 SinkBB->transferSuccessorsAndUpdatePHIs(BB); 7574 7575 BB->addSuccessor(RSBBB); 7576 BB->addSuccessor(SinkBB); 7577 7578 // fall through to SinkMBB 7579 RSBBB->addSuccessor(SinkBB); 7580 7581 // insert a cmp at the end of BB 7582 AddDefaultPred(BuildMI(BB, dl, 7583 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 7584 .addReg(ABSSrcReg).addImm(0)); 7585 7586 // insert a bcc with opposite CC to ARMCC::MI at the end of BB 7587 BuildMI(BB, dl, 7588 TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)).addMBB(SinkBB) 7589 .addImm(ARMCC::getOppositeCondition(ARMCC::MI)).addReg(ARM::CPSR); 7590 7591 // insert rsbri in RSBBB 7592 // Note: BCC and rsbri will be converted into predicated rsbmi 7593 // by if-conversion pass 7594 BuildMI(*RSBBB, RSBBB->begin(), dl, 7595 TII->get(isThumb2 ? 
ARM::t2RSBri : ARM::RSBri), NewRsbDstReg) 7596 .addReg(ABSSrcReg, RegState::Kill) 7597 .addImm(0).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 7598 7599 // insert PHI in SinkBB, 7600 // reuse ABSDstReg to not change uses of ABS instruction 7601 BuildMI(*SinkBB, SinkBB->begin(), dl, 7602 TII->get(ARM::PHI), ABSDstReg) 7603 .addReg(NewRsbDstReg).addMBB(RSBBB) 7604 .addReg(ABSSrcReg).addMBB(BB); 7605 7606 // remove ABS instruction 7607 MI->eraseFromParent(); 7608 7609 // return last added BB 7610 return SinkBB; 7611 } 7612 case ARM::COPY_STRUCT_BYVAL_I32: 7613 ++NumLoopByVals; 7614 return EmitStructByval(MI, BB); 7615 } 7616} 7617 7618void ARMTargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, 7619 SDNode *Node) const { 7620 if (!MI->hasPostISelHook()) { 7621 assert(!convertAddSubFlagsOpcode(MI->getOpcode()) && 7622 "Pseudo flag-setting opcodes must be marked with 'hasPostISelHook'"); 7623 return; 7624 } 7625 7626 const MCInstrDesc *MCID = &MI->getDesc(); 7627 // Adjust potentially 's' setting instructions after isel, i.e. ADC, SBC, RSB, 7628 // RSC. Coming out of isel, they have an implicit CPSR def, but the optional 7629 // operand is still set to noreg. If needed, set the optional operand's 7630 // register to CPSR, and remove the redundant implicit def. 7631 // 7632 // e.g. ADCS (..., CPSR<imp-def>) -> ADC (... opt:CPSR<def>). 7633 7634 // Rename pseudo opcodes. 7635 unsigned NewOpc = convertAddSubFlagsOpcode(MI->getOpcode()); 7636 if (NewOpc) { 7637 const ARMBaseInstrInfo *TII = 7638 static_cast<const ARMBaseInstrInfo*>(getTargetMachine().getInstrInfo()); 7639 MCID = &TII->get(NewOpc); 7640 7641 assert(MCID->getNumOperands() == MI->getDesc().getNumOperands() + 1 && 7642 "converted opcode should be the same except for cc_out"); 7643 7644 MI->setDesc(*MCID); 7645 7646 // Add the optional cc_out operand 7647 MI->addOperand(MachineOperand::CreateReg(0, /*isDef=*/true)); 7648 } 7649 unsigned ccOutIdx = MCID->getNumOperands() - 1; 7650 7651 // Any ARM instruction that sets the 's' bit should specify an optional 7652 // "cc_out" operand in the last operand position. 7653 if (!MI->hasOptionalDef() || !MCID->OpInfo[ccOutIdx].isOptionalDef()) { 7654 assert(!NewOpc && "Optional cc_out operand required"); 7655 return; 7656 } 7657 // Look for an implicit def of CPSR added by MachineInstr ctor. Remove it 7658 // since we already have an optional CPSR def. 7659 bool definesCPSR = false; 7660 bool deadCPSR = false; 7661 for (unsigned i = MCID->getNumOperands(), e = MI->getNumOperands(); 7662 i != e; ++i) { 7663 const MachineOperand &MO = MI->getOperand(i); 7664 if (MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR) { 7665 definesCPSR = true; 7666 if (MO.isDead()) 7667 deadCPSR = true; 7668 MI->RemoveOperand(i); 7669 break; 7670 } 7671 } 7672 if (!definesCPSR) { 7673 assert(!NewOpc && "Optional cc_out operand required"); 7674 return; 7675 } 7676 assert(deadCPSR == !Node->hasAnyUseOfValue(1) && "inconsistent dead flag"); 7677 if (deadCPSR) { 7678 assert(!MI->getOperand(ccOutIdx).getReg() && 7679 "expect uninitialized optional cc_out operand"); 7680 return; 7681 } 7682 7683 // If this instruction was defined with an optional CPSR def and its dag node 7684 // had a live implicit CPSR def, then activate the optional CPSR def. 
7685 MachineOperand &MO = MI->getOperand(ccOutIdx); 7686 MO.setReg(ARM::CPSR); 7687 MO.setIsDef(true); 7688} 7689 7690//===----------------------------------------------------------------------===// 7691// ARM Optimization Hooks 7692//===----------------------------------------------------------------------===// 7693 7694// Helper function that checks if N is a null or all ones constant. 7695static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) { 7696 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N); 7697 if (!C) 7698 return false; 7699 return AllOnes ? C->isAllOnesValue() : C->isNullValue(); 7700} 7701 7702// Return true if N is conditionally 0 or all ones. 7703// Detects these expressions where cc is an i1 value: 7704// 7705// (select cc 0, y) [AllOnes=0] 7706// (select cc y, 0) [AllOnes=0] 7707// (zext cc) [AllOnes=0] 7708// (sext cc) [AllOnes=0/1] 7709// (select cc -1, y) [AllOnes=1] 7710// (select cc y, -1) [AllOnes=1] 7711// 7712// Invert is set when N is the null/all ones constant when CC is false. 7713// OtherOp is set to the alternative value of N. 7714static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, 7715 SDValue &CC, bool &Invert, 7716 SDValue &OtherOp, 7717 SelectionDAG &DAG) { 7718 switch (N->getOpcode()) { 7719 default: return false; 7720 case ISD::SELECT: { 7721 CC = N->getOperand(0); 7722 SDValue N1 = N->getOperand(1); 7723 SDValue N2 = N->getOperand(2); 7724 if (isZeroOrAllOnes(N1, AllOnes)) { 7725 Invert = false; 7726 OtherOp = N2; 7727 return true; 7728 } 7729 if (isZeroOrAllOnes(N2, AllOnes)) { 7730 Invert = true; 7731 OtherOp = N1; 7732 return true; 7733 } 7734 return false; 7735 } 7736 case ISD::ZERO_EXTEND: 7737 // (zext cc) can never be the all ones value. 7738 if (AllOnes) 7739 return false; 7740 // Fall through. 7741 case ISD::SIGN_EXTEND: { 7742 EVT VT = N->getValueType(0); 7743 CC = N->getOperand(0); 7744 if (CC.getValueType() != MVT::i1) 7745 return false; 7746 Invert = !AllOnes; 7747 if (AllOnes) 7748 // When looking for an AllOnes constant, N is an sext, and the 'other' 7749 // value is 0. 7750 OtherOp = DAG.getConstant(0, VT); 7751 else if (N->getOpcode() == ISD::ZERO_EXTEND) 7752 // When looking for a 0 constant, N can be zext or sext. 7753 OtherOp = DAG.getConstant(1, VT); 7754 else 7755 OtherOp = DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT); 7756 return true; 7757 } 7758 } 7759} 7760 7761// Combine a constant select operand into its use: 7762// 7763// (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 7764// (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 7765// (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) [AllOnes=1] 7766// (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 7767// (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 7768// 7769// The transform is rejected if the select doesn't have a constant operand that 7770// is null, or all ones when AllOnes is set. 7771// 7772// Also recognize sext/zext from i1: 7773// 7774// (add (zext cc), x) -> (select cc (add x, 1), x) 7775// (add (sext cc), x) -> (select cc (add x, -1), x) 7776// 7777// These transformations eventually create predicated instructions. 7778// 7779// @param N The node to transform. 7780// @param Slct The N operand that is a select. 7781// @param OtherOp The other N operand (x above). 7782// @param DCI Context. 7783// @param AllOnes Require the select constant to be all ones instead of null. 7784// @returns The new node, or SDValue() on failure. 
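//
// For instance, taking the first pattern above with c = 7: the DAG
//   t = (select cc, 0, 7)
//   n = (add t, x)
// is rewritten to n = (select cc, x, (add x, 7)), so the add is only applied
// on the path where the select operand is non-zero, and the select can later
// be lowered to a conditionally executed (predicated) ADD.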
static
SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                            TargetLowering::DAGCombinerInfo &DCI,
                            bool AllOnes = false) {
  SelectionDAG &DAG = DCI.DAG;
  EVT VT = N->getValueType(0);
  SDValue NonConstantVal;
  SDValue CCOp;
  bool SwapSelectOps;
  if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
                                  NonConstantVal, DAG))
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  SDValue TrueVal = OtherOp;
  SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                 OtherOp, NonConstantVal);
  // Unless SwapSelectOps says CC should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  return DAG.getNode(ISD::SELECT, SDLoc(N), VT,
                     CCOp, TrueVal, FalseVal);
}

// Attempt combineSelectAndUse on each operand of a commutative operator N.
static
SDValue combineSelectAndUseCommutative(SDNode *N, bool AllOnes,
                                       TargetLowering::DAGCombinerInfo &DCI) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (N0.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  if (N1.getNode()->hasOneUse()) {
    SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes);
    if (Result.getNode())
      return Result;
  }
  return SDValue();
}

// AddCombineToVPADDL - For pair-wise add on NEON, use the vpaddl instruction
// (only after legalization).
static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {

  // Only perform this optimization after legalization and when NEON is
  // available. We also expect both operands to be BUILD_VECTORs.
  if (DCI.isBeforeLegalize() || !Subtarget->hasNEON()
      || N0.getOpcode() != ISD::BUILD_VECTOR
      || N1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Check output type since VPADDL operand elements can only be 8, 16, or 32.
  EVT VT = N->getValueType(0);
  if (!VT.isInteger() || VT.getVectorElementType() == MVT::i64)
    return SDValue();

  // Check that the vector operands are of the right form.
  // N0 and N1 are BUILD_VECTOR nodes made up of EXTRACT_VECTOR_ELT operands,
  // one operand per element of the formed vector.
  // Each EXTRACT_VECTOR_ELT should read the same input vector, and the lane
  // indices should come in consecutive even/odd pairs so that we have a
  // pair-wise add pattern.

  // Grab the vector that all EXTRACT_VECTOR_ELT nodes should be referencing.
  if (N0->getOperand(0)->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
    return SDValue();
  SDValue Vec = N0->getOperand(0)->getOperand(0);
  SDNode *V = Vec.getNode();
  unsigned nextIndex = 0;

  // For each operand of the two BUILD_VECTORs feeding the ADD, check that it
  // is an EXTRACT_VECTOR_ELT of the same vector with the expected index.
  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) {
    if (N0->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT
        && N1->getOperand(i)->getOpcode() == ISD::EXTRACT_VECTOR_ELT) {

      SDValue ExtVec0 = N0->getOperand(i);
      SDValue ExtVec1 = N1->getOperand(i);

      // The first operand is the vector; verify it is the same one.
      if (V != ExtVec0->getOperand(0).getNode() ||
          V != ExtVec1->getOperand(0).getNode())
        return SDValue();

      // The second operand is the constant lane index; verify it is correct.
      ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(ExtVec0->getOperand(1));
      ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(ExtVec1->getOperand(1));

      // N0 must supply the even indices and N1 the matching odd indices.
      if (!C0 || !C1 || C0->getZExtValue() != nextIndex
          || C1->getZExtValue() != nextIndex+1)
        return SDValue();

      // Increment index.
      nextIndex += 2;
    } else
      return SDValue();
  }

  // Create VPADDL node.
  SelectionDAG &DAG = DCI.DAG;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Build operand list.
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(DAG.getConstant(Intrinsic::arm_neon_vpaddls,
                                TLI.getPointerTy()));

  // Input is the vector.
  Ops.push_back(Vec);

  // Get widened type and narrowed type.
  MVT widenType;
  unsigned numElem = VT.getVectorNumElements();
  switch (VT.getVectorElementType().getSimpleVT().SimpleTy) {
    case MVT::i8:  widenType = MVT::getVectorVT(MVT::i16, numElem); break;
    case MVT::i16: widenType = MVT::getVectorVT(MVT::i32, numElem); break;
    case MVT::i32: widenType = MVT::getVectorVT(MVT::i64, numElem); break;
    default:
      llvm_unreachable("Invalid vector element type for padd optimization.");
  }

  SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N),
                            widenType, &Ops[0], Ops.size());
  return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp);
}

static SDValue findMUL_LOHI(SDValue V) {
  if (V->getOpcode() == ISD::UMUL_LOHI ||
      V->getOpcode() == ISD::SMUL_LOHI)
    return V;
  return SDValue();
}

static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const ARMSubtarget *Subtarget) {

  if (Subtarget->isThumb1Only()) return SDValue();

  // Only perform the checks after legalize when the pattern is available.
  if (DCI.isBeforeLegalize()) return SDValue();

  // Look for multiply add opportunities.
  // The pattern is an ISD::UMUL_LOHI followed by two add nodes, where
  // each add node consumes a value from the ISD::UMUL_LOHI and there is
  // a glue link from the first add to the second add.
  // If we find this pattern, we can replace the U/SMUL_LOHI, ADDC, and ADDE
  // with an S/UMLAL instruction.
  //      loAdd   UMUL_LOHI
  //        \    / :lo    \ :hi
  //         \  /          \          [no multiline comment]
  //          ADDC         |  hiAdd
  //             \ :glue  /  /
  //              \      /  /
  //                ADDE
  //
  assert(AddcNode->getOpcode() == ISD::ADDC && "Expect an ADDC");
  SDValue AddcOp0 = AddcNode->getOperand(0);
  SDValue AddcOp1 = AddcNode->getOperand(1);

  // Check if the two operands are from the same mul_lohi node.
  if (AddcOp0.getNode() == AddcOp1.getNode())
    return SDValue();

  assert(AddcNode->getNumValues() == 2 &&
         AddcNode->getValueType(0) == MVT::i32 &&
         "Expect ADDC with two result values. First: i32");

  // Check that we have a glued ADDC node.
  if (AddcNode->getValueType(1) != MVT::Glue)
    return SDValue();

  // Check that the ADDC adds the low result of the S/UMUL_LOHI.
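  // (Either operand order is accepted here; which operand is actually the
  // :lo result of the multiply is pinned down further below when LoMul and
  // LowAdd are assigned.)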
7965 if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && 7966 AddcOp0->getOpcode() != ISD::SMUL_LOHI && 7967 AddcOp1->getOpcode() != ISD::UMUL_LOHI && 7968 AddcOp1->getOpcode() != ISD::SMUL_LOHI) 7969 return SDValue(); 7970 7971 // Look for the glued ADDE. 7972 SDNode* AddeNode = AddcNode->getGluedUser(); 7973 if (AddeNode == NULL) 7974 return SDValue(); 7975 7976 // Make sure it is really an ADDE. 7977 if (AddeNode->getOpcode() != ISD::ADDE) 7978 return SDValue(); 7979 7980 assert(AddeNode->getNumOperands() == 3 && 7981 AddeNode->getOperand(2).getValueType() == MVT::Glue && 7982 "ADDE node has the wrong inputs"); 7983 7984 // Check for the triangle shape. 7985 SDValue AddeOp0 = AddeNode->getOperand(0); 7986 SDValue AddeOp1 = AddeNode->getOperand(1); 7987 7988 // Make sure that the ADDE operands are not coming from the same node. 7989 if (AddeOp0.getNode() == AddeOp1.getNode()) 7990 return SDValue(); 7991 7992 // Find the MUL_LOHI node walking up ADDE's operands. 7993 bool IsLeftOperandMUL = false; 7994 SDValue MULOp = findMUL_LOHI(AddeOp0); 7995 if (MULOp == SDValue()) 7996 MULOp = findMUL_LOHI(AddeOp1); 7997 else 7998 IsLeftOperandMUL = true; 7999 if (MULOp == SDValue()) 8000 return SDValue(); 8001 8002 // Figure out the right opcode. 8003 unsigned Opc = MULOp->getOpcode(); 8004 unsigned FinalOpc = (Opc == ISD::SMUL_LOHI) ? ARMISD::SMLAL : ARMISD::UMLAL; 8005 8006 // Figure out the high and low input values to the MLAL node. 8007 SDValue* HiMul = &MULOp; 8008 SDValue* HiAdd = NULL; 8009 SDValue* LoMul = NULL; 8010 SDValue* LowAdd = NULL; 8011 8012 if (IsLeftOperandMUL) 8013 HiAdd = &AddeOp1; 8014 else 8015 HiAdd = &AddeOp0; 8016 8017 8018 if (AddcOp0->getOpcode() == Opc) { 8019 LoMul = &AddcOp0; 8020 LowAdd = &AddcOp1; 8021 } 8022 if (AddcOp1->getOpcode() == Opc) { 8023 LoMul = &AddcOp1; 8024 LowAdd = &AddcOp0; 8025 } 8026 8027 if (LoMul == NULL) 8028 return SDValue(); 8029 8030 if (LoMul->getNode() != HiMul->getNode()) 8031 return SDValue(); 8032 8033 // Create the merged node. 8034 SelectionDAG &DAG = DCI.DAG; 8035 8036 // Build operand list. 8037 SmallVector<SDValue, 8> Ops; 8038 Ops.push_back(LoMul->getOperand(0)); 8039 Ops.push_back(LoMul->getOperand(1)); 8040 Ops.push_back(*LowAdd); 8041 Ops.push_back(*HiAdd); 8042 8043 SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), 8044 DAG.getVTList(MVT::i32, MVT::i32), 8045 &Ops[0], Ops.size()); 8046 8047 // Replace the ADDs' nodes uses by the MLA node's values. 8048 SDValue HiMLALResult(MLALNode.getNode(), 1); 8049 DAG.ReplaceAllUsesOfValueWith(SDValue(AddeNode, 0), HiMLALResult); 8050 8051 SDValue LoMLALResult(MLALNode.getNode(), 0); 8052 DAG.ReplaceAllUsesOfValueWith(SDValue(AddcNode, 0), LoMLALResult); 8053 8054 // Return original node to notify the driver to stop replacing. 8055 SDValue resNode(AddcNode, 0); 8056 return resNode; 8057} 8058 8059/// PerformADDCCombine - Target-specific dag combine transform from 8060/// ISD::ADDC, ISD::ADDE, and ISD::MUL_LOHI to MLAL. 8061static SDValue PerformADDCCombine(SDNode *N, 8062 TargetLowering::DAGCombinerInfo &DCI, 8063 const ARMSubtarget *Subtarget) { 8064 8065 return AddCombineTo64bitMLAL(N, DCI, Subtarget); 8066 8067} 8068 8069/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 8070/// operands N0 and N1. This is a helper for PerformADDCombine that is 8071/// called with the default operands, and if that fails, with commuted 8072/// operands. 
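///
/// Illustrative example: (add x, (select cc, 0, c)) does not match on the
/// first attempt because the select is N1 here; the commuted retry from
/// PerformADDCombine presents it as N0 so the select-folding path can fire.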
8073static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 8074 TargetLowering::DAGCombinerInfo &DCI, 8075 const ARMSubtarget *Subtarget){ 8076 8077 // Attempt to create vpaddl for this add. 8078 SDValue Result = AddCombineToVPADDL(N, N0, N1, DCI, Subtarget); 8079 if (Result.getNode()) 8080 return Result; 8081 8082 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 8083 if (N0.getNode()->hasOneUse()) { 8084 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 8085 if (Result.getNode()) return Result; 8086 } 8087 return SDValue(); 8088} 8089 8090/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 8091/// 8092static SDValue PerformADDCombine(SDNode *N, 8093 TargetLowering::DAGCombinerInfo &DCI, 8094 const ARMSubtarget *Subtarget) { 8095 SDValue N0 = N->getOperand(0); 8096 SDValue N1 = N->getOperand(1); 8097 8098 // First try with the default operand order. 8099 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget); 8100 if (Result.getNode()) 8101 return Result; 8102 8103 // If that didn't work, try again with the operands commuted. 8104 return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget); 8105} 8106 8107/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 8108/// 8109static SDValue PerformSUBCombine(SDNode *N, 8110 TargetLowering::DAGCombinerInfo &DCI) { 8111 SDValue N0 = N->getOperand(0); 8112 SDValue N1 = N->getOperand(1); 8113 8114 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 8115 if (N1.getNode()->hasOneUse()) { 8116 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 8117 if (Result.getNode()) return Result; 8118 } 8119 8120 return SDValue(); 8121} 8122 8123/// PerformVMULCombine 8124/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 8125/// special multiplier accumulator forwarding. 
8126/// vmul d3, d0, d2 8127/// vmla d3, d1, d2 8128/// is faster than 8129/// vadd d3, d0, d1 8130/// vmul d3, d3, d2 8131static SDValue PerformVMULCombine(SDNode *N, 8132 TargetLowering::DAGCombinerInfo &DCI, 8133 const ARMSubtarget *Subtarget) { 8134 if (!Subtarget->hasVMLxForwarding()) 8135 return SDValue(); 8136 8137 SelectionDAG &DAG = DCI.DAG; 8138 SDValue N0 = N->getOperand(0); 8139 SDValue N1 = N->getOperand(1); 8140 unsigned Opcode = N0.getOpcode(); 8141 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8142 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 8143 Opcode = N1.getOpcode(); 8144 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 8145 Opcode != ISD::FADD && Opcode != ISD::FSUB) 8146 return SDValue(); 8147 std::swap(N0, N1); 8148 } 8149 8150 EVT VT = N->getValueType(0); 8151 SDLoc DL(N); 8152 SDValue N00 = N0->getOperand(0); 8153 SDValue N01 = N0->getOperand(1); 8154 return DAG.getNode(Opcode, DL, VT, 8155 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 8156 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 8157} 8158 8159static SDValue PerformMULCombine(SDNode *N, 8160 TargetLowering::DAGCombinerInfo &DCI, 8161 const ARMSubtarget *Subtarget) { 8162 SelectionDAG &DAG = DCI.DAG; 8163 8164 if (Subtarget->isThumb1Only()) 8165 return SDValue(); 8166 8167 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 8168 return SDValue(); 8169 8170 EVT VT = N->getValueType(0); 8171 if (VT.is64BitVector() || VT.is128BitVector()) 8172 return PerformVMULCombine(N, DCI, Subtarget); 8173 if (VT != MVT::i32) 8174 return SDValue(); 8175 8176 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8177 if (!C) 8178 return SDValue(); 8179 8180 int64_t MulAmt = C->getSExtValue(); 8181 unsigned ShiftAmt = countTrailingZeros<uint64_t>(MulAmt); 8182 8183 ShiftAmt = ShiftAmt & (32 - 1); 8184 SDValue V = N->getOperand(0); 8185 SDLoc DL(N); 8186 8187 SDValue Res; 8188 MulAmt >>= ShiftAmt; 8189 8190 if (MulAmt >= 0) { 8191 if (isPowerOf2_32(MulAmt - 1)) { 8192 // (mul x, 2^N + 1) => (add (shl x, N), x) 8193 Res = DAG.getNode(ISD::ADD, DL, VT, 8194 V, 8195 DAG.getNode(ISD::SHL, DL, VT, 8196 V, 8197 DAG.getConstant(Log2_32(MulAmt - 1), 8198 MVT::i32))); 8199 } else if (isPowerOf2_32(MulAmt + 1)) { 8200 // (mul x, 2^N - 1) => (sub (shl x, N), x) 8201 Res = DAG.getNode(ISD::SUB, DL, VT, 8202 DAG.getNode(ISD::SHL, DL, VT, 8203 V, 8204 DAG.getConstant(Log2_32(MulAmt + 1), 8205 MVT::i32)), 8206 V); 8207 } else 8208 return SDValue(); 8209 } else { 8210 uint64_t MulAmtAbs = -MulAmt; 8211 if (isPowerOf2_32(MulAmtAbs + 1)) { 8212 // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) 8213 Res = DAG.getNode(ISD::SUB, DL, VT, 8214 V, 8215 DAG.getNode(ISD::SHL, DL, VT, 8216 V, 8217 DAG.getConstant(Log2_32(MulAmtAbs + 1), 8218 MVT::i32))); 8219 } else if (isPowerOf2_32(MulAmtAbs - 1)) { 8220 // (mul x, -(2^N + 1)) => - (add (shl x, N), x) 8221 Res = DAG.getNode(ISD::ADD, DL, VT, 8222 V, 8223 DAG.getNode(ISD::SHL, DL, VT, 8224 V, 8225 DAG.getConstant(Log2_32(MulAmtAbs-1), 8226 MVT::i32))); 8227 Res = DAG.getNode(ISD::SUB, DL, VT, 8228 DAG.getConstant(0, MVT::i32),Res); 8229 8230 } else 8231 return SDValue(); 8232 } 8233 8234 if (ShiftAmt != 0) 8235 Res = DAG.getNode(ISD::SHL, DL, VT, 8236 Res, DAG.getConstant(ShiftAmt, MVT::i32)); 8237 8238 // Do not add new nodes to DAG combiner worklist. 
8239 DCI.CombineTo(N, Res, false); 8240 return SDValue(); 8241} 8242 8243static SDValue PerformANDCombine(SDNode *N, 8244 TargetLowering::DAGCombinerInfo &DCI, 8245 const ARMSubtarget *Subtarget) { 8246 8247 // Attempt to use immediate-form VBIC 8248 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8249 SDLoc dl(N); 8250 EVT VT = N->getValueType(0); 8251 SelectionDAG &DAG = DCI.DAG; 8252 8253 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8254 return SDValue(); 8255 8256 APInt SplatBits, SplatUndef; 8257 unsigned SplatBitSize; 8258 bool HasAnyUndefs; 8259 if (BVN && 8260 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8261 if (SplatBitSize <= 64) { 8262 EVT VbicVT; 8263 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 8264 SplatUndef.getZExtValue(), SplatBitSize, 8265 DAG, VbicVT, VT.is128BitVector(), 8266 OtherModImm); 8267 if (Val.getNode()) { 8268 SDValue Input = 8269 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 8270 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 8271 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 8272 } 8273 } 8274 } 8275 8276 if (!Subtarget->isThumb1Only()) { 8277 // fold (and (select cc, -1, c), x) -> (select cc, x, (and, x, c)) 8278 SDValue Result = combineSelectAndUseCommutative(N, true, DCI); 8279 if (Result.getNode()) 8280 return Result; 8281 } 8282 8283 return SDValue(); 8284} 8285 8286/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 8287static SDValue PerformORCombine(SDNode *N, 8288 TargetLowering::DAGCombinerInfo &DCI, 8289 const ARMSubtarget *Subtarget) { 8290 // Attempt to use immediate-form VORR 8291 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 8292 SDLoc dl(N); 8293 EVT VT = N->getValueType(0); 8294 SelectionDAG &DAG = DCI.DAG; 8295 8296 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8297 return SDValue(); 8298 8299 APInt SplatBits, SplatUndef; 8300 unsigned SplatBitSize; 8301 bool HasAnyUndefs; 8302 if (BVN && Subtarget->hasNEON() && 8303 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 8304 if (SplatBitSize <= 64) { 8305 EVT VorrVT; 8306 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 8307 SplatUndef.getZExtValue(), SplatBitSize, 8308 DAG, VorrVT, VT.is128BitVector(), 8309 OtherModImm); 8310 if (Val.getNode()) { 8311 SDValue Input = 8312 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 8313 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 8314 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 8315 } 8316 } 8317 } 8318 8319 if (!Subtarget->isThumb1Only()) { 8320 // fold (or (select cc, 0, c), x) -> (select cc, x, (or, x, c)) 8321 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8322 if (Result.getNode()) 8323 return Result; 8324 } 8325 8326 // The code below optimizes (or (and X, Y), Z). 8327 // The AND operand needs to have a single user to make these optimizations 8328 // profitable. 8329 SDValue N0 = N->getOperand(0); 8330 if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) 8331 return SDValue(); 8332 SDValue N1 = N->getOperand(1); 8333 8334 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 
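  // Illustrative example: with A = <0x0000ffff x 4>,
  //   (or (and B, A), (and C, ~A))
  // keeps the low halfword of each lane from B and the high halfword from C,
  // which is exactly a bitwise select and maps onto a single VBSL.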
8335 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 8336 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 8337 APInt SplatUndef; 8338 unsigned SplatBitSize; 8339 bool HasAnyUndefs; 8340 8341 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 8342 APInt SplatBits0; 8343 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 8344 HasAnyUndefs) && !HasAnyUndefs) { 8345 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 8346 APInt SplatBits1; 8347 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 8348 HasAnyUndefs) && !HasAnyUndefs && 8349 SplatBits0 == ~SplatBits1) { 8350 // Canonicalize the vector type to make instruction selection simpler. 8351 EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 8352 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 8353 N0->getOperand(1), N0->getOperand(0), 8354 N1->getOperand(0)); 8355 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 8356 } 8357 } 8358 } 8359 8360 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 8361 // reasonable. 8362 8363 // BFI is only available on V6T2+ 8364 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 8365 return SDValue(); 8366 8367 SDLoc DL(N); 8368 // 1) or (and A, mask), val => ARMbfi A, val, mask 8369 // iff (val & mask) == val 8370 // 8371 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8372 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 8373 // && mask == ~mask2 8374 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 8375 // && ~mask == mask2 8376 // (i.e., copy a bitfield value into another bitfield of the same width) 8377 8378 if (VT != MVT::i32) 8379 return SDValue(); 8380 8381 SDValue N00 = N0.getOperand(0); 8382 8383 // The value and the mask need to be constants so we can verify this is 8384 // actually a bitfield set. If the mask is 0xffff, we can do better 8385 // via a movt instruction, so don't use BFI in that case. 8386 SDValue MaskOp = N0.getOperand(1); 8387 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 8388 if (!MaskC) 8389 return SDValue(); 8390 unsigned Mask = MaskC->getZExtValue(); 8391 if (Mask == 0xffff) 8392 return SDValue(); 8393 SDValue Res; 8394 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 8395 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 8396 if (N1C) { 8397 unsigned Val = N1C->getZExtValue(); 8398 if ((Val & ~Mask) != Val) 8399 return SDValue(); 8400 8401 if (ARM::isBitFieldInvertedMask(Mask)) { 8402 Val >>= countTrailingZeros(~Mask); 8403 8404 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 8405 DAG.getConstant(Val, MVT::i32), 8406 DAG.getConstant(Mask, MVT::i32)); 8407 8408 // Do not add new nodes to DAG combiner worklist. 8409 DCI.CombineTo(N, Res, false); 8410 return SDValue(); 8411 } 8412 } else if (N1.getOpcode() == ISD::AND) { 8413 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 8414 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8415 if (!N11C) 8416 return SDValue(); 8417 unsigned Mask2 = N11C->getZExtValue(); 8418 8419 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 8420 // as is to match. 8421 if (ARM::isBitFieldInvertedMask(Mask) && 8422 (Mask == ~Mask2)) { 8423 // The pack halfword instruction works better for masks that fit it, 8424 // so use that when it's available. 
8425 if (Subtarget->hasT2ExtractPack() && 8426 (Mask == 0xffff || Mask == 0xffff0000)) 8427 return SDValue(); 8428 // 2a 8429 unsigned amt = countTrailingZeros(Mask2); 8430 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 8431 DAG.getConstant(amt, MVT::i32)); 8432 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 8433 DAG.getConstant(Mask, MVT::i32)); 8434 // Do not add new nodes to DAG combiner worklist. 8435 DCI.CombineTo(N, Res, false); 8436 return SDValue(); 8437 } else if (ARM::isBitFieldInvertedMask(~Mask) && 8438 (~Mask == Mask2)) { 8439 // The pack halfword instruction works better for masks that fit it, 8440 // so use that when it's available. 8441 if (Subtarget->hasT2ExtractPack() && 8442 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 8443 return SDValue(); 8444 // 2b 8445 unsigned lsb = countTrailingZeros(Mask); 8446 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 8447 DAG.getConstant(lsb, MVT::i32)); 8448 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 8449 DAG.getConstant(Mask2, MVT::i32)); 8450 // Do not add new nodes to DAG combiner worklist. 8451 DCI.CombineTo(N, Res, false); 8452 return SDValue(); 8453 } 8454 } 8455 8456 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 8457 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 8458 ARM::isBitFieldInvertedMask(~Mask)) { 8459 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 8460 // where lsb(mask) == #shamt and masked bits of B are known zero. 8461 SDValue ShAmt = N00.getOperand(1); 8462 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 8463 unsigned LSB = countTrailingZeros(Mask); 8464 if (ShAmtC != LSB) 8465 return SDValue(); 8466 8467 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 8468 DAG.getConstant(~Mask, MVT::i32)); 8469 8470 // Do not add new nodes to DAG combiner worklist. 8471 DCI.CombineTo(N, Res, false); 8472 } 8473 8474 return SDValue(); 8475} 8476 8477static SDValue PerformXORCombine(SDNode *N, 8478 TargetLowering::DAGCombinerInfo &DCI, 8479 const ARMSubtarget *Subtarget) { 8480 EVT VT = N->getValueType(0); 8481 SelectionDAG &DAG = DCI.DAG; 8482 8483 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 8484 return SDValue(); 8485 8486 if (!Subtarget->isThumb1Only()) { 8487 // fold (xor (select cc, 0, c), x) -> (select cc, x, (xor, x, c)) 8488 SDValue Result = combineSelectAndUseCommutative(N, false, DCI); 8489 if (Result.getNode()) 8490 return Result; 8491 } 8492 8493 return SDValue(); 8494} 8495 8496/// PerformBFICombine - (bfi A, (and B, Mask1), Mask2) -> (bfi A, B, Mask2) iff 8497/// the bits being cleared by the AND are not demanded by the BFI. 8498static SDValue PerformBFICombine(SDNode *N, 8499 TargetLowering::DAGCombinerInfo &DCI) { 8500 SDValue N1 = N->getOperand(1); 8501 if (N1.getOpcode() == ISD::AND) { 8502 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 8503 if (!N11C) 8504 return SDValue(); 8505 unsigned InvMask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 8506 unsigned LSB = countTrailingZeros(~InvMask); 8507 unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; 8508 unsigned Mask = (1 << Width)-1; 8509 unsigned Mask2 = N11C->getZExtValue(); 8510 if ((Mask & (~Mask2)) == 0) 8511 return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), 8512 N->getOperand(0), N1.getOperand(0), 8513 N->getOperand(2)); 8514 } 8515 return SDValue(); 8516} 8517 8518/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 8519/// ARMISD::VMOVRRD. 
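///
/// Illustrative summary of the two rewrites performed below:
///   vmovrrd (vmovdrr x, y)        --> x, y
///   vmovrrd (load f64 <fi>)       --> (load i32 addr), (load i32 addr+4)
/// so a 64-bit value is not forced through a D register just to be split
/// back into two GPRs.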
8520static SDValue PerformVMOVRRDCombine(SDNode *N, 8521 TargetLowering::DAGCombinerInfo &DCI) { 8522 // vmovrrd(vmovdrr x, y) -> x,y 8523 SDValue InDouble = N->getOperand(0); 8524 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 8525 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 8526 8527 // vmovrrd(load f64) -> (load i32), (load i32) 8528 SDNode *InNode = InDouble.getNode(); 8529 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 8530 InNode->getValueType(0) == MVT::f64 && 8531 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 8532 !cast<LoadSDNode>(InNode)->isVolatile()) { 8533 // TODO: Should this be done for non-FrameIndex operands? 8534 LoadSDNode *LD = cast<LoadSDNode>(InNode); 8535 8536 SelectionDAG &DAG = DCI.DAG; 8537 SDLoc DL(LD); 8538 SDValue BasePtr = LD->getBasePtr(); 8539 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 8540 LD->getPointerInfo(), LD->isVolatile(), 8541 LD->isNonTemporal(), LD->isInvariant(), 8542 LD->getAlignment()); 8543 8544 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8545 DAG.getConstant(4, MVT::i32)); 8546 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 8547 LD->getPointerInfo(), LD->isVolatile(), 8548 LD->isNonTemporal(), LD->isInvariant(), 8549 std::min(4U, LD->getAlignment() / 2)); 8550 8551 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 8552 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 8553 DCI.RemoveFromWorklist(LD); 8554 DAG.DeleteNode(LD); 8555 return Result; 8556 } 8557 8558 return SDValue(); 8559} 8560 8561/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 8562/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 8563static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 8564 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 8565 SDValue Op0 = N->getOperand(0); 8566 SDValue Op1 = N->getOperand(1); 8567 if (Op0.getOpcode() == ISD::BITCAST) 8568 Op0 = Op0.getOperand(0); 8569 if (Op1.getOpcode() == ISD::BITCAST) 8570 Op1 = Op1.getOperand(0); 8571 if (Op0.getOpcode() == ARMISD::VMOVRRD && 8572 Op0.getNode() == Op1.getNode() && 8573 Op0.getResNo() == 0 && Op1.getResNo() == 1) 8574 return DAG.getNode(ISD::BITCAST, SDLoc(N), 8575 N->getValueType(0), Op0.getOperand(0)); 8576 return SDValue(); 8577} 8578 8579/// PerformSTORECombine - Target-specific dag combine xforms for 8580/// ISD::STORE. 8581static SDValue PerformSTORECombine(SDNode *N, 8582 TargetLowering::DAGCombinerInfo &DCI) { 8583 StoreSDNode *St = cast<StoreSDNode>(N); 8584 if (St->isVolatile()) 8585 return SDValue(); 8586 8587 // Optimize trunc store (of multiple scalars) to shuffle and store. First, 8588 // pack all of the elements in one place. Next, store to memory in fewer 8589 // chunks. 8590 SDValue StVal = St->getValue(); 8591 EVT VT = StVal.getValueType(); 8592 if (St->isTruncatingStore() && VT.isVector()) { 8593 SelectionDAG &DAG = DCI.DAG; 8594 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8595 EVT StVT = St->getMemoryVT(); 8596 unsigned NumElems = VT.getVectorNumElements(); 8597 assert(StVT != VT && "Cannot truncate to the same type"); 8598 unsigned FromEltSz = VT.getVectorElementType().getSizeInBits(); 8599 unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits(); 8600 8601 // From, To sizes and ElemCount must be pow of two 8602 if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue(); 8603 8604 // We are going to use the original vector elt for storing. 
8605 // Accumulated smaller vector elements must be a multiple of the store size. 8606 if (0 != (NumElems * FromEltSz) % ToEltSz) return SDValue(); 8607 8608 unsigned SizeRatio = FromEltSz / ToEltSz; 8609 assert(SizeRatio * NumElems * ToEltSz == VT.getSizeInBits()); 8610 8611 // Create a type on which we perform the shuffle. 8612 EVT WideVecVT = EVT::getVectorVT(*DAG.getContext(), StVT.getScalarType(), 8613 NumElems*SizeRatio); 8614 assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); 8615 8616 SDLoc DL(St); 8617 SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); 8618 SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); 8619 for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; 8620 8621 // Can't shuffle using an illegal type. 8622 if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); 8623 8624 SDValue Shuff = DAG.getVectorShuffle(WideVecVT, DL, WideVec, 8625 DAG.getUNDEF(WideVec.getValueType()), 8626 ShuffleVec.data()); 8627 // At this point all of the data is stored at the bottom of the 8628 // register. We now need to save it to mem. 8629 8630 // Find the largest store unit 8631 MVT StoreType = MVT::i8; 8632 for (unsigned tp = MVT::FIRST_INTEGER_VALUETYPE; 8633 tp < MVT::LAST_INTEGER_VALUETYPE; ++tp) { 8634 MVT Tp = (MVT::SimpleValueType)tp; 8635 if (TLI.isTypeLegal(Tp) && Tp.getSizeInBits() <= NumElems * ToEltSz) 8636 StoreType = Tp; 8637 } 8638 // Didn't find a legal store type. 8639 if (!TLI.isTypeLegal(StoreType)) 8640 return SDValue(); 8641 8642 // Bitcast the original vector into a vector of store-size units 8643 EVT StoreVecVT = EVT::getVectorVT(*DAG.getContext(), 8644 StoreType, VT.getSizeInBits()/EVT(StoreType).getSizeInBits()); 8645 assert(StoreVecVT.getSizeInBits() == VT.getSizeInBits()); 8646 SDValue ShuffWide = DAG.getNode(ISD::BITCAST, DL, StoreVecVT, Shuff); 8647 SmallVector<SDValue, 8> Chains; 8648 SDValue Increment = DAG.getConstant(StoreType.getSizeInBits()/8, 8649 TLI.getPointerTy()); 8650 SDValue BasePtr = St->getBasePtr(); 8651 8652 // Perform one or more big stores into memory. 8653 unsigned E = (ToEltSz*NumElems)/StoreType.getSizeInBits(); 8654 for (unsigned I = 0; I < E; I++) { 8655 SDValue SubVec = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, 8656 StoreType, ShuffWide, 8657 DAG.getIntPtrConstant(I)); 8658 SDValue Ch = DAG.getStore(St->getChain(), DL, SubVec, BasePtr, 8659 St->getPointerInfo(), St->isVolatile(), 8660 St->isNonTemporal(), St->getAlignment()); 8661 BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr, 8662 Increment); 8663 Chains.push_back(Ch); 8664 } 8665 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &Chains[0], 8666 Chains.size()); 8667 } 8668 8669 if (!ISD::isNormalStore(St)) 8670 return SDValue(); 8671 8672 // Split a store of a VMOVDRR into two integer stores to avoid mixing NEON and 8673 // ARM stores of arguments in the same cache line. 
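  // Illustrative shape of the rewrite (mirroring the VMOVRRD load split
  // above):
  //   store (vmovdrr x, y), addr  -->  store x, addr; store y, addr+4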
8674 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 8675 StVal.getNode()->hasOneUse()) { 8676 SelectionDAG &DAG = DCI.DAG; 8677 SDLoc DL(St); 8678 SDValue BasePtr = St->getBasePtr(); 8679 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 8680 StVal.getNode()->getOperand(0), BasePtr, 8681 St->getPointerInfo(), St->isVolatile(), 8682 St->isNonTemporal(), St->getAlignment()); 8683 8684 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 8685 DAG.getConstant(4, MVT::i32)); 8686 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 8687 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 8688 St->isNonTemporal(), 8689 std::min(4U, St->getAlignment() / 2)); 8690 } 8691 8692 if (StVal.getValueType() != MVT::i64 || 8693 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 8694 return SDValue(); 8695 8696 // Bitcast an i64 store extracted from a vector to f64. 8697 // Otherwise, the i64 value will be legalized to a pair of i32 values. 8698 SelectionDAG &DAG = DCI.DAG; 8699 SDLoc dl(StVal); 8700 SDValue IntVec = StVal.getOperand(0); 8701 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 8702 IntVec.getValueType().getVectorNumElements()); 8703 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 8704 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 8705 Vec, StVal.getOperand(1)); 8706 dl = SDLoc(N); 8707 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 8708 // Make the DAGCombiner fold the bitcasts. 8709 DCI.AddToWorklist(Vec.getNode()); 8710 DCI.AddToWorklist(ExtElt.getNode()); 8711 DCI.AddToWorklist(V.getNode()); 8712 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 8713 St->getPointerInfo(), St->isVolatile(), 8714 St->isNonTemporal(), St->getAlignment(), 8715 St->getTBAAInfo()); 8716} 8717 8718/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 8719/// are normal, non-volatile loads. If so, it is profitable to bitcast an 8720/// i64 vector to have f64 elements, since the value can then be loaded 8721/// directly into a VFP register. 8722static bool hasNormalLoadOperand(SDNode *N) { 8723 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 8724 for (unsigned i = 0; i < NumElts; ++i) { 8725 SDNode *Elt = N->getOperand(i).getNode(); 8726 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 8727 return true; 8728 } 8729 return false; 8730} 8731 8732/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 8733/// ISD::BUILD_VECTOR. 8734static SDValue PerformBUILD_VECTORCombine(SDNode *N, 8735 TargetLowering::DAGCombinerInfo &DCI){ 8736 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 8737 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 8738 // into a pair of GPRs, which is fine when the value is used as a scalar, 8739 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 8740 SelectionDAG &DAG = DCI.DAG; 8741 if (N->getNumOperands() == 2) { 8742 SDValue RV = PerformVMOVDRRCombine(N, DAG); 8743 if (RV.getNode()) 8744 return RV; 8745 } 8746 8747 // Load i64 elements as f64 values so that type legalization does not split 8748 // them up into i32 values. 
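  // Illustrative example: a v2i64 BUILD_VECTOR of two normal loads is turned
  // into a v2f64 BUILD_VECTOR of f64 loads and bitcast back to v2i64, so each
  // element can be loaded straight into a D register rather than a GPR pair.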
8749 EVT VT = N->getValueType(0); 8750 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 8751 return SDValue(); 8752 SDLoc dl(N); 8753 SmallVector<SDValue, 8> Ops; 8754 unsigned NumElts = VT.getVectorNumElements(); 8755 for (unsigned i = 0; i < NumElts; ++i) { 8756 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 8757 Ops.push_back(V); 8758 // Make the DAGCombiner fold the bitcast. 8759 DCI.AddToWorklist(V.getNode()); 8760 } 8761 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 8762 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 8763 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 8764} 8765 8766/// PerformInsertEltCombine - Target-specific dag combine xforms for 8767/// ISD::INSERT_VECTOR_ELT. 8768static SDValue PerformInsertEltCombine(SDNode *N, 8769 TargetLowering::DAGCombinerInfo &DCI) { 8770 // Bitcast an i64 load inserted into a vector to f64. 8771 // Otherwise, the i64 value will be legalized to a pair of i32 values. 8772 EVT VT = N->getValueType(0); 8773 SDNode *Elt = N->getOperand(1).getNode(); 8774 if (VT.getVectorElementType() != MVT::i64 || 8775 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 8776 return SDValue(); 8777 8778 SelectionDAG &DAG = DCI.DAG; 8779 SDLoc dl(N); 8780 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 8781 VT.getVectorNumElements()); 8782 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 8783 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 8784 // Make the DAGCombiner fold the bitcasts. 8785 DCI.AddToWorklist(Vec.getNode()); 8786 DCI.AddToWorklist(V.getNode()); 8787 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 8788 Vec, V, N->getOperand(2)); 8789 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 8790} 8791 8792/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 8793/// ISD::VECTOR_SHUFFLE. 8794static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 8795 // The LLVM shufflevector instruction does not require the shuffle mask 8796 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 8797 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 8798 // operands do not match the mask length, they are extended by concatenating 8799 // them with undef vectors. That is probably the right thing for other 8800 // targets, but for NEON it is better to concatenate two double-register 8801 // size vector operands into a single quad-register size vector. Do that 8802 // transformation here: 8803 // shuffle(concat(v1, undef), concat(v2, undef)) -> 8804 // shuffle(concat(v1, v2), undef) 8805 SDValue Op0 = N->getOperand(0); 8806 SDValue Op1 = N->getOperand(1); 8807 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 8808 Op1.getOpcode() != ISD::CONCAT_VECTORS || 8809 Op0.getNumOperands() != 2 || 8810 Op1.getNumOperands() != 2) 8811 return SDValue(); 8812 SDValue Concat0Op1 = Op0.getOperand(1); 8813 SDValue Concat1Op1 = Op1.getOperand(1); 8814 if (Concat0Op1.getOpcode() != ISD::UNDEF || 8815 Concat1Op1.getOpcode() != ISD::UNDEF) 8816 return SDValue(); 8817 // Skip the transformation if any of the types are illegal. 
8818 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 8819 EVT VT = N->getValueType(0); 8820 if (!TLI.isTypeLegal(VT) || 8821 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 8822 !TLI.isTypeLegal(Concat1Op1.getValueType())) 8823 return SDValue(); 8824 8825 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 8826 Op0.getOperand(0), Op1.getOperand(0)); 8827 // Translate the shuffle mask. 8828 SmallVector<int, 16> NewMask; 8829 unsigned NumElts = VT.getVectorNumElements(); 8830 unsigned HalfElts = NumElts/2; 8831 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 8832 for (unsigned n = 0; n < NumElts; ++n) { 8833 int MaskElt = SVN->getMaskElt(n); 8834 int NewElt = -1; 8835 if (MaskElt < (int)HalfElts) 8836 NewElt = MaskElt; 8837 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 8838 NewElt = HalfElts + MaskElt - NumElts; 8839 NewMask.push_back(NewElt); 8840 } 8841 return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, 8842 DAG.getUNDEF(VT), NewMask.data()); 8843} 8844 8845/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 8846/// NEON load/store intrinsics to merge base address updates. 8847static SDValue CombineBaseUpdate(SDNode *N, 8848 TargetLowering::DAGCombinerInfo &DCI) { 8849 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 8850 return SDValue(); 8851 8852 SelectionDAG &DAG = DCI.DAG; 8853 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 8854 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 8855 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 8856 SDValue Addr = N->getOperand(AddrOpIdx); 8857 8858 // Search for a use of the address operand that is an increment. 8859 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 8860 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 8861 SDNode *User = *UI; 8862 if (User->getOpcode() != ISD::ADD || 8863 UI.getUse().getResNo() != Addr.getResNo()) 8864 continue; 8865 8866 // Check that the add is independent of the load/store. Otherwise, folding 8867 // it would create a cycle. 8868 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 8869 continue; 8870 8871 // Find the new opcode for the updating load/store. 
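    // Illustrative example: a vld1 whose address also feeds an ADD of the
    // access size (e.g. +16 for a 128-bit vld1) is rewritten as VLD1_UPD,
    // which additionally produces the post-incremented address as a result.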
8872 bool isLoad = true; 8873 bool isLaneOp = false; 8874 unsigned NewOpc = 0; 8875 unsigned NumVecs = 0; 8876 if (isIntrinsic) { 8877 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8878 switch (IntNo) { 8879 default: llvm_unreachable("unexpected intrinsic for Neon base update"); 8880 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 8881 NumVecs = 1; break; 8882 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 8883 NumVecs = 2; break; 8884 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 8885 NumVecs = 3; break; 8886 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 8887 NumVecs = 4; break; 8888 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 8889 NumVecs = 2; isLaneOp = true; break; 8890 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 8891 NumVecs = 3; isLaneOp = true; break; 8892 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 8893 NumVecs = 4; isLaneOp = true; break; 8894 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 8895 NumVecs = 1; isLoad = false; break; 8896 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 8897 NumVecs = 2; isLoad = false; break; 8898 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 8899 NumVecs = 3; isLoad = false; break; 8900 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 8901 NumVecs = 4; isLoad = false; break; 8902 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 8903 NumVecs = 2; isLoad = false; isLaneOp = true; break; 8904 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 8905 NumVecs = 3; isLoad = false; isLaneOp = true; break; 8906 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 8907 NumVecs = 4; isLoad = false; isLaneOp = true; break; 8908 } 8909 } else { 8910 isLaneOp = true; 8911 switch (N->getOpcode()) { 8912 default: llvm_unreachable("unexpected opcode for Neon base update"); 8913 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 8914 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 8915 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 8916 } 8917 } 8918 8919 // Find the size of memory referenced by the load/store. 8920 EVT VecTy; 8921 if (isLoad) 8922 VecTy = N->getValueType(0); 8923 else 8924 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 8925 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 8926 if (isLaneOp) 8927 NumBytes /= VecTy.getVectorNumElements(); 8928 8929 // If the increment is a constant, it must match the memory ref size. 8930 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 8931 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 8932 uint64_t IncVal = CInc->getZExtValue(); 8933 if (IncVal != NumBytes) 8934 continue; 8935 } else if (NumBytes >= 3 * 16) { 8936 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 8937 // separate instructions that make it harder to use a non-constant update. 8938 continue; 8939 } 8940 8941 // Create the new updating load/store node. 8942 EVT Tys[6]; 8943 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 8944 unsigned n; 8945 for (n = 0; n < NumResultVecs; ++n) 8946 Tys[n] = VecTy; 8947 Tys[n++] = MVT::i32; 8948 Tys[n] = MVT::Other; 8949 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 8950 SmallVector<SDValue, 8> Ops; 8951 Ops.push_back(N->getOperand(0)); // incoming chain 8952 Ops.push_back(N->getOperand(AddrOpIdx)); 8953 Ops.push_back(Inc); 8954 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 8955 Ops.push_back(N->getOperand(i)); 8956 } 8957 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 8958 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, 8959 Ops.data(), Ops.size(), 8960 MemInt->getMemoryVT(), 8961 MemInt->getMemOperand()); 8962 8963 // Update the uses. 8964 std::vector<SDValue> NewResults; 8965 for (unsigned i = 0; i < NumResultVecs; ++i) { 8966 NewResults.push_back(SDValue(UpdN.getNode(), i)); 8967 } 8968 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 8969 DCI.CombineTo(N, NewResults); 8970 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 8971 8972 break; 8973 } 8974 return SDValue(); 8975} 8976 8977/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 8978/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 8979/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 8980/// return true. 8981static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 8982 SelectionDAG &DAG = DCI.DAG; 8983 EVT VT = N->getValueType(0); 8984 // vldN-dup instructions only support 64-bit vectors for N > 1. 8985 if (!VT.is64BitVector()) 8986 return false; 8987 8988 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 8989 SDNode *VLD = N->getOperand(0).getNode(); 8990 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 8991 return false; 8992 unsigned NumVecs = 0; 8993 unsigned NewOpc = 0; 8994 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 8995 if (IntNo == Intrinsic::arm_neon_vld2lane) { 8996 NumVecs = 2; 8997 NewOpc = ARMISD::VLD2DUP; 8998 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 8999 NumVecs = 3; 9000 NewOpc = ARMISD::VLD3DUP; 9001 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 9002 NumVecs = 4; 9003 NewOpc = ARMISD::VLD4DUP; 9004 } else { 9005 return false; 9006 } 9007 9008 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 9009 // numbers match the load. 9010 unsigned VLDLaneNo = 9011 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 9012 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9013 UI != UE; ++UI) { 9014 // Ignore uses of the chain result. 9015 if (UI.getUse().getResNo() == NumVecs) 9016 continue; 9017 SDNode *User = *UI; 9018 if (User->getOpcode() != ARMISD::VDUPLANE || 9019 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 9020 return false; 9021 } 9022 9023 // Create the vldN-dup node. 9024 EVT Tys[5]; 9025 unsigned n; 9026 for (n = 0; n < NumVecs; ++n) 9027 Tys[n] = VT; 9028 Tys[n] = MVT::Other; 9029 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 9030 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 9031 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 9032 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, 9033 Ops, 2, VLDMemInt->getMemoryVT(), 9034 VLDMemInt->getMemOperand()); 9035 9036 // Update the uses. 
9037 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 9038 UI != UE; ++UI) { 9039 unsigned ResNo = UI.getUse().getResNo(); 9040 // Ignore uses of the chain result. 9041 if (ResNo == NumVecs) 9042 continue; 9043 SDNode *User = *UI; 9044 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 9045 } 9046 9047 // Now the vldN-lane intrinsic is dead except for its chain result. 9048 // Update uses of the chain. 9049 std::vector<SDValue> VLDDupResults; 9050 for (unsigned n = 0; n < NumVecs; ++n) 9051 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 9052 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 9053 DCI.CombineTo(VLD, VLDDupResults); 9054 9055 return true; 9056} 9057 9058/// PerformVDUPLANECombine - Target-specific dag combine xforms for 9059/// ARMISD::VDUPLANE. 9060static SDValue PerformVDUPLANECombine(SDNode *N, 9061 TargetLowering::DAGCombinerInfo &DCI) { 9062 SDValue Op = N->getOperand(0); 9063 9064 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 9065 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 9066 if (CombineVLDDUP(N, DCI)) 9067 return SDValue(N, 0); 9068 9069 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 9070 // redundant. Ignore bit_converts for now; element sizes are checked below. 9071 while (Op.getOpcode() == ISD::BITCAST) 9072 Op = Op.getOperand(0); 9073 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 9074 return SDValue(); 9075 9076 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 9077 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 9078 // The canonical VMOV for a zero vector uses a 32-bit element size. 9079 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 9080 unsigned EltBits; 9081 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 9082 EltSize = 8; 9083 EVT VT = N->getValueType(0); 9084 if (EltSize > VT.getVectorElementType().getSizeInBits()) 9085 return SDValue(); 9086 9087 return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); 9088} 9089 9090// isConstVecPow2 - Return true if each vector element is a power of 2, all 9091// elements are the same constant, C, and Log2(C) ranges from 1 to 32. 9092static bool isConstVecPow2(SDValue ConstVec, bool isSigned, uint64_t &C) 9093{ 9094 integerPart cN; 9095 integerPart c0 = 0; 9096 for (unsigned I = 0, E = ConstVec.getValueType().getVectorNumElements(); 9097 I != E; I++) { 9098 ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(ConstVec.getOperand(I)); 9099 if (!C) 9100 return false; 9101 9102 bool isExact; 9103 APFloat APF = C->getValueAPF(); 9104 if (APF.convertToInteger(&cN, 64, isSigned, APFloat::rmTowardZero, &isExact) 9105 != APFloat::opOK || !isExact) 9106 return false; 9107 9108 c0 = (I == 0) ? cN : c0; 9109 if (!isPowerOf2_64(cN) || c0 != cN || Log2_64(c0) < 1 || Log2_64(c0) > 32) 9110 return false; 9111 } 9112 C = c0; 9113 return true; 9114} 9115 9116/// PerformVCVTCombine - VCVT (floating-point to fixed-point, Advanced SIMD) 9117/// can replace combinations of VMUL and VCVT (floating-point to integer) 9118/// when the VMUL has a constant operand that is a power of 2. 
9119/// 9120/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9121/// vmul.f32 d16, d17, d16 9122/// vcvt.s32.f32 d16, d16 9123/// becomes: 9124/// vcvt.s32.f32 d16, d16, #3 9125static SDValue PerformVCVTCombine(SDNode *N, 9126 TargetLowering::DAGCombinerInfo &DCI, 9127 const ARMSubtarget *Subtarget) { 9128 SelectionDAG &DAG = DCI.DAG; 9129 SDValue Op = N->getOperand(0); 9130 9131 if (!Subtarget->hasNEON() || !Op.getValueType().isVector() || 9132 Op.getOpcode() != ISD::FMUL) 9133 return SDValue(); 9134 9135 uint64_t C; 9136 SDValue N0 = Op->getOperand(0); 9137 SDValue ConstVec = Op->getOperand(1); 9138 bool isSigned = N->getOpcode() == ISD::FP_TO_SINT; 9139 9140 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9141 !isConstVecPow2(ConstVec, isSigned, C)) 9142 return SDValue(); 9143 9144 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : 9145 Intrinsic::arm_neon_vcvtfp2fxu; 9146 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), 9147 N->getValueType(0), 9148 DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, 9149 DAG.getConstant(Log2_64(C), MVT::i32)); 9150} 9151 9152/// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) 9153/// can replace combinations of VCVT (integer to floating-point) and VDIV 9154/// when the VDIV has a constant operand that is a power of 2. 9155/// 9156/// Example (assume d17 = <float 8.000000e+00, float 8.000000e+00>): 9157/// vcvt.f32.s32 d16, d16 9158/// vdiv.f32 d16, d17, d16 9159/// becomes: 9160/// vcvt.f32.s32 d16, d16, #3 9161static SDValue PerformVDIVCombine(SDNode *N, 9162 TargetLowering::DAGCombinerInfo &DCI, 9163 const ARMSubtarget *Subtarget) { 9164 SelectionDAG &DAG = DCI.DAG; 9165 SDValue Op = N->getOperand(0); 9166 unsigned OpOpcode = Op.getNode()->getOpcode(); 9167 9168 if (!Subtarget->hasNEON() || !N->getValueType(0).isVector() || 9169 (OpOpcode != ISD::SINT_TO_FP && OpOpcode != ISD::UINT_TO_FP)) 9170 return SDValue(); 9171 9172 uint64_t C; 9173 SDValue ConstVec = N->getOperand(1); 9174 bool isSigned = OpOpcode == ISD::SINT_TO_FP; 9175 9176 if (ConstVec.getOpcode() != ISD::BUILD_VECTOR || 9177 !isConstVecPow2(ConstVec, isSigned, C)) 9178 return SDValue(); 9179 9180 unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : 9181 Intrinsic::arm_neon_vcvtfxu2fp; 9182 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), 9183 Op.getValueType(), 9184 DAG.getConstant(IntrinsicOpcode, MVT::i32), 9185 Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); 9186} 9187 9188/// Getvshiftimm - Check if this is a valid build_vector for the immediate 9189/// operand of a vector shift operation, where all the elements of the 9190/// build_vector must have the same constant integer value. 9191static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 9192 // Ignore bit_converts. 9193 while (Op.getOpcode() == ISD::BITCAST) 9194 Op = Op.getOperand(0); 9195 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 9196 APInt SplatBits, SplatUndef; 9197 unsigned SplatBitSize; 9198 bool HasAnyUndefs; 9199 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 9200 HasAnyUndefs, ElementBits) || 9201 SplatBitSize > ElementBits) 9202 return false; 9203 Cnt = SplatBits.getSExtValue(); 9204 return true; 9205} 9206 9207/// isVShiftLImm - Check if this is a valid build_vector for the immediate 9208/// operand of a vector shift left operation. 
That value must be in the range: 9209/// 0 <= Value < ElementBits for a left shift; or 9210/// 0 <= Value <= ElementBits for a long left shift. 9211static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 9212 assert(VT.isVector() && "vector shift count is not a vector type"); 9213 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9214 if (! getVShiftImm(Op, ElementBits, Cnt)) 9215 return false; 9216 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 9217} 9218 9219/// isVShiftRImm - Check if this is a valid build_vector for the immediate 9220/// operand of a vector shift right operation. For a shift opcode, the value 9221/// is positive, but for an intrinsic the value count must be negative. The 9222/// absolute value must be in the range: 9223/// 1 <= |Value| <= ElementBits for a right shift; or 9224/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 9225static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 9226 int64_t &Cnt) { 9227 assert(VT.isVector() && "vector shift count is not a vector type"); 9228 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 9229 if (! getVShiftImm(Op, ElementBits, Cnt)) 9230 return false; 9231 if (isIntrinsic) 9232 Cnt = -Cnt; 9233 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 9234} 9235 9236/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 9237static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 9238 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 9239 switch (IntNo) { 9240 default: 9241 // Don't do anything for most intrinsics. 9242 break; 9243 9244 // Vector shifts: check for immediate versions and lower them. 9245 // Note: This is done during DAG combining instead of DAG legalizing because 9246 // the build_vectors for 64-bit vector element shift counts are generally 9247 // not legal, and it is hard to see their values after they get legalized to 9248 // loads from a constant pool. 9249 case Intrinsic::arm_neon_vshifts: 9250 case Intrinsic::arm_neon_vshiftu: 9251 case Intrinsic::arm_neon_vshiftls: 9252 case Intrinsic::arm_neon_vshiftlu: 9253 case Intrinsic::arm_neon_vshiftn: 9254 case Intrinsic::arm_neon_vrshifts: 9255 case Intrinsic::arm_neon_vrshiftu: 9256 case Intrinsic::arm_neon_vrshiftn: 9257 case Intrinsic::arm_neon_vqshifts: 9258 case Intrinsic::arm_neon_vqshiftu: 9259 case Intrinsic::arm_neon_vqshiftsu: 9260 case Intrinsic::arm_neon_vqshiftns: 9261 case Intrinsic::arm_neon_vqshiftnu: 9262 case Intrinsic::arm_neon_vqshiftnsu: 9263 case Intrinsic::arm_neon_vqrshiftns: 9264 case Intrinsic::arm_neon_vqrshiftnu: 9265 case Intrinsic::arm_neon_vqrshiftnsu: { 9266 EVT VT = N->getOperand(1).getValueType(); 9267 int64_t Cnt; 9268 unsigned VShiftOpc = 0; 9269 9270 switch (IntNo) { 9271 case Intrinsic::arm_neon_vshifts: 9272 case Intrinsic::arm_neon_vshiftu: 9273 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 9274 VShiftOpc = ARMISD::VSHL; 9275 break; 9276 } 9277 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 9278 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
9279 ARMISD::VSHRs : ARMISD::VSHRu); 9280 break; 9281 } 9282 return SDValue(); 9283 9284 case Intrinsic::arm_neon_vshiftls: 9285 case Intrinsic::arm_neon_vshiftlu: 9286 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 9287 break; 9288 llvm_unreachable("invalid shift count for vshll intrinsic"); 9289 9290 case Intrinsic::arm_neon_vrshifts: 9291 case Intrinsic::arm_neon_vrshiftu: 9292 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 9293 break; 9294 return SDValue(); 9295 9296 case Intrinsic::arm_neon_vqshifts: 9297 case Intrinsic::arm_neon_vqshiftu: 9298 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9299 break; 9300 return SDValue(); 9301 9302 case Intrinsic::arm_neon_vqshiftsu: 9303 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 9304 break; 9305 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 9306 9307 case Intrinsic::arm_neon_vshiftn: 9308 case Intrinsic::arm_neon_vrshiftn: 9309 case Intrinsic::arm_neon_vqshiftns: 9310 case Intrinsic::arm_neon_vqshiftnu: 9311 case Intrinsic::arm_neon_vqshiftnsu: 9312 case Intrinsic::arm_neon_vqrshiftns: 9313 case Intrinsic::arm_neon_vqrshiftnu: 9314 case Intrinsic::arm_neon_vqrshiftnsu: 9315 // Narrowing shifts require an immediate right shift. 9316 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 9317 break; 9318 llvm_unreachable("invalid shift count for narrowing vector shift " 9319 "intrinsic"); 9320 9321 default: 9322 llvm_unreachable("unhandled vector shift"); 9323 } 9324 9325 switch (IntNo) { 9326 case Intrinsic::arm_neon_vshifts: 9327 case Intrinsic::arm_neon_vshiftu: 9328 // Opcode already set above. 9329 break; 9330 case Intrinsic::arm_neon_vshiftls: 9331 case Intrinsic::arm_neon_vshiftlu: 9332 if (Cnt == VT.getVectorElementType().getSizeInBits()) 9333 VShiftOpc = ARMISD::VSHLLi; 9334 else 9335 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
9336 ARMISD::VSHLLs : ARMISD::VSHLLu); 9337 break; 9338 case Intrinsic::arm_neon_vshiftn: 9339 VShiftOpc = ARMISD::VSHRN; break; 9340 case Intrinsic::arm_neon_vrshifts: 9341 VShiftOpc = ARMISD::VRSHRs; break; 9342 case Intrinsic::arm_neon_vrshiftu: 9343 VShiftOpc = ARMISD::VRSHRu; break; 9344 case Intrinsic::arm_neon_vrshiftn: 9345 VShiftOpc = ARMISD::VRSHRN; break; 9346 case Intrinsic::arm_neon_vqshifts: 9347 VShiftOpc = ARMISD::VQSHLs; break; 9348 case Intrinsic::arm_neon_vqshiftu: 9349 VShiftOpc = ARMISD::VQSHLu; break; 9350 case Intrinsic::arm_neon_vqshiftsu: 9351 VShiftOpc = ARMISD::VQSHLsu; break; 9352 case Intrinsic::arm_neon_vqshiftns: 9353 VShiftOpc = ARMISD::VQSHRNs; break; 9354 case Intrinsic::arm_neon_vqshiftnu: 9355 VShiftOpc = ARMISD::VQSHRNu; break; 9356 case Intrinsic::arm_neon_vqshiftnsu: 9357 VShiftOpc = ARMISD::VQSHRNsu; break; 9358 case Intrinsic::arm_neon_vqrshiftns: 9359 VShiftOpc = ARMISD::VQRSHRNs; break; 9360 case Intrinsic::arm_neon_vqrshiftnu: 9361 VShiftOpc = ARMISD::VQRSHRNu; break; 9362 case Intrinsic::arm_neon_vqrshiftnsu: 9363 VShiftOpc = ARMISD::VQRSHRNsu; break; 9364 } 9365 9366 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9367 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 9368 } 9369 9370 case Intrinsic::arm_neon_vshiftins: { 9371 EVT VT = N->getOperand(1).getValueType(); 9372 int64_t Cnt; 9373 unsigned VShiftOpc = 0; 9374 9375 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 9376 VShiftOpc = ARMISD::VSLI; 9377 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 9378 VShiftOpc = ARMISD::VSRI; 9379 else { 9380 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 9381 } 9382 9383 return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), 9384 N->getOperand(1), N->getOperand(2), 9385 DAG.getConstant(Cnt, MVT::i32)); 9386 } 9387 9388 case Intrinsic::arm_neon_vqrshifts: 9389 case Intrinsic::arm_neon_vqrshiftu: 9390 // No immediate versions of these to check for. 9391 break; 9392 } 9393 9394 return SDValue(); 9395} 9396 9397/// PerformShiftCombine - Checks for immediate versions of vector shifts and 9398/// lowers them. As with the vector shift intrinsics, this is done during DAG 9399/// combining instead of DAG legalizing because the build_vectors for 64-bit 9400/// vector element shift counts are generally not legal, and it is hard to see 9401/// their values after they get legalized to loads from a constant pool. 9402static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 9403 const ARMSubtarget *ST) { 9404 EVT VT = N->getValueType(0); 9405 if (N->getOpcode() == ISD::SRL && VT == MVT::i32 && ST->hasV6Ops()) { 9406 // Canonicalize (srl (bswap x), 16) to (rotr (bswap x), 16) if the high 9407 // 16-bits of x is zero. This optimizes rev + lsr 16 to rev16. 9408 SDValue N1 = N->getOperand(1); 9409 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 9410 SDValue N0 = N->getOperand(0); 9411 if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && 9412 DAG.MaskedValueIsZero(N0.getOperand(0), 9413 APInt::getHighBitsSet(32, 16))) 9414 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); 9415 } 9416 } 9417 9418 // Nothing to be done for scalar shifts. 
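  // For example (roughly), a legal vector shift whose amount is a splatted
  // constant, such as
  //   shl <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
  // is rewritten below as an ARMISD::VSHL node carrying the immediate #3,
  // which then selects to a NEON "vshl.i32 ..., #3".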
9419 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9420 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 9421 return SDValue(); 9422 9423 assert(ST->hasNEON() && "unexpected vector shift"); 9424 int64_t Cnt; 9425 9426 switch (N->getOpcode()) { 9427 default: llvm_unreachable("unexpected shift opcode"); 9428 9429 case ISD::SHL: 9430 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 9431 return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), 9432 DAG.getConstant(Cnt, MVT::i32)); 9433 break; 9434 9435 case ISD::SRA: 9436 case ISD::SRL: 9437 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 9438 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 9439 ARMISD::VSHRs : ARMISD::VSHRu); 9440 return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), 9441 DAG.getConstant(Cnt, MVT::i32)); 9442 } 9443 } 9444 return SDValue(); 9445} 9446 9447/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 9448/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 9449static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 9450 const ARMSubtarget *ST) { 9451 SDValue N0 = N->getOperand(0); 9452 9453 // Check for sign- and zero-extensions of vector extract operations of 8- 9454 // and 16-bit vector elements. NEON supports these directly. They are 9455 // handled during DAG combining because type legalization will promote them 9456 // to 32-bit types and it is messy to recognize the operations after that. 9457 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 9458 SDValue Vec = N0.getOperand(0); 9459 SDValue Lane = N0.getOperand(1); 9460 EVT VT = N->getValueType(0); 9461 EVT EltVT = N0.getValueType(); 9462 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 9463 9464 if (VT == MVT::i32 && 9465 (EltVT == MVT::i8 || EltVT == MVT::i16) && 9466 TLI.isTypeLegal(Vec.getValueType()) && 9467 isa<ConstantSDNode>(Lane)) { 9468 9469 unsigned Opc = 0; 9470 switch (N->getOpcode()) { 9471 default: llvm_unreachable("unexpected opcode"); 9472 case ISD::SIGN_EXTEND: 9473 Opc = ARMISD::VGETLANEs; 9474 break; 9475 case ISD::ZERO_EXTEND: 9476 case ISD::ANY_EXTEND: 9477 Opc = ARMISD::VGETLANEu; 9478 break; 9479 } 9480 return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); 9481 } 9482 } 9483 9484 return SDValue(); 9485} 9486 9487/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 9488/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 9489static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 9490 const ARMSubtarget *ST) { 9491 // If the target supports NEON, try to use vmax/vmin instructions for f32 9492 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 9493 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 9494 // a NaN; only do the transformation when it matches that behavior. 9495 9496 // For now only do this when using NEON for FP operations; if using VFP, it 9497 // is not obvious that the benefit outweighs the cost of switching to the 9498 // NEON pipeline. 
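  // For instance, "a < b ? a : b" on f32 (with the NaN and signed-zero
  // caveats checked below satisfied) becomes ARMISD::FMIN, which selects to
  // NEON's vmin.f32.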
9499 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 9500 N->getValueType(0) != MVT::f32) 9501 return SDValue(); 9502 9503 SDValue CondLHS = N->getOperand(0); 9504 SDValue CondRHS = N->getOperand(1); 9505 SDValue LHS = N->getOperand(2); 9506 SDValue RHS = N->getOperand(3); 9507 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 9508 9509 unsigned Opcode = 0; 9510 bool IsReversed; 9511 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 9512 IsReversed = false; // x CC y ? x : y 9513 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 9514 IsReversed = true ; // x CC y ? y : x 9515 } else { 9516 return SDValue(); 9517 } 9518 9519 bool IsUnordered; 9520 switch (CC) { 9521 default: break; 9522 case ISD::SETOLT: 9523 case ISD::SETOLE: 9524 case ISD::SETLT: 9525 case ISD::SETLE: 9526 case ISD::SETULT: 9527 case ISD::SETULE: 9528 // If LHS is NaN, an ordered comparison will be false and the result will 9529 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 9530 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 9531 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 9532 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 9533 break; 9534 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 9535 // will return -0, so vmin can only be used for unsafe math or if one of 9536 // the operands is known to be nonzero. 9537 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 9538 !DAG.getTarget().Options.UnsafeFPMath && 9539 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 9540 break; 9541 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 9542 break; 9543 9544 case ISD::SETOGT: 9545 case ISD::SETOGE: 9546 case ISD::SETGT: 9547 case ISD::SETGE: 9548 case ISD::SETUGT: 9549 case ISD::SETUGE: 9550 // If LHS is NaN, an ordered comparison will be false and the result will 9551 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 9552 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 9553 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 9554 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 9555 break; 9556 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 9557 // will return +0, so vmax can only be used for unsafe math or if one of 9558 // the operands is known to be nonzero. 9559 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 9560 !DAG.getTarget().Options.UnsafeFPMath && 9561 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 9562 break; 9563 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 9564 break; 9565 } 9566 9567 if (!Opcode) 9568 return SDValue(); 9569 return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); 9570} 9571 9572/// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV. 9573SDValue 9574ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { 9575 SDValue Cmp = N->getOperand(4); 9576 if (Cmp.getOpcode() != ARMISD::CMPZ) 9577 // Only looking at EQ and NE cases. 
9578 return SDValue(); 9579 9580 EVT VT = N->getValueType(0); 9581 SDLoc dl(N); 9582 SDValue LHS = Cmp.getOperand(0); 9583 SDValue RHS = Cmp.getOperand(1); 9584 SDValue FalseVal = N->getOperand(0); 9585 SDValue TrueVal = N->getOperand(1); 9586 SDValue ARMcc = N->getOperand(2); 9587 ARMCC::CondCodes CC = 9588 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 9589 9590 // Simplify 9591 // mov r1, r0 9592 // cmp r1, x 9593 // mov r0, y 9594 // moveq r0, x 9595 // to 9596 // cmp r0, x 9597 // movne r0, y 9598 // 9599 // mov r1, r0 9600 // cmp r1, x 9601 // mov r0, x 9602 // movne r0, y 9603 // to 9604 // cmp r0, x 9605 // movne r0, y 9606 /// FIXME: Turn this into a target neutral optimization? 9607 SDValue Res; 9608 if (CC == ARMCC::NE && FalseVal == RHS && FalseVal != LHS) { 9609 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, TrueVal, ARMcc, 9610 N->getOperand(3), Cmp); 9611 } else if (CC == ARMCC::EQ && TrueVal == RHS) { 9612 SDValue ARMcc; 9613 SDValue NewCmp = getARMCmp(LHS, RHS, ISD::SETNE, ARMcc, DAG, dl); 9614 Res = DAG.getNode(ARMISD::CMOV, dl, VT, LHS, FalseVal, ARMcc, 9615 N->getOperand(3), NewCmp); 9616 } 9617 9618 if (Res.getNode()) { 9619 APInt KnownZero, KnownOne; 9620 DAG.ComputeMaskedBits(SDValue(N,0), KnownZero, KnownOne); 9621 // Capture demanded bits information that would be otherwise lost. 9622 if (KnownZero == 0xfffffffe) 9623 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9624 DAG.getValueType(MVT::i1)); 9625 else if (KnownZero == 0xffffff00) 9626 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9627 DAG.getValueType(MVT::i8)); 9628 else if (KnownZero == 0xffff0000) 9629 Res = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Res, 9630 DAG.getValueType(MVT::i16)); 9631 } 9632 9633 return Res; 9634} 9635 9636SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 9637 DAGCombinerInfo &DCI) const { 9638 switch (N->getOpcode()) { 9639 default: break; 9640 case ISD::ADDC: return PerformADDCCombine(N, DCI, Subtarget); 9641 case ISD::ADD: return PerformADDCombine(N, DCI, Subtarget); 9642 case ISD::SUB: return PerformSUBCombine(N, DCI); 9643 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 9644 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 9645 case ISD::XOR: return PerformXORCombine(N, DCI, Subtarget); 9646 case ISD::AND: return PerformANDCombine(N, DCI, Subtarget); 9647 case ARMISD::BFI: return PerformBFICombine(N, DCI); 9648 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 9649 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 9650 case ISD::STORE: return PerformSTORECombine(N, DCI); 9651 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 9652 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 9653 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 9654 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 9655 case ISD::FP_TO_SINT: 9656 case ISD::FP_TO_UINT: return PerformVCVTCombine(N, DCI, Subtarget); 9657 case ISD::FDIV: return PerformVDIVCombine(N, DCI, Subtarget); 9658 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 9659 case ISD::SHL: 9660 case ISD::SRA: 9661 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 9662 case ISD::SIGN_EXTEND: 9663 case ISD::ZERO_EXTEND: 9664 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 9665 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 9666 case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG); 9667 case 
ARMISD::VLD2DUP:
  case ARMISD::VLD3DUP:
  case ARMISD::VLD4DUP:
    return CombineBaseUpdate(N, DCI);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN:
    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
    case Intrinsic::arm_neon_vld1:
    case Intrinsic::arm_neon_vld2:
    case Intrinsic::arm_neon_vld3:
    case Intrinsic::arm_neon_vld4:
    case Intrinsic::arm_neon_vld2lane:
    case Intrinsic::arm_neon_vld3lane:
    case Intrinsic::arm_neon_vld4lane:
    case Intrinsic::arm_neon_vst1:
    case Intrinsic::arm_neon_vst2:
    case Intrinsic::arm_neon_vst3:
    case Intrinsic::arm_neon_vst4:
    case Intrinsic::arm_neon_vst2lane:
    case Intrinsic::arm_neon_vst3lane:
    case Intrinsic::arm_neon_vst4lane:
      return CombineBaseUpdate(N, DCI);
    default: break;
    }
    break;
  }
  return SDValue();
}

bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
                                                          EVT VT) const {
  return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE);
}

bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, bool *Fast) const {
  // The AllowsUnaligned flag models the SCTLR.A setting in ARM CPUs.
  bool AllowsUnaligned = Subtarget->allowsUnalignedMem();

  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return false;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32: {
    // Unaligned access can use (for example) LDRB, LDRH, LDR.
    if (AllowsUnaligned) {
      if (Fast)
        *Fast = Subtarget->hasV7Ops();
      return true;
    }
    return false;
  }
  case MVT::f64:
  case MVT::v2f64: {
    // For any little-endian targets with NEON, we can support unaligned ld/st
    // of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
    // A big-endian target may also explicitly support unaligned accesses.
    if (Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian())) {
      if (Fast)
        *Fast = true;
      return true;
    }
    return false;
  }
  }
}

static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign,
                       unsigned AlignCheck) {
  return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) &&
          (DstAlign == 0 || DstAlign % AlignCheck == 0));
}

EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  const Function *F = MF.getFunction();

  // See if we can use NEON instructions for this...
  if ((!IsMemset || ZeroMemset) &&
      Subtarget->hasNEON() &&
      !F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                       Attribute::NoImplicitFloat)) {
    bool Fast;
    if (Size >= 16 &&
        (memOpAlign(SrcAlign, DstAlign, 16) ||
         (allowsUnalignedMemoryAccesses(MVT::v2f64, &Fast) && Fast))) {
      return MVT::v2f64;
    } else if (Size >= 8 &&
               (memOpAlign(SrcAlign, DstAlign, 8) ||
                (allowsUnalignedMemoryAccesses(MVT::f64, &Fast) && Fast))) {
      return MVT::f64;
    }
  }

  // Lowering to i32/i16 if the size permits.
  if (Size >= 4)
    return MVT::i32;
  else if (Size >= 2)
    return MVT::i16;

  // Let the target-independent logic figure it out.
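  // For instance, with NEON available and 16-byte aligned operands, a 32-byte
  // memcpy is given MVT::v2f64 above (two 128-bit chunks), whereas a copy
  // smaller than 2 bytes reaches this point and defers to the generic code.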
9771 return MVT::Other; 9772} 9773 9774bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { 9775 if (Val.getOpcode() != ISD::LOAD) 9776 return false; 9777 9778 EVT VT1 = Val.getValueType(); 9779 if (!VT1.isSimple() || !VT1.isInteger() || 9780 !VT2.isSimple() || !VT2.isInteger()) 9781 return false; 9782 9783 switch (VT1.getSimpleVT().SimpleTy) { 9784 default: break; 9785 case MVT::i1: 9786 case MVT::i8: 9787 case MVT::i16: 9788 // 8-bit and 16-bit loads implicitly zero-extend to 32-bits. 9789 return true; 9790 } 9791 9792 return false; 9793} 9794 9795static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 9796 if (V < 0) 9797 return false; 9798 9799 unsigned Scale = 1; 9800 switch (VT.getSimpleVT().SimpleTy) { 9801 default: return false; 9802 case MVT::i1: 9803 case MVT::i8: 9804 // Scale == 1; 9805 break; 9806 case MVT::i16: 9807 // Scale == 2; 9808 Scale = 2; 9809 break; 9810 case MVT::i32: 9811 // Scale == 4; 9812 Scale = 4; 9813 break; 9814 } 9815 9816 if ((V & (Scale - 1)) != 0) 9817 return false; 9818 V /= Scale; 9819 return V == (V & ((1LL << 5) - 1)); 9820} 9821 9822static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 9823 const ARMSubtarget *Subtarget) { 9824 bool isNeg = false; 9825 if (V < 0) { 9826 isNeg = true; 9827 V = - V; 9828 } 9829 9830 switch (VT.getSimpleVT().SimpleTy) { 9831 default: return false; 9832 case MVT::i1: 9833 case MVT::i8: 9834 case MVT::i16: 9835 case MVT::i32: 9836 // + imm12 or - imm8 9837 if (isNeg) 9838 return V == (V & ((1LL << 8) - 1)); 9839 return V == (V & ((1LL << 12) - 1)); 9840 case MVT::f32: 9841 case MVT::f64: 9842 // Same as ARM mode. FIXME: NEON? 9843 if (!Subtarget->hasVFP2()) 9844 return false; 9845 if ((V & 3) != 0) 9846 return false; 9847 V >>= 2; 9848 return V == (V & ((1LL << 8) - 1)); 9849 } 9850} 9851 9852/// isLegalAddressImmediate - Return true if the integer value can be used 9853/// as the offset of the target addressing mode for load / store of the 9854/// given type. 9855static bool isLegalAddressImmediate(int64_t V, EVT VT, 9856 const ARMSubtarget *Subtarget) { 9857 if (V == 0) 9858 return true; 9859 9860 if (!VT.isSimple()) 9861 return false; 9862 9863 if (Subtarget->isThumb1Only()) 9864 return isLegalT1AddressImmediate(V, VT); 9865 else if (Subtarget->isThumb2()) 9866 return isLegalT2AddressImmediate(V, VT, Subtarget); 9867 9868 // ARM mode. 9869 if (V < 0) 9870 V = - V; 9871 switch (VT.getSimpleVT().SimpleTy) { 9872 default: return false; 9873 case MVT::i1: 9874 case MVT::i8: 9875 case MVT::i32: 9876 // +- imm12 9877 return V == (V & ((1LL << 12) - 1)); 9878 case MVT::i16: 9879 // +- imm8 9880 return V == (V & ((1LL << 8) - 1)); 9881 case MVT::f32: 9882 case MVT::f64: 9883 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
9884 return false; 9885 if ((V & 3) != 0) 9886 return false; 9887 V >>= 2; 9888 return V == (V & ((1LL << 8) - 1)); 9889 } 9890} 9891 9892bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 9893 EVT VT) const { 9894 int Scale = AM.Scale; 9895 if (Scale < 0) 9896 return false; 9897 9898 switch (VT.getSimpleVT().SimpleTy) { 9899 default: return false; 9900 case MVT::i1: 9901 case MVT::i8: 9902 case MVT::i16: 9903 case MVT::i32: 9904 if (Scale == 1) 9905 return true; 9906 // r + r << imm 9907 Scale = Scale & ~1; 9908 return Scale == 2 || Scale == 4 || Scale == 8; 9909 case MVT::i64: 9910 // r + r 9911 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 9912 return true; 9913 return false; 9914 case MVT::isVoid: 9915 // Note, we allow "void" uses (basically, uses that aren't loads or 9916 // stores), because arm allows folding a scale into many arithmetic 9917 // operations. This should be made more precise and revisited later. 9918 9919 // Allow r << imm, but the imm has to be a multiple of two. 9920 if (Scale & 1) return false; 9921 return isPowerOf2_32(Scale); 9922 } 9923} 9924 9925/// isLegalAddressingMode - Return true if the addressing mode represented 9926/// by AM is legal for this target, for a load/store of the specified type. 9927bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 9928 Type *Ty) const { 9929 EVT VT = getValueType(Ty, true); 9930 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 9931 return false; 9932 9933 // Can never fold addr of global into load/store. 9934 if (AM.BaseGV) 9935 return false; 9936 9937 switch (AM.Scale) { 9938 case 0: // no scale reg, must be "r+i" or "r", or "i". 9939 break; 9940 case 1: 9941 if (Subtarget->isThumb1Only()) 9942 return false; 9943 // FALL THROUGH. 9944 default: 9945 // ARM doesn't support any R+R*scale+imm addr modes. 9946 if (AM.BaseOffs) 9947 return false; 9948 9949 if (!VT.isSimple()) 9950 return false; 9951 9952 if (Subtarget->isThumb2()) 9953 return isLegalT2ScaledAddressingMode(AM, VT); 9954 9955 int Scale = AM.Scale; 9956 switch (VT.getSimpleVT().SimpleTy) { 9957 default: return false; 9958 case MVT::i1: 9959 case MVT::i8: 9960 case MVT::i32: 9961 if (Scale < 0) Scale = -Scale; 9962 if (Scale == 1) 9963 return true; 9964 // r + r << imm 9965 return isPowerOf2_32(Scale & ~1); 9966 case MVT::i16: 9967 case MVT::i64: 9968 // r + r 9969 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 9970 return true; 9971 return false; 9972 9973 case MVT::isVoid: 9974 // Note, we allow "void" uses (basically, uses that aren't loads or 9975 // stores), because arm allows folding a scale into many arithmetic 9976 // operations. This should be made more precise and revisited later. 9977 9978 // Allow r << imm, but the imm has to be a multiple of two. 9979 if (Scale & 1) return false; 9980 return isPowerOf2_32(Scale); 9981 } 9982 } 9983 return true; 9984} 9985 9986/// isLegalICmpImmediate - Return true if the specified immediate is legal 9987/// icmp immediate, that is the target has icmp instructions which can compare 9988/// a register against the immediate without having to materialize the 9989/// immediate into a register. 9990bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 9991 // Thumb2 and ARM modes can use cmn for negative immediates. 9992 if (!Subtarget->isThumb()) 9993 return ARM_AM::getSOImmVal(llvm::abs64(Imm)) != -1; 9994 if (Subtarget->isThumb2()) 9995 return ARM_AM::getT2SOImmVal(llvm::abs64(Imm)) != -1; 9996 // Thumb1 doesn't have cmn, and only 8-bit immediates. 
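  // For example, "cmp r0, #-42" is fine in ARM or Thumb2 mode because it can
  // be encoded as "cmn r0, #42", but Thumb1 must reject negative immediates.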
9997 return Imm >= 0 && Imm <= 255; 9998} 9999 10000/// isLegalAddImmediate - Return true if the specified immediate is a legal add 10001/// *or sub* immediate, that is the target has add or sub instructions which can 10002/// add a register with the immediate without having to materialize the 10003/// immediate into a register. 10004bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 10005 // Same encoding for add/sub, just flip the sign. 10006 int64_t AbsImm = llvm::abs64(Imm); 10007 if (!Subtarget->isThumb()) 10008 return ARM_AM::getSOImmVal(AbsImm) != -1; 10009 if (Subtarget->isThumb2()) 10010 return ARM_AM::getT2SOImmVal(AbsImm) != -1; 10011 // Thumb1 only has 8-bit unsigned immediate. 10012 return AbsImm >= 0 && AbsImm <= 255; 10013} 10014 10015static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 10016 bool isSEXTLoad, SDValue &Base, 10017 SDValue &Offset, bool &isInc, 10018 SelectionDAG &DAG) { 10019 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10020 return false; 10021 10022 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 10023 // AddressingMode 3 10024 Base = Ptr->getOperand(0); 10025 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10026 int RHSC = (int)RHS->getZExtValue(); 10027 if (RHSC < 0 && RHSC > -256) { 10028 assert(Ptr->getOpcode() == ISD::ADD); 10029 isInc = false; 10030 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10031 return true; 10032 } 10033 } 10034 isInc = (Ptr->getOpcode() == ISD::ADD); 10035 Offset = Ptr->getOperand(1); 10036 return true; 10037 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 10038 // AddressingMode 2 10039 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10040 int RHSC = (int)RHS->getZExtValue(); 10041 if (RHSC < 0 && RHSC > -0x1000) { 10042 assert(Ptr->getOpcode() == ISD::ADD); 10043 isInc = false; 10044 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10045 Base = Ptr->getOperand(0); 10046 return true; 10047 } 10048 } 10049 10050 if (Ptr->getOpcode() == ISD::ADD) { 10051 isInc = true; 10052 ARM_AM::ShiftOpc ShOpcVal= 10053 ARM_AM::getShiftOpcForNode(Ptr->getOperand(0).getOpcode()); 10054 if (ShOpcVal != ARM_AM::no_shift) { 10055 Base = Ptr->getOperand(1); 10056 Offset = Ptr->getOperand(0); 10057 } else { 10058 Base = Ptr->getOperand(0); 10059 Offset = Ptr->getOperand(1); 10060 } 10061 return true; 10062 } 10063 10064 isInc = (Ptr->getOpcode() == ISD::ADD); 10065 Base = Ptr->getOperand(0); 10066 Offset = Ptr->getOperand(1); 10067 return true; 10068 } 10069 10070 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 10071 return false; 10072} 10073 10074static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 10075 bool isSEXTLoad, SDValue &Base, 10076 SDValue &Offset, bool &isInc, 10077 SelectionDAG &DAG) { 10078 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 10079 return false; 10080 10081 Base = Ptr->getOperand(0); 10082 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 10083 int RHSC = (int)RHS->getZExtValue(); 10084 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 10085 assert(Ptr->getOpcode() == ISD::ADD); 10086 isInc = false; 10087 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 10088 return true; 10089 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 
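      // For example, a pointer formed as (add %base, 40) is matched here as
      // a positive #40 pre/post-increment; Thumb2 indexed loads and stores
      // take an 8-bit immediate offset.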
10090 isInc = Ptr->getOpcode() == ISD::ADD; 10091 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 10092 return true; 10093 } 10094 } 10095 10096 return false; 10097} 10098 10099/// getPreIndexedAddressParts - returns true by value, base pointer and 10100/// offset pointer and addressing mode by reference if the node's address 10101/// can be legally represented as pre-indexed load / store address. 10102bool 10103ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 10104 SDValue &Offset, 10105 ISD::MemIndexedMode &AM, 10106 SelectionDAG &DAG) const { 10107 if (Subtarget->isThumb1Only()) 10108 return false; 10109 10110 EVT VT; 10111 SDValue Ptr; 10112 bool isSEXTLoad = false; 10113 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10114 Ptr = LD->getBasePtr(); 10115 VT = LD->getMemoryVT(); 10116 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10117 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10118 Ptr = ST->getBasePtr(); 10119 VT = ST->getMemoryVT(); 10120 } else 10121 return false; 10122 10123 bool isInc; 10124 bool isLegal = false; 10125 if (Subtarget->isThumb2()) 10126 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10127 Offset, isInc, DAG); 10128 else 10129 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 10130 Offset, isInc, DAG); 10131 if (!isLegal) 10132 return false; 10133 10134 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 10135 return true; 10136} 10137 10138/// getPostIndexedAddressParts - returns true by value, base pointer and 10139/// offset pointer and addressing mode by reference if this node can be 10140/// combined with a load / store to form a post-indexed load / store. 10141bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 10142 SDValue &Base, 10143 SDValue &Offset, 10144 ISD::MemIndexedMode &AM, 10145 SelectionDAG &DAG) const { 10146 if (Subtarget->isThumb1Only()) 10147 return false; 10148 10149 EVT VT; 10150 SDValue Ptr; 10151 bool isSEXTLoad = false; 10152 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 10153 VT = LD->getMemoryVT(); 10154 Ptr = LD->getBasePtr(); 10155 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 10156 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 10157 VT = ST->getMemoryVT(); 10158 Ptr = ST->getBasePtr(); 10159 } else 10160 return false; 10161 10162 bool isInc; 10163 bool isLegal = false; 10164 if (Subtarget->isThumb2()) 10165 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10166 isInc, DAG); 10167 else 10168 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 10169 isInc, DAG); 10170 if (!isLegal) 10171 return false; 10172 10173 if (Ptr != Base) { 10174 // Swap base ptr and offset to catch more post-index load / store when 10175 // it's legal. In Thumb2 mode, offset must be an immediate. 10176 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 10177 !Subtarget->isThumb2()) 10178 std::swap(Base, Offset); 10179 10180 // Post-indexed load / store update the base pointer. 10181 if (Ptr != Base) 10182 return false; 10183 } 10184 10185 AM = isInc ? 
ISD::POST_INC : ISD::POST_DEC; 10186 return true; 10187} 10188 10189void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 10190 APInt &KnownZero, 10191 APInt &KnownOne, 10192 const SelectionDAG &DAG, 10193 unsigned Depth) const { 10194 unsigned BitWidth = KnownOne.getBitWidth(); 10195 KnownZero = KnownOne = APInt(BitWidth, 0); 10196 switch (Op.getOpcode()) { 10197 default: break; 10198 case ARMISD::ADDC: 10199 case ARMISD::ADDE: 10200 case ARMISD::SUBC: 10201 case ARMISD::SUBE: 10202 // These nodes' second result is a boolean 10203 if (Op.getResNo() == 0) 10204 break; 10205 KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); 10206 break; 10207 case ARMISD::CMOV: { 10208 // Bits are known zero/one if known on the LHS and RHS. 10209 DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); 10210 if (KnownZero == 0 && KnownOne == 0) return; 10211 10212 APInt KnownZeroRHS, KnownOneRHS; 10213 DAG.ComputeMaskedBits(Op.getOperand(1), KnownZeroRHS, KnownOneRHS, Depth+1); 10214 KnownZero &= KnownZeroRHS; 10215 KnownOne &= KnownOneRHS; 10216 return; 10217 } 10218 } 10219} 10220 10221//===----------------------------------------------------------------------===// 10222// ARM Inline Assembly Support 10223//===----------------------------------------------------------------------===// 10224 10225bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 10226 // Looking for "rev" which is V6+. 10227 if (!Subtarget->hasV6Ops()) 10228 return false; 10229 10230 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 10231 std::string AsmStr = IA->getAsmString(); 10232 SmallVector<StringRef, 4> AsmPieces; 10233 SplitString(AsmStr, AsmPieces, ";\n"); 10234 10235 switch (AsmPieces.size()) { 10236 default: return false; 10237 case 1: 10238 AsmStr = AsmPieces[0]; 10239 AsmPieces.clear(); 10240 SplitString(AsmStr, AsmPieces, " \t,"); 10241 10242 // rev $0, $1 10243 if (AsmPieces.size() == 3 && 10244 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 10245 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 10246 IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 10247 if (Ty && Ty->getBitWidth() == 32) 10248 return IntrinsicLowering::LowerToByteSwap(CI); 10249 } 10250 break; 10251 } 10252 10253 return false; 10254} 10255 10256/// getConstraintType - Given a constraint letter, return the type of 10257/// constraint it is for this target. 10258ARMTargetLowering::ConstraintType 10259ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 10260 if (Constraint.size() == 1) { 10261 switch (Constraint[0]) { 10262 default: break; 10263 case 'l': return C_RegisterClass; 10264 case 'w': return C_RegisterClass; 10265 case 'h': return C_RegisterClass; 10266 case 'x': return C_RegisterClass; 10267 case 't': return C_RegisterClass; 10268 case 'j': return C_Other; // Constant for movw. 10269 // An address with a single base register. Due to the way we 10270 // currently handle addresses it is the same as an 'r' memory constraint. 10271 case 'Q': return C_Memory; 10272 } 10273 } else if (Constraint.size() == 2) { 10274 switch (Constraint[0]) { 10275 default: break; 10276 // All 'U+' constraints are addresses. 10277 case 'U': return C_Memory; 10278 } 10279 } 10280 return TargetLowering::getConstraintType(Constraint); 10281} 10282 10283/// Examine constraint type and operand type and determine a weight value. 10284/// This object must already have been set up with the operand type 10285/// and the current alternative constraint selected. 
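/// For example, an integer operand constrained with 'l' is given the
/// CW_SpecificReg weight when targeting Thumb, since it must live in a low
/// register (r0-r7).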
10286TargetLowering::ConstraintWeight 10287ARMTargetLowering::getSingleConstraintMatchWeight( 10288 AsmOperandInfo &info, const char *constraint) const { 10289 ConstraintWeight weight = CW_Invalid; 10290 Value *CallOperandVal = info.CallOperandVal; 10291 // If we don't have a value, we can't do a match, 10292 // but allow it at the lowest weight. 10293 if (CallOperandVal == NULL) 10294 return CW_Default; 10295 Type *type = CallOperandVal->getType(); 10296 // Look at the constraint type. 10297 switch (*constraint) { 10298 default: 10299 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 10300 break; 10301 case 'l': 10302 if (type->isIntegerTy()) { 10303 if (Subtarget->isThumb()) 10304 weight = CW_SpecificReg; 10305 else 10306 weight = CW_Register; 10307 } 10308 break; 10309 case 'w': 10310 if (type->isFloatingPointTy()) 10311 weight = CW_Register; 10312 break; 10313 } 10314 return weight; 10315} 10316 10317typedef std::pair<unsigned, const TargetRegisterClass*> RCPair; 10318RCPair 10319ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 10320 MVT VT) const { 10321 if (Constraint.size() == 1) { 10322 // GCC ARM Constraint Letters 10323 switch (Constraint[0]) { 10324 case 'l': // Low regs or general regs. 10325 if (Subtarget->isThumb()) 10326 return RCPair(0U, &ARM::tGPRRegClass); 10327 return RCPair(0U, &ARM::GPRRegClass); 10328 case 'h': // High regs or no regs. 10329 if (Subtarget->isThumb()) 10330 return RCPair(0U, &ARM::hGPRRegClass); 10331 break; 10332 case 'r': 10333 return RCPair(0U, &ARM::GPRRegClass); 10334 case 'w': 10335 if (VT == MVT::f32) 10336 return RCPair(0U, &ARM::SPRRegClass); 10337 if (VT.getSizeInBits() == 64) 10338 return RCPair(0U, &ARM::DPRRegClass); 10339 if (VT.getSizeInBits() == 128) 10340 return RCPair(0U, &ARM::QPRRegClass); 10341 break; 10342 case 'x': 10343 if (VT == MVT::f32) 10344 return RCPair(0U, &ARM::SPR_8RegClass); 10345 if (VT.getSizeInBits() == 64) 10346 return RCPair(0U, &ARM::DPR_8RegClass); 10347 if (VT.getSizeInBits() == 128) 10348 return RCPair(0U, &ARM::QPR_8RegClass); 10349 break; 10350 case 't': 10351 if (VT == MVT::f32) 10352 return RCPair(0U, &ARM::SPRRegClass); 10353 break; 10354 } 10355 } 10356 if (StringRef("{cc}").equals_lower(Constraint)) 10357 return std::make_pair(unsigned(ARM::CPSR), &ARM::CCRRegClass); 10358 10359 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 10360} 10361 10362/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 10363/// vector. If it is invalid, don't add anything to Ops. 10364void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 10365 std::string &Constraint, 10366 std::vector<SDValue>&Ops, 10367 SelectionDAG &DAG) const { 10368 SDValue Result(0, 0); 10369 10370 // Currently only support length 1 constraints. 10371 if (Constraint.length() != 1) return; 10372 10373 char ConstraintLetter = Constraint[0]; 10374 switch (ConstraintLetter) { 10375 default: break; 10376 case 'j': 10377 case 'I': case 'J': case 'K': case 'L': 10378 case 'M': case 'N': case 'O': 10379 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 10380 if (!C) 10381 return; 10382 10383 int64_t CVal64 = C->getSExtValue(); 10384 int CVal = (int) CVal64; 10385 // None of these constraints allow values larger than 32 bits. Check 10386 // that the value fits in an int. 10387 if (CVal != CVal64) 10388 return; 10389 10390 switch (ConstraintLetter) { 10391 case 'j': 10392 // Constant suitable for movw, must be between 0 and 10393 // 65535. 
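      // For example, "movw r0, #1234". MOVW only exists on ARMv6T2 and
      // later, hence the hasV6T2Ops() guard below.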
10394 if (Subtarget->hasV6T2Ops()) 10395 if (CVal >= 0 && CVal <= 65535) 10396 break; 10397 return; 10398 case 'I': 10399 if (Subtarget->isThumb1Only()) { 10400 // This must be a constant between 0 and 255, for ADD 10401 // immediates. 10402 if (CVal >= 0 && CVal <= 255) 10403 break; 10404 } else if (Subtarget->isThumb2()) { 10405 // A constant that can be used as an immediate value in a 10406 // data-processing instruction. 10407 if (ARM_AM::getT2SOImmVal(CVal) != -1) 10408 break; 10409 } else { 10410 // A constant that can be used as an immediate value in a 10411 // data-processing instruction. 10412 if (ARM_AM::getSOImmVal(CVal) != -1) 10413 break; 10414 } 10415 return; 10416 10417 case 'J': 10418 if (Subtarget->isThumb()) { // FIXME thumb2 10419 // This must be a constant between -255 and -1, for negated ADD 10420 // immediates. This can be used in GCC with an "n" modifier that 10421 // prints the negated value, for use with SUB instructions. It is 10422 // not useful otherwise but is implemented for compatibility. 10423 if (CVal >= -255 && CVal <= -1) 10424 break; 10425 } else { 10426 // This must be a constant between -4095 and 4095. It is not clear 10427 // what this constraint is intended for. Implemented for 10428 // compatibility with GCC. 10429 if (CVal >= -4095 && CVal <= 4095) 10430 break; 10431 } 10432 return; 10433 10434 case 'K': 10435 if (Subtarget->isThumb1Only()) { 10436 // A 32-bit value where only one byte has a nonzero value. Exclude 10437 // zero to match GCC. This constraint is used by GCC internally for 10438 // constants that can be loaded with a move/shift combination. 10439 // It is not useful otherwise but is implemented for compatibility. 10440 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 10441 break; 10442 } else if (Subtarget->isThumb2()) { 10443 // A constant whose bitwise inverse can be used as an immediate 10444 // value in a data-processing instruction. This can be used in GCC 10445 // with a "B" modifier that prints the inverted value, for use with 10446 // BIC and MVN instructions. It is not useful otherwise but is 10447 // implemented for compatibility. 10448 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 10449 break; 10450 } else { 10451 // A constant whose bitwise inverse can be used as an immediate 10452 // value in a data-processing instruction. This can be used in GCC 10453 // with a "B" modifier that prints the inverted value, for use with 10454 // BIC and MVN instructions. It is not useful otherwise but is 10455 // implemented for compatibility. 10456 if (ARM_AM::getSOImmVal(~CVal) != -1) 10457 break; 10458 } 10459 return; 10460 10461 case 'L': 10462 if (Subtarget->isThumb1Only()) { 10463 // This must be a constant between -7 and 7, 10464 // for 3-operand ADD/SUB immediate instructions. 10465 if (CVal >= -7 && CVal < 7) 10466 break; 10467 } else if (Subtarget->isThumb2()) { 10468 // A constant whose negation can be used as an immediate value in a 10469 // data-processing instruction. This can be used in GCC with an "n" 10470 // modifier that prints the negated value, for use with SUB 10471 // instructions. It is not useful otherwise but is implemented for 10472 // compatibility. 10473 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 10474 break; 10475 } else { 10476 // A constant whose negation can be used as an immediate value in a 10477 // data-processing instruction. This can be used in GCC with an "n" 10478 // modifier that prints the negated value, for use with SUB 10479 // instructions. 
It is not useful otherwise but is implemented for 10480 // compatibility. 10481 if (ARM_AM::getSOImmVal(-CVal) != -1) 10482 break; 10483 } 10484 return; 10485 10486 case 'M': 10487 if (Subtarget->isThumb()) { // FIXME thumb2 10488 // This must be a multiple of 4 between 0 and 1020, for 10489 // ADD sp + immediate. 10490 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 10491 break; 10492 } else { 10493 // A power of two or a constant between 0 and 32. This is used in 10494 // GCC for the shift amount on shifted register operands, but it is 10495 // useful in general for any shift amounts. 10496 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 10497 break; 10498 } 10499 return; 10500 10501 case 'N': 10502 if (Subtarget->isThumb()) { // FIXME thumb2 10503 // This must be a constant between 0 and 31, for shift amounts. 10504 if (CVal >= 0 && CVal <= 31) 10505 break; 10506 } 10507 return; 10508 10509 case 'O': 10510 if (Subtarget->isThumb()) { // FIXME thumb2 10511 // This must be a multiple of 4 between -508 and 508, for 10512 // ADD/SUB sp = sp + immediate. 10513 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 10514 break; 10515 } 10516 return; 10517 } 10518 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 10519 break; 10520 } 10521 10522 if (Result.getNode()) { 10523 Ops.push_back(Result); 10524 return; 10525 } 10526 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 10527} 10528 10529bool 10530ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 10531 // The ARM target isn't yet aware of offsets. 10532 return false; 10533} 10534 10535bool ARM::isBitFieldInvertedMask(unsigned v) { 10536 if (v == 0xffffffff) 10537 return false; 10538 10539 // there can be 1's on either or both "outsides", all the "inside" 10540 // bits must be 0's 10541 unsigned TO = CountTrailingOnes_32(v); 10542 unsigned LO = CountLeadingOnes_32(v); 10543 v = (v >> TO) << TO; 10544 v = (v << LO) >> LO; 10545 return v == 0; 10546} 10547 10548/// isFPImmLegal - Returns true if the target can instruction select the 10549/// specified FP immediate natively. If false, the legalizer will 10550/// materialize the FP immediate as a load from a constant pool. 10551bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 10552 if (!Subtarget->hasVFP3()) 10553 return false; 10554 if (VT == MVT::f32) 10555 return ARM_AM::getFP32Imm(Imm) != -1; 10556 if (VT == MVT::f64) 10557 return ARM_AM::getFP64Imm(Imm) != -1; 10558 return false; 10559} 10560 10561/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 10562/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 10563/// specified in the intrinsic calls. 10564bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 10565 const CallInst &I, 10566 unsigned Intrinsic) const { 10567 switch (Intrinsic) { 10568 case Intrinsic::arm_neon_vld1: 10569 case Intrinsic::arm_neon_vld2: 10570 case Intrinsic::arm_neon_vld3: 10571 case Intrinsic::arm_neon_vld4: 10572 case Intrinsic::arm_neon_vld2lane: 10573 case Intrinsic::arm_neon_vld3lane: 10574 case Intrinsic::arm_neon_vld4lane: { 10575 Info.opc = ISD::INTRINSIC_W_CHAIN; 10576 // Conservatively set memVT to the entire set of vectors loaded. 
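    // For example, a vld3.32 returning three <4 x i32> vectors loads 48
    // bytes, which is modeled here as a v6i64 memory type.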
10577 uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; 10578 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10579 Info.ptrVal = I.getArgOperand(0); 10580 Info.offset = 0; 10581 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10582 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10583 Info.vol = false; // volatile loads with NEON intrinsics not supported 10584 Info.readMem = true; 10585 Info.writeMem = false; 10586 return true; 10587 } 10588 case Intrinsic::arm_neon_vst1: 10589 case Intrinsic::arm_neon_vst2: 10590 case Intrinsic::arm_neon_vst3: 10591 case Intrinsic::arm_neon_vst4: 10592 case Intrinsic::arm_neon_vst2lane: 10593 case Intrinsic::arm_neon_vst3lane: 10594 case Intrinsic::arm_neon_vst4lane: { 10595 Info.opc = ISD::INTRINSIC_VOID; 10596 // Conservatively set memVT to the entire set of vectors stored. 10597 unsigned NumElts = 0; 10598 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 10599 Type *ArgTy = I.getArgOperand(ArgI)->getType(); 10600 if (!ArgTy->isVectorTy()) 10601 break; 10602 NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; 10603 } 10604 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 10605 Info.ptrVal = I.getArgOperand(0); 10606 Info.offset = 0; 10607 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 10608 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 10609 Info.vol = false; // volatile stores with NEON intrinsics not supported 10610 Info.readMem = false; 10611 Info.writeMem = true; 10612 return true; 10613 } 10614 case Intrinsic::arm_strexd: { 10615 Info.opc = ISD::INTRINSIC_W_CHAIN; 10616 Info.memVT = MVT::i64; 10617 Info.ptrVal = I.getArgOperand(2); 10618 Info.offset = 0; 10619 Info.align = 8; 10620 Info.vol = true; 10621 Info.readMem = false; 10622 Info.writeMem = true; 10623 return true; 10624 } 10625 case Intrinsic::arm_ldrexd: { 10626 Info.opc = ISD::INTRINSIC_W_CHAIN; 10627 Info.memVT = MVT::i64; 10628 Info.ptrVal = I.getArgOperand(0); 10629 Info.offset = 0; 10630 Info.align = 8; 10631 Info.vol = true; 10632 Info.readMem = true; 10633 Info.writeMem = false; 10634 return true; 10635 } 10636 default: 10637 break; 10638 } 10639 10640 return false; 10641} 10642