// ARMISelLowering.cpp, revision de2b151dbf125af49717807b9cfc1f6f7a5b9ea6
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
"llvm/CodeGen/SelectionDAG.h" 4190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/MC/MCSectionMachO.h" 4290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/Target/TargetOptions.h" 4390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/ADT/VectorExtras.h" 4490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/ADT/Statistic.h" 4590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/Support/CommandLine.h" 4690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/Support/ErrorHandling.h" 4790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/Support/MathExtras.h" 4890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include "llvm/Support/raw_ostream.h" 4990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber#include <sstream> 5090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberusing namespace llvm; 5190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberSTATISTIC(NumTailCalls, "Number of tail calls"); 5390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 5490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber// This option should go away when Machine LICM is smart enough to hoist a 5590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber// reg-to-reg VDUP. 5690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic cl::opt<bool> 5790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberEnableARMVDUPsplat("arm-vdup-splat", cl::Hidden, 5890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."), 5990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::init(false)); 6090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 6190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic cl::opt<bool> 6290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberEnableARMLongCalls("arm-long-calls", cl::Hidden, 63538f6170b788de7408b06efc6613dc98579aa6a6Andreas Huber cl::desc("Generate calls via indirect call instructions"), 6490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::init(false)); 6590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 6690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic cl::opt<bool> 6790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberARMInterworking("arm-interworking", cl::Hidden, 6890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::desc("Enable / disable ARM interworking (for debugging only)"), 6990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::init(true)); 7090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7190d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic cl::opt<bool> 7290d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas HuberEnableARMCodePlacement("arm-code-placement", cl::Hidden, 7390d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::desc("Enable code placement pass for ARM"), 7490d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber cl::init(false)); 7590d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber 7690d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, 7790d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber CCValAssign::LocInfo &LocInfo, 7890d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber ISD::ArgFlagsTy &ArgFlags, 7990d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huber CCState &State); 8090d3ed91ae9228e1c8bab561b6138d4cb8c1e4fdAndreas Huberstatic bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT 
static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State);
static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
  }

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  // Libcalls should use the AAPCS base standard ABI, even if hard float
  // is in effect, as per the ARM RTABI specification, section 4.1.2.
  if (Subtarget->isAAPCS_ABI()) {
    for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
      setLibcallCallingConv(static_cast<RTLIB::Libcall>(i),
                            CallingConv::ARM_AAPCS);
    }
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    setOperationAction(ISD::MUL, MVT::v2i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops())
    setTargetDAGCombine(ISD::OR);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  maxStoresPerMemcpy = 1; // temporary - rewrite interface to use type

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  if (EnableARMCodePlacement)
    benefitFromCodePlacementOpt = true;
}

std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper: return "ARMISD::Wrapper";
  case ARMISD::WrapperJT: return "ARMISD::WrapperJT";
  case ARMISD::CALL: return "ARMISD::CALL";
  case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL: return "ARMISD::tCALL";
  case ARMISD::BRCOND: return "ARMISD::BRCOND";
  case ARMISD::BR_JT: return "ARMISD::BR_JT";
  case ARMISD::BR2_JT: return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD";
  case ARMISD::CMP: return "ARMISD::CMP";
  case ARMISD::CMPZ: return "ARMISD::CMPZ";
  case ARMISD::CMPFP: return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
  case ARMISD::CMOV: return "ARMISD::CMOV";
  case ARMISD::CNEG: return "ARMISD::CNEG";

  case ARMISD::RBIT: return "ARMISD::RBIT";

  case ARMISD::FTOSI: return "ARMISD::FTOSI";
  case ARMISD::FTOUI: return "ARMISD::FTOUI";
  case ARMISD::SITOF: return "ARMISD::SITOF";
  case ARMISD::UITOF: return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
  case ARMISD::RRX: return "ARMISD::RRX";

  case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ: return "ARMISD::VCEQ";
  case ARMISD::VCGE: return "ARMISD::VCGE";
  case ARMISD::VCGEU: return "ARMISD::VCGEU";
  case ARMISD::VCGT: return "ARMISD::VCGT";
  case ARMISD::VCGTU: return "ARMISD::VCGTU";
  case ARMISD::VTST: return "ARMISD::VTST";

  case ARMISD::VSHL: return "ARMISD::VSHL";
  case ARMISD::VSHRs: return "ARMISD::VSHRs";
"ARMISD::VSHRs"; 644 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 645 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 646 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 647 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 648 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 649 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 650 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 651 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 652 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 653 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 654 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 655 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 656 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 657 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 658 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 659 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 660 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 661 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 662 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 663 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 664 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 665 case ARMISD::VDUP: return "ARMISD::VDUP"; 666 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 667 case ARMISD::VEXT: return "ARMISD::VEXT"; 668 case ARMISD::VREV64: return "ARMISD::VREV64"; 669 case ARMISD::VREV32: return "ARMISD::VREV32"; 670 case ARMISD::VREV16: return "ARMISD::VREV16"; 671 case ARMISD::VZIP: return "ARMISD::VZIP"; 672 case ARMISD::VUZP: return "ARMISD::VUZP"; 673 case ARMISD::VTRN: return "ARMISD::VTRN"; 674 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 675 case ARMISD::FMAX: return "ARMISD::FMAX"; 676 case ARMISD::FMIN: return "ARMISD::FMIN"; 677 case ARMISD::BFI: return "ARMISD::BFI"; 678 } 679} 680 681/// getRegClassFor - Return the register class that should be used for the 682/// specified value type. 683TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 684 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 685 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 686 // load / store 4 to 8 consecutive D registers. 687 if (Subtarget->hasNEON()) { 688 if (VT == MVT::v4i64) 689 return ARM::QQPRRegisterClass; 690 else if (VT == MVT::v8i64) 691 return ARM::QQQQPRRegisterClass; 692 } 693 return TargetLowering::getRegClassFor(VT); 694} 695 696// Create a fast isel object. 697FastISel * 698ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { 699 return ARM::createFastISel(funcInfo); 700} 701 702/// getFunctionAlignment - Return the Log2 alignment of this function. 703unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 704 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; 705} 706 707/// getMaximalGlobalOffset - Returns the maximal possible offset which can 708/// be used for loads / stores from the global. 709unsigned ARMTargetLowering::getMaximalGlobalOffset() const { 710 return (Subtarget->isThumb1Only() ? 127 : 4095); 711} 712 713Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 714 unsigned NumVals = N->getNumValues(); 715 if (!NumVals) 716 return Sched::RegPressure; 717 718 for (unsigned i = 0; i != NumVals; ++i) { 719 EVT VT = N->getValueType(i); 720 if (VT.isFloatingPoint() || VT.isVector()) 721 return Sched::Latency; 722 } 723 724 if (!N->isMachineOpcode()) 725 return Sched::RegPressure; 726 727 // Load are scheduled for latency even if there instruction itinerary 728 // is not available. 
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
  if (TID.mayLoad())
    return Sched::Latency;

  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
    return Sched::Latency;
  return Sched::RegPressure;
}

unsigned
ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC,
                                       MachineFunction &MF) const {
  switch (RC->getID()) {
  default:
    return 0;
  case ARM::tGPRRegClassID:
    return RegInfo->hasFP(MF) ? 4 : 5;
  case ARM::GPRRegClassID: {
    unsigned FP = RegInfo->hasFP(MF) ? 1 : 0;
    return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0);
  }
  case ARM::SPRRegClassID:  // Currently not used as 'rep' register class.
  case ARM::DPRRegClassID:
    return 32 - 10;
  }
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

// APCS f64 is in register pairs, possibly split to stack
static bool f64AssignAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                          CCValAssign::LocInfo &LocInfo,
                          CCState &State, bool CanFail) {
  static const unsigned RegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 };

  // Try to get the first register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else {
    // For the 2nd half of a v2f64, do not fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 4),
                                           LocVT, LocInfo));
    return true;
  }

  // Try to get the second register.
  if (unsigned Reg = State.AllocateReg(RegList, 4))
    State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  else
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(4, 4),
                                           LocVT, LocInfo));
  return true;
}

static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                   CCValAssign::LocInfo &LocInfo,
                                   ISD::ArgFlagsTy &ArgFlags,
                                   CCState &State) {
  if (!f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

// AAPCS f64 is in aligned register pairs
static bool f64AssignAAPCS(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                           CCValAssign::LocInfo &LocInfo,
                           CCState &State, bool CanFail) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };
  static const unsigned ShadowRegList[] = { ARM::R0, ARM::R1 };

  unsigned Reg = State.AllocateReg(HiRegList, ShadowRegList, 2);
  if (Reg == 0) {
    // For the 2nd half of a v2f64, do not just fail.
    if (CanFail)
      return false;

    // Put the whole thing on the stack.
    State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT,
                                           State.AllocateStack(8, 8),
                                           LocVT, LocInfo));
    return true;
  }

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  unsigned T = State.AllocateReg(LoRegList[i]);
  (void)T;
  assert(T == LoRegList[i] && "Could not allocate register");

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool CC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                    CCValAssign::LocInfo &LocInfo,
                                    ISD::ArgFlagsTy &ArgFlags,
                                    CCState &State) {
  if (!f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, true))
    return false;
  if (LocVT == MVT::v2f64 &&
      !f64AssignAAPCS(ValNo, ValVT, LocVT, LocInfo, State, false))
    return false;
  return true;  // we handled it
}

static bool f64RetAssign(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                         CCValAssign::LocInfo &LocInfo, CCState &State) {
  static const unsigned HiRegList[] = { ARM::R0, ARM::R2 };
  static const unsigned LoRegList[] = { ARM::R1, ARM::R3 };

  unsigned Reg = State.AllocateReg(HiRegList, LoRegList, 2);
  if (Reg == 0)
    return false; // we didn't handle it

  unsigned i;
  for (i = 0; i < 2; ++i)
    if (HiRegList[i] == Reg)
      break;

  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
  State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, LoRegList[i],
                                         LocVT, LocInfo));
  return true;
}

static bool RetCC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                      CCValAssign::LocInfo &LocInfo,
                                      ISD::ArgFlagsTy &ArgFlags,
                                      CCState &State) {
  if (!f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  if (LocVT == MVT::v2f64 && !f64RetAssign(ValNo, ValVT, LocVT, LocInfo, State))
    return false;
  return true;  // we handled it
}

static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State) {
  return RetCC_ARM_APCS_Custom_f64(ValNo, ValVT, LocVT, LocInfo, ArgFlags,
                                   State);
}

/// CCAssignFnForNode - Selects the correct CCAssignFn for the
/// given CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack.
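/// The argument is either stored directly to the stack slot computed from the
/// assigned CCValAssign's memory offset or, for byval arguments, copied there
/// with a memcpy node (see CreateCopyOfByValArgument above).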
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal()) {
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
  }
  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      PseudoSourceValue::getStack(), LocMemOffset,
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address.
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register.
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           PseudoSourceValue::getConstantPool(), 0,
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
    // tBX takes a register source operand.
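    // (Note, as a hedged aside on the pre-v5T path that follows: without BLX,
    //  Thumb code cannot branch-and-link directly into an ARM-state callee,
    //  so the callee address is materialized into a register from the
    //  constant pool, with a PIC add when needed, and called indirectly.)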
1307 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1308 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1309 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1310 ARMPCLabelIndex, 1311 ARMCP::CPValue, 4); 1312 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1313 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1314 Callee = DAG.getLoad(getPointerTy(), dl, 1315 DAG.getEntryNode(), CPAddr, 1316 PseudoSourceValue::getConstantPool(), 0, 1317 false, false, 0); 1318 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1319 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1320 getPointerTy(), Callee, PICLabel); 1321 } else 1322 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); 1323 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1324 isDirect = true; 1325 bool isStub = Subtarget->isTargetDarwin() && 1326 getTargetMachine().getRelocationModel() != Reloc::Static; 1327 isARMFunc = !Subtarget->isThumb() || isStub; 1328 // tBX takes a register source operand. 1329 const char *Sym = S->getSymbol(); 1330 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1331 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1332 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1333 Sym, ARMPCLabelIndex, 4); 1334 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1335 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1336 Callee = DAG.getLoad(getPointerTy(), dl, 1337 DAG.getEntryNode(), CPAddr, 1338 PseudoSourceValue::getConstantPool(), 0, 1339 false, false, 0); 1340 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1341 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1342 getPointerTy(), Callee, PICLabel); 1343 } else 1344 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy()); 1345 } 1346 1347 // FIXME: handle tail calls differently. 1348 unsigned CallOpc; 1349 if (Subtarget->isThumb()) { 1350 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1351 CallOpc = ARMISD::CALL_NOLINK; 1352 else 1353 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1354 } else { 1355 CallOpc = (isDirect || Subtarget->hasV5TOps()) 1356 ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) 1357 : ARMISD::CALL_NOLINK; 1358 } 1359 1360 std::vector<SDValue> Ops; 1361 Ops.push_back(Chain); 1362 Ops.push_back(Callee); 1363 1364 // Add argument registers to the end of the list so that they are known live 1365 // into the call. 1366 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1367 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1368 RegsToPass[i].second.getValueType())); 1369 1370 if (InFlag.getNode()) 1371 Ops.push_back(InFlag); 1372 1373 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 1374 if (isTailCall) 1375 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1376 1377 // Returns a chain and a flag for retval copy to use. 1378 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1379 InFlag = Chain.getValue(1); 1380 1381 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1382 DAG.getIntPtrConstant(0, true), InFlag); 1383 if (!Ins.empty()) 1384 InFlag = Chain.getValue(1); 1385 1386 // Handle result values, copying them out of physregs into vregs that we 1387 // return. 
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins,
                         dl, DAG, InVals);
}

/// MatchingStackOffset - Return true if the given stack call argument is
/// already available in the same position (relatively) of the caller's
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
                         const ARMInstrInfo *TII) {
  unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR || TargetRegisterInfo::isPhysicalRegister(VR))
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      if (!TII->isLoadFromStackSlot(Def, FI))
        return false;
    } else {
      return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // ByVal argument is passed in as a pointer but it's now being
      // dereferenced, e.g.
      //   define @foo(%struct.X* %A) {
      //     tail call @bar(%struct.X* byval %A)
      //   }
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else
    return false;

  assert(FI != INT_MAX);
  if (!MFI->isFixedObjectIndex(FI))
    return false;
  return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
}

/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
bool
ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                                     bool isCalleeStructRet,
                                                     bool isCallerStructRet,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  const Function *CallerF = DAG.getMachineFunction().getFunction();
  CallingConv::ID CallerCC = CallerF->getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Do not sibcall optimize vararg calls unless the call site passes no
  // arguments.
  if (isVarArg && !Outs.empty())
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo::
  // emitEpilogue is not ready for them.
  // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take
  // LR. This means if we need to reload LR, it takes an extra instruction,
  // which outweighs the value of the tail call; but here we don't know yet
  // whether LR is going to be used. Probably the right approach is to
  // generate the tail call here and turn it back into CALL/RET in
  // emitEpilogue if LR is used.
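  // (Illustrative sketch, not specific to this check: the sibcall being
  //  discussed is the pattern
  //    int caller(int x) { return callee(x); }
  //  which would ideally lower to a single "b callee" that reuses the
  //  caller's frame, rather than "bl callee" followed by a normal return.)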
1474 if (Subtarget->isThumb1Only()) 1475 return false; 1476 1477 // For the moment, we can only do this to functions defined in this 1478 // compilation, or to indirect calls. A Thumb B to an ARM function, 1479 // or vice versa, is not easily fixed up in the linker unlike BL. 1480 // (We could do this by loading the address of the callee into a register; 1481 // that is an extra instruction over the direct call and burns a register 1482 // as well, so is not likely to be a win.) 1483 1484 // It might be safe to remove this restriction on non-Darwin. 1485 1486 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1487 // but we need to make sure there are enough registers; the only valid 1488 // registers are the 4 used for parameters. We don't currently do this 1489 // case. 1490 if (isa<ExternalSymbolSDNode>(Callee)) 1491 return false; 1492 1493 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1494 const GlobalValue *GV = G->getGlobal(); 1495 if (GV->isDeclaration() || GV->isWeakForLinker()) 1496 return false; 1497 } 1498 1499 // If the calling conventions do not match, then we'd better make sure the 1500 // results are returned in the same way as what the caller expects. 1501 if (!CCMatch) { 1502 SmallVector<CCValAssign, 16> RVLocs1; 1503 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1504 RVLocs1, *DAG.getContext()); 1505 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1506 1507 SmallVector<CCValAssign, 16> RVLocs2; 1508 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1509 RVLocs2, *DAG.getContext()); 1510 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1511 1512 if (RVLocs1.size() != RVLocs2.size()) 1513 return false; 1514 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1515 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1516 return false; 1517 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1518 return false; 1519 if (RVLocs1[i].isRegLoc()) { 1520 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1521 return false; 1522 } else { 1523 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1524 return false; 1525 } 1526 } 1527 } 1528 1529 // If the callee takes no arguments then go on to check the results of the 1530 // call. 1531 if (!Outs.empty()) { 1532 // Check if stack adjustment is needed. For now, do not do this if any 1533 // argument is passed on the stack. 1534 SmallVector<CCValAssign, 16> ArgLocs; 1535 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1536 ArgLocs, *DAG.getContext()); 1537 CCInfo.AnalyzeCallOperands(Outs, 1538 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1539 if (CCInfo.getNextStackOffset()) { 1540 MachineFunction &MF = DAG.getMachineFunction(); 1541 1542 // Check if the arguments are already laid out in the right way as 1543 // the caller's fixed stack objects. 
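      // (Sketch of the common case MatchingStackOffset accepts: an outgoing
      //  argument that is simply the caller's own incoming stack argument,
      //  reloaded from its fixed frame index at the same offset and size, so
      //  the sibcall can reuse the existing slot without any stack traffic.)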
1544 MachineFrameInfo *MFI = MF.getFrameInfo(); 1545 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1546 const ARMInstrInfo *TII = 1547 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1548 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1549 i != e; 1550 ++i, ++realArgIdx) { 1551 CCValAssign &VA = ArgLocs[i]; 1552 EVT RegVT = VA.getLocVT(); 1553 SDValue Arg = OutVals[realArgIdx]; 1554 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1555 if (VA.getLocInfo() == CCValAssign::Indirect) 1556 return false; 1557 if (VA.needsCustom()) { 1558 // f64 and vector types are split into multiple registers or 1559 // register/stack-slot combinations. The types will not match 1560 // the registers; give up on memory f64 refs until we figure 1561 // out what to do about this. 1562 if (!VA.isRegLoc()) 1563 return false; 1564 if (!ArgLocs[++i].isRegLoc()) 1565 return false; 1566 if (RegVT == MVT::v2f64) { 1567 if (!ArgLocs[++i].isRegLoc()) 1568 return false; 1569 if (!ArgLocs[++i].isRegLoc()) 1570 return false; 1571 } 1572 } else if (!VA.isRegLoc()) { 1573 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1574 MFI, MRI, TII)) 1575 return false; 1576 } 1577 } 1578 } 1579 } 1580 1581 return true; 1582} 1583 1584SDValue 1585ARMTargetLowering::LowerReturn(SDValue Chain, 1586 CallingConv::ID CallConv, bool isVarArg, 1587 const SmallVectorImpl<ISD::OutputArg> &Outs, 1588 const SmallVectorImpl<SDValue> &OutVals, 1589 DebugLoc dl, SelectionDAG &DAG) const { 1590 1591 // CCValAssign - represent the assignment of the return value to a location. 1592 SmallVector<CCValAssign, 16> RVLocs; 1593 1594 // CCState - Info about the registers and stack slots. 1595 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1596 *DAG.getContext()); 1597 1598 // Analyze outgoing return values. 1599 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1600 isVarArg)); 1601 1602 // If this is the first return lowered for this function, add 1603 // the regs to the liveout set for the function. 1604 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1605 for (unsigned i = 0; i != RVLocs.size(); ++i) 1606 if (RVLocs[i].isRegLoc()) 1607 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1608 } 1609 1610 SDValue Flag; 1611 1612 // Copy the result values into the output registers. 1613 for (unsigned i = 0, realRVLocIdx = 0; 1614 i != RVLocs.size(); 1615 ++i, ++realRVLocIdx) { 1616 CCValAssign &VA = RVLocs[i]; 1617 assert(VA.isRegLoc() && "Can only return in registers!"); 1618 1619 SDValue Arg = OutVals[realRVLocIdx]; 1620 1621 switch (VA.getLocInfo()) { 1622 default: llvm_unreachable("Unknown loc info!"); 1623 case CCValAssign::Full: break; 1624 case CCValAssign::BCvt: 1625 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); 1626 break; 1627 } 1628 1629 if (VA.needsCustom()) { 1630 if (VA.getLocVT() == MVT::v2f64) { 1631 // Extract the first half and return it in two registers. 
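        // (Sketch of the path below: each f64 half of the v2f64 value is
        //  extracted and moved into a pair of core registers with VMOVRRD, so
        //  when four register return locations are assigned the whole value
        //  comes back in GPRs.)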
1632 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1633 DAG.getConstant(0, MVT::i32)); 1634 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1635 DAG.getVTList(MVT::i32, MVT::i32), Half); 1636 1637 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1638 Flag = Chain.getValue(1); 1639 VA = RVLocs[++i]; // skip ahead to next loc 1640 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1641 HalfGPRs.getValue(1), Flag); 1642 Flag = Chain.getValue(1); 1643 VA = RVLocs[++i]; // skip ahead to next loc 1644 1645 // Extract the 2nd half and fall through to handle it as an f64 value. 1646 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1647 DAG.getConstant(1, MVT::i32)); 1648 } 1649 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1650 // available. 1651 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1652 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1653 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1654 Flag = Chain.getValue(1); 1655 VA = RVLocs[++i]; // skip ahead to next loc 1656 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1657 Flag); 1658 } else 1659 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1660 1661 // Guarantee that all emitted copies are 1662 // stuck together, avoiding something bad. 1663 Flag = Chain.getValue(1); 1664 } 1665 1666 SDValue result; 1667 if (Flag.getNode()) 1668 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1669 else // Return Void 1670 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1671 1672 return result; 1673} 1674 1675// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1676// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1677// one of the above mentioned nodes. It has to be wrapped because otherwise 1678// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1679// be used to form addressing mode. These wrapped nodes will be selected 1680// into MOVi. 1681static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1682 EVT PtrVT = Op.getValueType(); 1683 // FIXME there is no actual debug info here 1684 DebugLoc dl = Op.getDebugLoc(); 1685 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1686 SDValue Res; 1687 if (CP->isMachineConstantPoolEntry()) 1688 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1689 CP->getAlignment()); 1690 else 1691 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1692 CP->getAlignment()); 1693 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1694} 1695 1696unsigned ARMTargetLowering::getJumpTableEncoding() const { 1697 return MachineJumpTableInfo::EK_Inline; 1698} 1699 1700SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1701 SelectionDAG &DAG) const { 1702 MachineFunction &MF = DAG.getMachineFunction(); 1703 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1704 unsigned ARMPCLabelIndex = 0; 1705 DebugLoc DL = Op.getDebugLoc(); 1706 EVT PtrVT = getPointerTy(); 1707 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1708 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1709 SDValue CPAddr; 1710 if (RelocM == Reloc::Static) { 1711 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1712 } else { 1713 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1714 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1715 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1716 ARMCP::CPBlockAddress, 1717 PCAdj); 1718 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1719 } 1720 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1721 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1722 PseudoSourceValue::getConstantPool(), 0, 1723 false, false, 0); 1724 if (RelocM == Reloc::Static) 1725 return Result; 1726 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1727 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1728} 1729 1730// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1731SDValue 1732ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1733 SelectionDAG &DAG) const { 1734 DebugLoc dl = GA->getDebugLoc(); 1735 EVT PtrVT = getPointerTy(); 1736 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1737 MachineFunction &MF = DAG.getMachineFunction(); 1738 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1739 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1740 ARMConstantPoolValue *CPV = 1741 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1742 ARMCP::CPValue, PCAdj, "tlsgd", true); 1743 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1744 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1745 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1746 PseudoSourceValue::getConstantPool(), 0, 1747 false, false, 0); 1748 SDValue Chain = Argument.getValue(1); 1749 1750 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1751 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1752 1753 // call __tls_get_addr. 1754 ArgListTy Args; 1755 ArgListEntry Entry; 1756 Entry.Node = Argument; 1757 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1758 Args.push_back(Entry); 1759 // FIXME: is there useful debug info available here? 1760 std::pair<SDValue, SDValue> CallResult = 1761 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1762 false, false, false, false, 1763 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1764 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1765 return CallResult.first; 1766} 1767 1768// Lower ISD::GlobalTLSAddress using the "initial exec" or 1769// "local exec" model. 1770SDValue 1771ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1772 SelectionDAG &DAG) const { 1773 const GlobalValue *GV = GA->getGlobal(); 1774 DebugLoc dl = GA->getDebugLoc(); 1775 SDValue Offset; 1776 SDValue Chain = DAG.getEntryNode(); 1777 EVT PtrVT = getPointerTy(); 1778 // Get the Thread Pointer 1779 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1780 1781 if (GV->isDeclaration()) { 1782 MachineFunction &MF = DAG.getMachineFunction(); 1783 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1784 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1785 // Initial exec model. 1786 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 1787 ARMConstantPoolValue *CPV = 1788 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1789 ARMCP::CPValue, PCAdj, "gottpoff", true); 1790 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1791 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1792 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1793 PseudoSourceValue::getConstantPool(), 0, 1794 false, false, 0); 1795 Chain = Offset.getValue(1); 1796 1797 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1798 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 1799 1800 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1801 PseudoSourceValue::getConstantPool(), 0, 1802 false, false, 0); 1803 } else { 1804 // local exec model 1805 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); 1806 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1807 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1808 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1809 PseudoSourceValue::getConstantPool(), 0, 1810 false, false, 0); 1811 } 1812 1813 // The address of the thread local variable is the add of the thread 1814 // pointer with the offset of the variable. 1815 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 1816} 1817 1818SDValue 1819ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 1820 // TODO: implement the "local dynamic" model 1821 assert(Subtarget->isTargetELF() && 1822 "TLS not implemented for non-ELF targets"); 1823 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1824 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 1825 // otherwise use the "Local Exec" TLS Model 1826 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 1827 return LowerToTLSGeneralDynamicModel(GA, DAG); 1828 else 1829 return LowerToTLSExecModels(GA, DAG); 1830} 1831 1832SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 1833 SelectionDAG &DAG) const { 1834 EVT PtrVT = getPointerTy(); 1835 DebugLoc dl = Op.getDebugLoc(); 1836 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1837 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1838 if (RelocM == Reloc::PIC_) { 1839 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 1840 ARMConstantPoolValue *CPV = 1841 new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); 1842 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1843 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1844 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1845 CPAddr, 1846 PseudoSourceValue::getConstantPool(), 0, 1847 false, false, 0); 1848 SDValue Chain = Result.getValue(1); 1849 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1850 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1851 if (!UseGOTOFF) 1852 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1853 PseudoSourceValue::getGOT(), 0, 1854 false, false, 0); 1855 return Result; 1856 } else { 1857 // If we have T2 ops, we can materialize the address directly via movt/movw 1858 // pair. This is always cheaper. 
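    // (Illustrative sketch of the useMovt() path below, assuming the usual
    //  GNU-style relocation operators: the address is materialized with a
    //  pair along the lines of
    //    movw r0, :lower16:sym
    //    movt r0, :upper16:sym
    //  instead of being loaded from a PC-relative constant pool entry.)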
1859 if (Subtarget->useMovt()) { 1860 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 1861 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 1862 } else { 1863 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1864 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1865 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1866 PseudoSourceValue::getConstantPool(), 0, 1867 false, false, 0); 1868 } 1869 } 1870} 1871 1872SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 1873 SelectionDAG &DAG) const { 1874 MachineFunction &MF = DAG.getMachineFunction(); 1875 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1876 unsigned ARMPCLabelIndex = 0; 1877 EVT PtrVT = getPointerTy(); 1878 DebugLoc dl = Op.getDebugLoc(); 1879 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1880 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1881 SDValue CPAddr; 1882 if (RelocM == Reloc::Static) 1883 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1884 else { 1885 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1886 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 1887 ARMConstantPoolValue *CPV = 1888 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1889 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1890 } 1891 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1892 1893 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1894 PseudoSourceValue::getConstantPool(), 0, 1895 false, false, 0); 1896 SDValue Chain = Result.getValue(1); 1897 1898 if (RelocM == Reloc::PIC_) { 1899 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1900 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1901 } 1902 1903 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1904 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1905 PseudoSourceValue::getGOT(), 0, 1906 false, false, 0); 1907 1908 return Result; 1909} 1910 1911SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1912 SelectionDAG &DAG) const { 1913 assert(Subtarget->isTargetELF() && 1914 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1915 MachineFunction &MF = DAG.getMachineFunction(); 1916 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1917 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1918 EVT PtrVT = getPointerTy(); 1919 DebugLoc dl = Op.getDebugLoc(); 1920 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1921 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1922 "_GLOBAL_OFFSET_TABLE_", 1923 ARMPCLabelIndex, PCAdj); 1924 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1925 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1926 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1927 PseudoSourceValue::getConstantPool(), 0, 1928 false, false, 0); 1929 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1930 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1931} 1932 1933SDValue 1934ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 1935 DebugLoc dl = Op.getDebugLoc(); 1936 SDValue Val = DAG.getConstant(0, MVT::i32); 1937 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 1938 Op.getOperand(1), Val); 1939} 1940 1941SDValue 1942ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 1943 DebugLoc dl = Op.getDebugLoc(); 1944 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 1945 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 1946} 1947 1948SDValue 1949ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1950 const ARMSubtarget *Subtarget) const { 1951 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1952 DebugLoc dl = Op.getDebugLoc(); 1953 switch (IntNo) { 1954 default: return SDValue(); // Don't custom lower most intrinsics. 1955 case Intrinsic::arm_thread_pointer: { 1956 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1957 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1958 } 1959 case Intrinsic::eh_sjlj_lsda: { 1960 MachineFunction &MF = DAG.getMachineFunction(); 1961 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1962 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1963 EVT PtrVT = getPointerTy(); 1964 DebugLoc dl = Op.getDebugLoc(); 1965 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1966 SDValue CPAddr; 1967 unsigned PCAdj = (RelocM != Reloc::PIC_) 1968 ? 0 : (Subtarget->isThumb() ? 4 : 8); 1969 ARMConstantPoolValue *CPV = 1970 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1971 ARMCP::CPLSDA, PCAdj); 1972 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1973 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1974 SDValue Result = 1975 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1976 PseudoSourceValue::getConstantPool(), 0, 1977 false, false, 0); 1978 1979 if (RelocM == Reloc::PIC_) { 1980 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1981 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1982 } 1983 return Result; 1984 } 1985 } 1986} 1987 1988static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 1989 const ARMSubtarget *Subtarget) { 1990 DebugLoc dl = Op.getDebugLoc(); 1991 SDValue Op5 = Op.getOperand(5); 1992 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 1993 // Some subtargets which have dmb and dsb instructions can handle barriers 1994 // directly. Some ARMv6 cpus can support them with the help of mcr 1995 // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should 1996 // never get here. 1997 unsigned Opc = isDeviceBarrier ? 
ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; 1998 if (Subtarget->hasDataBarrier()) 1999 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); 2000 else { 2001 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && 2002 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2003 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), 2004 DAG.getConstant(0, MVT::i32)); 2005 } 2006} 2007 2008static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2009 MachineFunction &MF = DAG.getMachineFunction(); 2010 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2011 2012 // vastart just stores the address of the VarArgsFrameIndex slot into the 2013 // memory location argument. 2014 DebugLoc dl = Op.getDebugLoc(); 2015 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2016 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2017 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2018 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0, 2019 false, false, 0); 2020} 2021 2022SDValue 2023ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2024 SDValue &Root, SelectionDAG &DAG, 2025 DebugLoc dl) const { 2026 MachineFunction &MF = DAG.getMachineFunction(); 2027 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2028 2029 TargetRegisterClass *RC; 2030 if (AFI->isThumb1OnlyFunction()) 2031 RC = ARM::tGPRRegisterClass; 2032 else 2033 RC = ARM::GPRRegisterClass; 2034 2035 // Transform the arguments stored in physical registers into virtual ones. 2036 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2037 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2038 2039 SDValue ArgValue2; 2040 if (NextVA.isMemLoc()) { 2041 MachineFrameInfo *MFI = MF.getFrameInfo(); 2042 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2043 2044 // Create load node to retrieve arguments from the stack. 2045 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2046 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2047 PseudoSourceValue::getFixedStack(FI), 0, 2048 false, false, 0); 2049 } else { 2050 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2051 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2052 } 2053 2054 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2055} 2056 2057SDValue 2058ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2059 CallingConv::ID CallConv, bool isVarArg, 2060 const SmallVectorImpl<ISD::InputArg> 2061 &Ins, 2062 DebugLoc dl, SelectionDAG &DAG, 2063 SmallVectorImpl<SDValue> &InVals) 2064 const { 2065 2066 MachineFunction &MF = DAG.getMachineFunction(); 2067 MachineFrameInfo *MFI = MF.getFrameInfo(); 2068 2069 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2070 2071 // Assign locations to all of the incoming arguments. 2072 SmallVector<CCValAssign, 16> ArgLocs; 2073 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2074 *DAG.getContext()); 2075 CCInfo.AnalyzeFormalArguments(Ins, 2076 CCAssignFnForNode(CallConv, /* Return*/ false, 2077 isVarArg)); 2078 2079 SmallVector<SDValue, 16> ArgValues; 2080 2081 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2082 CCValAssign &VA = ArgLocs[i]; 2083 2084 // Arguments stored in registers. 2085 if (VA.isRegLoc()) { 2086 EVT RegVT = VA.getLocVT(); 2087 2088 SDValue ArgValue; 2089 if (VA.needsCustom()) { 2090 // f64 and vector types are split up into multiple registers or 2091 // combinations of registers and stack slots. 
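        // (Sketch, per the code below: an f64 piece may land entirely in a
        //  GPR pair, or be split between the last available GPR and a stack
        //  slot; GetF64FormalArgument reassembles either form with VMOVDRR.)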
2092 if (VA.getLocVT() == MVT::v2f64) { 2093 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2094 Chain, DAG, dl); 2095 VA = ArgLocs[++i]; // skip ahead to next loc 2096 SDValue ArgValue2; 2097 if (VA.isMemLoc()) { 2098 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2099 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2100 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2101 PseudoSourceValue::getFixedStack(FI), 0, 2102 false, false, 0); 2103 } else { 2104 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2105 Chain, DAG, dl); 2106 } 2107 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2108 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2109 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2110 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2111 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2112 } else 2113 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2114 2115 } else { 2116 TargetRegisterClass *RC; 2117 2118 if (RegVT == MVT::f32) 2119 RC = ARM::SPRRegisterClass; 2120 else if (RegVT == MVT::f64) 2121 RC = ARM::DPRRegisterClass; 2122 else if (RegVT == MVT::v2f64) 2123 RC = ARM::QPRRegisterClass; 2124 else if (RegVT == MVT::i32) 2125 RC = (AFI->isThumb1OnlyFunction() ? 2126 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2127 else 2128 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2129 2130 // Transform the arguments in physical registers into virtual ones. 2131 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2132 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2133 } 2134 2135 // If this is an 8 or 16-bit value, it is really passed promoted 2136 // to 32 bits. Insert an assert[sz]ext to capture this, then 2137 // truncate to the right size. 2138 switch (VA.getLocInfo()) { 2139 default: llvm_unreachable("Unknown loc info!"); 2140 case CCValAssign::Full: break; 2141 case CCValAssign::BCvt: 2142 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 2143 break; 2144 case CCValAssign::SExt: 2145 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2146 DAG.getValueType(VA.getValVT())); 2147 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2148 break; 2149 case CCValAssign::ZExt: 2150 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2151 DAG.getValueType(VA.getValVT())); 2152 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2153 break; 2154 } 2155 2156 InVals.push_back(ArgValue); 2157 2158 } else { // VA.isRegLoc() 2159 2160 // sanity check 2161 assert(VA.isMemLoc()); 2162 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2163 2164 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 2165 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); 2166 2167 // Create load nodes to retrieve arguments from the stack. 
      SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   PseudoSourceValue::getFixedStack(FI), 0,
                                   false, false, 0));
    }
  }

  // varargs
  if (isVarArg) {
    static const unsigned GPRArgRegs[] = {
      ARM::R0, ARM::R1, ARM::R2, ARM::R3
    };

    unsigned NumGPRs = CCInfo.getFirstUnallocated
      (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0]));

    unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment();
    unsigned VARegSize = (4 - NumGPRs) * 4;
    unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1);
    unsigned ArgOffset = CCInfo.getNextStackOffset();
    if (VARegSaveSize) {
      // If this function is vararg, store any remaining integer argument regs
      // to their spots on the stack so that they may be loaded by
      // dereferencing the result of va_next.
      AFI->setVarArgsRegSaveSize(VARegSaveSize);
      AFI->setVarArgsFrameIndex(
        MFI->CreateFixedObject(VARegSaveSize,
                               ArgOffset + VARegSaveSize - VARegSize,
                               true));
      SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(),
                                      getPointerTy());

      SmallVector<SDValue, 4> MemOps;
      for (; NumGPRs < 4; ++NumGPRs) {
        TargetRegisterClass *RC;
        if (AFI->isThumb1OnlyFunction())
          RC = ARM::tGPRRegisterClass;
        else
          RC = ARM::GPRRegisterClass;

        unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32);
        SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN,
               PseudoSourceValue::getFixedStack(AFI->getVarArgsFrameIndex()),
               0, false, false, 0);
        MemOps.push_back(Store);
        FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN,
                          DAG.getConstant(4, getPointerTy()));
      }
      if (!MemOps.empty())
        Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                            &MemOps[0], MemOps.size());
    } else
      // This will point to the next argument passed via stack.
      AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true));
  }

  return Chain;
}

/// isFloatingPointZero - Return true if this is +0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isPosZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) {
      SDValue WrapperOp = Op.getOperand(1).getOperand(0);
      if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp))
        if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
          return CFP->getValueAPF().isPosZero();
    }
  }
  return false;
}

/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
SDValue
ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                             SDValue &ARMcc, SelectionDAG &DAG,
                             DebugLoc dl) const {
  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
    unsigned C = RHSC->getZExtValue();
    if (!isLegalICmpImmediate(C)) {
      // Constant does not fit, try adjusting it by one?
      switch (CC) {
      default: break;
      case ISD::SETLT:
      case ISD::SETGE:
        if (isLegalICmpImmediate(C-1)) {
          CC = (CC == ISD::SETLT) ?
ISD::SETLE : ISD::SETGT; 2261 RHS = DAG.getConstant(C-1, MVT::i32); 2262 } 2263 break; 2264 case ISD::SETULT: 2265 case ISD::SETUGE: 2266 if (C > 0 && isLegalICmpImmediate(C-1)) { 2267 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2268 RHS = DAG.getConstant(C-1, MVT::i32); 2269 } 2270 break; 2271 case ISD::SETLE: 2272 case ISD::SETGT: 2273 if (isLegalICmpImmediate(C+1)) { 2274 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2275 RHS = DAG.getConstant(C+1, MVT::i32); 2276 } 2277 break; 2278 case ISD::SETULE: 2279 case ISD::SETUGT: 2280 if (C < 0xffffffff && isLegalICmpImmediate(C+1)) { 2281 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2282 RHS = DAG.getConstant(C+1, MVT::i32); 2283 } 2284 break; 2285 } 2286 } 2287 } 2288 2289 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2290 ARMISD::NodeType CompareType; 2291 switch (CondCode) { 2292 default: 2293 CompareType = ARMISD::CMP; 2294 break; 2295 case ARMCC::EQ: 2296 case ARMCC::NE: 2297 // Uses only Z Flag 2298 CompareType = ARMISD::CMPZ; 2299 break; 2300 } 2301 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2302 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 2303} 2304 2305/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 2306SDValue 2307ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2308 DebugLoc dl) const { 2309 SDValue Cmp; 2310 if (!isFloatingPointZero(RHS)) 2311 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 2312 else 2313 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 2314 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 2315} 2316 2317SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2318 SDValue Cond = Op.getOperand(0); 2319 SDValue SelectTrue = Op.getOperand(1); 2320 SDValue SelectFalse = Op.getOperand(2); 2321 DebugLoc dl = Op.getDebugLoc(); 2322 2323 // Convert: 2324 // 2325 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2326 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2327 // 2328 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2329 const ConstantSDNode *CMOVTrue = 2330 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2331 const ConstantSDNode *CMOVFalse = 2332 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2333 2334 if (CMOVTrue && CMOVFalse) { 2335 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2336 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2337 2338 SDValue True; 2339 SDValue False; 2340 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2341 True = SelectTrue; 2342 False = SelectFalse; 2343 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2344 True = SelectFalse; 2345 False = SelectTrue; 2346 } 2347 2348 if (True.getNode() && False.getNode()) { 2349 EVT VT = Cond.getValueType(); 2350 SDValue ARMcc = Cond.getOperand(2); 2351 SDValue CCR = Cond.getOperand(3); 2352 SDValue Cmp = Cond.getOperand(4); 2353 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2354 } 2355 } 2356 } 2357 2358 return DAG.getSelectCC(dl, Cond, 2359 DAG.getConstant(0, Cond.getValueType()), 2360 SelectTrue, SelectFalse, ISD::SETNE); 2361} 2362 2363SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2364 EVT VT = Op.getValueType(); 2365 SDValue LHS = Op.getOperand(0); 2366 SDValue RHS = Op.getOperand(1); 2367 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2368 SDValue TrueVal = Op.getOperand(2); 2369 SDValue FalseVal = Op.getOperand(3); 2370 DebugLoc dl = Op.getDebugLoc(); 2371 2372 if (LHS.getValueType() == MVT::i32) 
{ 2373 SDValue ARMcc; 2374 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2375 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2376 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2377 } 2378 2379 ARMCC::CondCodes CondCode, CondCode2; 2380 FPCCToARMCC(CC, CondCode, CondCode2); 2381 2382 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2383 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2384 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2385 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2386 ARMcc, CCR, Cmp); 2387 if (CondCode2 != ARMCC::AL) { 2388 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2389 // FIXME: Needs another CMP because flag can have but one use. 2390 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2391 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2392 Result, TrueVal, ARMcc2, CCR, Cmp2); 2393 } 2394 return Result; 2395} 2396 2397/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2398/// to morph to an integer compare sequence. 2399static bool canChangeToInt(SDValue Op, bool &SeenZero, 2400 const ARMSubtarget *Subtarget) { 2401 SDNode *N = Op.getNode(); 2402 if (!N->hasOneUse()) 2403 // Otherwise it requires moving the value from fp to integer registers. 2404 return false; 2405 if (!N->getNumValues()) 2406 return false; 2407 EVT VT = Op.getValueType(); 2408 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2409 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2410 // vmrs are very slow, e.g. cortex-a8. 2411 return false; 2412 2413 if (isFloatingPointZero(Op)) { 2414 SeenZero = true; 2415 return true; 2416 } 2417 return ISD::isNormalLoad(N); 2418} 2419 2420static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2421 if (isFloatingPointZero(Op)) 2422 return DAG.getConstant(0, MVT::i32); 2423 2424 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2425 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2426 Ld->getChain(), Ld->getBasePtr(), 2427 Ld->getSrcValue(), Ld->getSrcValueOffset(), 2428 Ld->isVolatile(), Ld->isNonTemporal(), 2429 Ld->getAlignment()); 2430 2431 llvm_unreachable("Unknown VFP cmp argument!"); 2432} 2433 2434static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2435 SDValue &RetVal1, SDValue &RetVal2) { 2436 if (isFloatingPointZero(Op)) { 2437 RetVal1 = DAG.getConstant(0, MVT::i32); 2438 RetVal2 = DAG.getConstant(0, MVT::i32); 2439 return; 2440 } 2441 2442 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2443 SDValue Ptr = Ld->getBasePtr(); 2444 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2445 Ld->getChain(), Ptr, 2446 Ld->getSrcValue(), Ld->getSrcValueOffset(), 2447 Ld->isVolatile(), Ld->isNonTemporal(), 2448 Ld->getAlignment()); 2449 2450 EVT PtrType = Ptr.getValueType(); 2451 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2452 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2453 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2454 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2455 Ld->getChain(), NewPtr, 2456 Ld->getSrcValue(), Ld->getSrcValueOffset() + 4, 2457 Ld->isVolatile(), Ld->isNonTemporal(), 2458 NewAlign); 2459 return; 2460 } 2461 2462 llvm_unreachable("Unknown VFP cmp argument!"); 2463} 2464 2465/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2466/// f32 and even f64 comparisons to integer ones. 
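/// For example (sketch of what the code below builds): with unsafe FP math,
/// an f32 branch on "a == b" becomes an integer CMP of the raw 32-bit
/// patterns of a and b (via bitcastf32Toi32), and an f64 compare becomes a
/// pairwise i32 compare through ARMISD::BCC_i64, avoiding the VFP compare
/// plus FMSTAT transfer.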
SDValue
ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  bool SeenZero = false;
  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
      canChangeToInt(RHS, SeenZero, Subtarget) &&
      // If one of the operands is zero, it's safe to ignore the NaN case since
      // we only care about equality comparisons.
      (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) {
    // If unsafe fp math optimization is enabled and there are no other uses of
    // the CMP operands, and the condition code is EQ or NE, we can optimize it
    // to an integer comparison.
    if (CC == ISD::SETOEQ)
      CC = ISD::SETEQ;
    else if (CC == ISD::SETUNE)
      CC = ISD::SETNE;

    SDValue ARMcc;
    if (LHS.getValueType() == MVT::f32) {
      LHS = bitcastf32Toi32(LHS, DAG);
      RHS = bitcastf32Toi32(RHS, DAG);
      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                         Chain, Dest, ARMcc, CCR, Cmp);
    }

    SDValue LHS1, LHS2;
    SDValue RHS1, RHS2;
    expandf64Toi32(LHS, DAG, LHS1, LHS2);
    expandf64Toi32(RHS, DAG, RHS1, RHS2);
    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
    ARMcc = DAG.getConstant(CondCode, MVT::i32);
    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
  }

  return SDValue();
}

SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  DebugLoc dl = Op.getDebugLoc();

  if (LHS.getValueType() == MVT::i32) {
    SDValue ARMcc;
    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
    SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
    return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
                       Chain, Dest, ARMcc, CCR, Cmp);
  }

  assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);

  if (UnsafeFPMath &&
      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
       CC == ISD::SETNE || CC == ISD::SETUNE)) {
    SDValue Result = OptimizeVFPBrcond(Op, DAG);
    if (Result.getNode())
      return Result;
  }

  ARMCC::CondCodes CondCode, CondCode2;
  FPCCToARMCC(CC, CondCode, CondCode2);

  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
  SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
  SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  if (CondCode2 != ARMCC::AL) {
    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
    Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
  }
  return Res;
}

SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
  SDValue
Chain = Op.getOperand(0); 2559 SDValue Table = Op.getOperand(1); 2560 SDValue Index = Op.getOperand(2); 2561 DebugLoc dl = Op.getDebugLoc(); 2562 2563 EVT PTy = getPointerTy(); 2564 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2565 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2566 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2567 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2568 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2569 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2570 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2571 if (Subtarget->isThumb2()) { 2572 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2573 // which does another jump to the destination. This also makes it easier 2574 // to translate it to TBB / TBH later. 2575 // FIXME: This might not work if the function is extremely large. 2576 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2577 Addr, Op.getOperand(2), JTI, UId); 2578 } 2579 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2580 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2581 PseudoSourceValue::getJumpTable(), 0, 2582 false, false, 0); 2583 Chain = Addr.getValue(1); 2584 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2585 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2586 } else { 2587 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2588 PseudoSourceValue::getJumpTable(), 0, false, false, 0); 2589 Chain = Addr.getValue(1); 2590 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2591 } 2592} 2593 2594static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2595 DebugLoc dl = Op.getDebugLoc(); 2596 unsigned Opc; 2597 2598 switch (Op.getOpcode()) { 2599 default: 2600 assert(0 && "Invalid opcode!"); 2601 case ISD::FP_TO_SINT: 2602 Opc = ARMISD::FTOSI; 2603 break; 2604 case ISD::FP_TO_UINT: 2605 Opc = ARMISD::FTOUI; 2606 break; 2607 } 2608 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2609 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2610} 2611 2612static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2613 EVT VT = Op.getValueType(); 2614 DebugLoc dl = Op.getDebugLoc(); 2615 unsigned Opc; 2616 2617 switch (Op.getOpcode()) { 2618 default: 2619 assert(0 && "Invalid opcode!"); 2620 case ISD::SINT_TO_FP: 2621 Opc = ARMISD::SITOF; 2622 break; 2623 case ISD::UINT_TO_FP: 2624 Opc = ARMISD::UITOF; 2625 break; 2626 } 2627 2628 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2629 return DAG.getNode(Opc, dl, VT, Op); 2630} 2631 2632SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 2633 // Implement fcopysign with a fabs and a conditional fneg. 
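  // (Conceptually, the sequence built below computes
  //    copysign(x, y) ~ (y < 0.0) ? -fabs(x) : fabs(x)
  //  using a VFP compare of y against +0.0 and a conditional negate (CNEG);
  //  this sketch ignores the sign of zero and NaN payloads.)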
2634 SDValue Tmp0 = Op.getOperand(0); 2635 SDValue Tmp1 = Op.getOperand(1); 2636 DebugLoc dl = Op.getDebugLoc(); 2637 EVT VT = Op.getValueType(); 2638 EVT SrcVT = Tmp1.getValueType(); 2639 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2640 SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); 2641 SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); 2642 SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); 2643 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2644 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); 2645} 2646 2647SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 2648 MachineFunction &MF = DAG.getMachineFunction(); 2649 MachineFrameInfo *MFI = MF.getFrameInfo(); 2650 MFI->setReturnAddressIsTaken(true); 2651 2652 EVT VT = Op.getValueType(); 2653 DebugLoc dl = Op.getDebugLoc(); 2654 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2655 if (Depth) { 2656 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 2657 SDValue Offset = DAG.getConstant(4, MVT::i32); 2658 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 2659 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 2660 NULL, 0, false, false, 0); 2661 } 2662 2663 // Return LR, which contains the return address. Mark it an implicit live-in. 2664 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 2665 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 2666} 2667 2668SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 2669 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2670 MFI->setFrameAddressIsTaken(true); 2671 2672 EVT VT = Op.getValueType(); 2673 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2674 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2675 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2676 ? ARM::R7 : ARM::R11; 2677 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2678 while (Depth--) 2679 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0, 2680 false, false, 0); 2681 return FrameAddr; 2682} 2683 2684/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to 2685/// expand a bit convert where either the source or destination type is i64 to 2686/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 2687/// operand type is illegal (e.g., v2f32 for a target that doesn't support 2688/// vectors), since the legalizer won't know what to do with that. 2689static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2690 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2691 DebugLoc dl = N->getDebugLoc(); 2692 SDValue Op = N->getOperand(0); 2693 2694 // This function is only supposed to be called for i64 types, either as the 2695 // source or destination of the bit convert. 2696 EVT SrcVT = Op.getValueType(); 2697 EVT DstVT = N->getValueType(0); 2698 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 2699 "ExpandBIT_CONVERT called for non-i64 type"); 2700 2701 // Turn i64->f64 into VMOVDRR. 2702 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 2703 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2704 DAG.getConstant(0, MVT::i32)); 2705 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2706 DAG.getConstant(1, MVT::i32)); 2707 return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, 2708 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 2709 } 2710 2711 // Turn f64->i64 into VMOVRRD. 
2712 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 2713 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2714 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2715 // Merge the pieces into a single i64 value. 2716 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2717 } 2718 2719 return SDValue(); 2720} 2721 2722/// getZeroVector - Returns a vector of specified type with all zero elements. 2723/// Zero vectors are used to represent vector negation and in those cases 2724/// will be implemented with the NEON VNEG instruction. However, VNEG does 2725/// not support i64 elements, so sometimes the zero vectors will need to be 2726/// explicitly constructed. Regardless, use a canonical VMOV to create the 2727/// zero vector. 2728static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2729 assert(VT.isVector() && "Expected a vector type"); 2730 // The canonical modified immediate encoding of a zero vector is....0! 2731 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 2732 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 2733 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 2734 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 2735} 2736 2737/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 2738/// i32 values and take a 2 x i32 value to shift plus a shift amount. 2739SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 2740 SelectionDAG &DAG) const { 2741 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2742 EVT VT = Op.getValueType(); 2743 unsigned VTBits = VT.getSizeInBits(); 2744 DebugLoc dl = Op.getDebugLoc(); 2745 SDValue ShOpLo = Op.getOperand(0); 2746 SDValue ShOpHi = Op.getOperand(1); 2747 SDValue ShAmt = Op.getOperand(2); 2748 SDValue ARMcc; 2749 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2750 2751 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2752 2753 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2754 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2755 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2756 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2757 DAG.getConstant(VTBits, MVT::i32)); 2758 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2759 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2760 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2761 2762 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2763 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2764 ARMcc, DAG, dl); 2765 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2766 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 2767 CCR, Cmp); 2768 2769 SDValue Ops[2] = { Lo, Hi }; 2770 return DAG.getMergeValues(Ops, 2, dl); 2771} 2772 2773/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2774/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
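// A sketch of the expansion used here, for a 64-bit value Hi:Lo shifted left
// by Amt (with VTBits == 32):
//   Lo' = Lo << Amt
//   Hi' = (Amt - 32 >= 0) ? Lo << (Amt - 32)
//                         : (Hi << Amt) | (Lo >> (32 - Amt))
// The select on the sign of (Amt - 32) is emitted as an ARMISD::CMOV.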
2775SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 2776 SelectionDAG &DAG) const { 2777 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2778 EVT VT = Op.getValueType(); 2779 unsigned VTBits = VT.getSizeInBits(); 2780 DebugLoc dl = Op.getDebugLoc(); 2781 SDValue ShOpLo = Op.getOperand(0); 2782 SDValue ShOpHi = Op.getOperand(1); 2783 SDValue ShAmt = Op.getOperand(2); 2784 SDValue ARMcc; 2785 2786 assert(Op.getOpcode() == ISD::SHL_PARTS); 2787 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2788 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2789 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2790 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2791 DAG.getConstant(VTBits, MVT::i32)); 2792 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2793 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2794 2795 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2796 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2797 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2798 ARMcc, DAG, dl); 2799 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2800 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 2801 CCR, Cmp); 2802 2803 SDValue Ops[2] = { Lo, Hi }; 2804 return DAG.getMergeValues(Ops, 2, dl); 2805} 2806 2807SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 2808 SelectionDAG &DAG) const { 2809 // The rounding mode is in bits 23:22 of the FPSCR. 2810 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 2811 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) 2812 // so that the shift + and get folded into a bitfield extract. 2813 DebugLoc dl = Op.getDebugLoc(); 2814 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 2815 DAG.getConstant(Intrinsic::arm_get_fpscr, 2816 MVT::i32)); 2817 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 2818 DAG.getConstant(1U << 22, MVT::i32)); 2819 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 2820 DAG.getConstant(22, MVT::i32)); 2821 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 2822 DAG.getConstant(3, MVT::i32)); 2823} 2824 2825static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 2826 const ARMSubtarget *ST) { 2827 EVT VT = N->getValueType(0); 2828 DebugLoc dl = N->getDebugLoc(); 2829 2830 if (!ST->hasV6T2Ops()) 2831 return SDValue(); 2832 2833 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 2834 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 2835} 2836 2837static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2838 const ARMSubtarget *ST) { 2839 EVT VT = N->getValueType(0); 2840 DebugLoc dl = N->getDebugLoc(); 2841 2842 // Lower vector shifts on NEON to use VSHL. 2843 if (VT.isVector()) { 2844 assert(ST->hasNEON() && "unexpected vector shift"); 2845 2846 // Left shifts translate directly to the vshiftu intrinsic. 2847 if (N->getOpcode() == ISD::SHL) 2848 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2849 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2850 N->getOperand(0), N->getOperand(1)); 2851 2852 assert((N->getOpcode() == ISD::SRA || 2853 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2854 2855 // NEON uses the same intrinsics for both left and right shifts. For 2856 // right shifts, the shift amounts are negative, so negate the vector of 2857 // shift amounts. 
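    // For example (illustrative only): an SRA of <4 x i32> by a splat of 3
    // becomes llvm.arm.neon.vshifts with a shift-amount vector of all -3s,
    // which instruction selection can then match as a right shift.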
2858 EVT ShiftVT = N->getOperand(1).getValueType(); 2859 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2860 getZeroVector(ShiftVT, DAG, dl), 2861 N->getOperand(1)); 2862 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2863 Intrinsic::arm_neon_vshifts : 2864 Intrinsic::arm_neon_vshiftu); 2865 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2866 DAG.getConstant(vshiftInt, MVT::i32), 2867 N->getOperand(0), NegatedCount); 2868 } 2869 2870 // We can get here for a node like i32 = ISD::SHL i32, i64 2871 if (VT != MVT::i64) 2872 return SDValue(); 2873 2874 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2875 "Unknown shift to lower!"); 2876 2877 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2878 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2879 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2880 return SDValue(); 2881 2882 // If we are in thumb mode, we don't have RRX. 2883 if (ST->isThumb1Only()) return SDValue(); 2884 2885 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 2886 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2887 DAG.getConstant(0, MVT::i32)); 2888 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2889 DAG.getConstant(1, MVT::i32)); 2890 2891 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2892 // captures the result into a carry flag. 2893 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2894 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2895 2896 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2897 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2898 2899 // Merge the pieces into a single i64 value. 2900 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2901} 2902 2903static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2904 SDValue TmpOp0, TmpOp1; 2905 bool Invert = false; 2906 bool Swap = false; 2907 unsigned Opc = 0; 2908 2909 SDValue Op0 = Op.getOperand(0); 2910 SDValue Op1 = Op.getOperand(1); 2911 SDValue CC = Op.getOperand(2); 2912 EVT VT = Op.getValueType(); 2913 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2914 DebugLoc dl = Op.getDebugLoc(); 2915 2916 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2917 switch (SetCCOpcode) { 2918 default: llvm_unreachable("Illegal FP comparison"); break; 2919 case ISD::SETUNE: 2920 case ISD::SETNE: Invert = true; // Fallthrough 2921 case ISD::SETOEQ: 2922 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2923 case ISD::SETOLT: 2924 case ISD::SETLT: Swap = true; // Fallthrough 2925 case ISD::SETOGT: 2926 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2927 case ISD::SETOLE: 2928 case ISD::SETLE: Swap = true; // Fallthrough 2929 case ISD::SETOGE: 2930 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2931 case ISD::SETUGE: Swap = true; // Fallthrough 2932 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2933 case ISD::SETUGT: Swap = true; // Fallthrough 2934 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2935 case ISD::SETUEQ: Invert = true; // Fallthrough 2936 case ISD::SETONE: 2937 // Expand this to (OLT | OGT). 
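      // i.e., setone(x,y) == VCGT(y,x) | VCGT(x,y); unordered inputs fail
      // both compares, and the SETUEQ case above reuses this expansion with
      // the result inverted.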
2938 TmpOp0 = Op0; 2939 TmpOp1 = Op1; 2940 Opc = ISD::OR; 2941 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2942 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2943 break; 2944 case ISD::SETUO: Invert = true; // Fallthrough 2945 case ISD::SETO: 2946 // Expand this to (OLT | OGE). 2947 TmpOp0 = Op0; 2948 TmpOp1 = Op1; 2949 Opc = ISD::OR; 2950 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2951 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2952 break; 2953 } 2954 } else { 2955 // Integer comparisons. 2956 switch (SetCCOpcode) { 2957 default: llvm_unreachable("Illegal integer comparison"); break; 2958 case ISD::SETNE: Invert = true; 2959 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2960 case ISD::SETLT: Swap = true; 2961 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2962 case ISD::SETLE: Swap = true; 2963 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2964 case ISD::SETULT: Swap = true; 2965 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2966 case ISD::SETULE: Swap = true; 2967 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2968 } 2969 2970 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 2971 if (Opc == ARMISD::VCEQ) { 2972 2973 SDValue AndOp; 2974 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2975 AndOp = Op0; 2976 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 2977 AndOp = Op1; 2978 2979 // Ignore bitconvert. 2980 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 2981 AndOp = AndOp.getOperand(0); 2982 2983 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 2984 Opc = ARMISD::VTST; 2985 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 2986 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 2987 Invert = !Invert; 2988 } 2989 } 2990 } 2991 2992 if (Swap) 2993 std::swap(Op0, Op1); 2994 2995 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 2996 2997 if (Invert) 2998 Result = DAG.getNOT(dl, Result, VT); 2999 3000 return Result; 3001} 3002 3003/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3004/// valid vector constant for a NEON instruction with a "modified immediate" 3005/// operand (e.g., VMOV). If so, return the encoded value. 3006static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3007 unsigned SplatBitSize, SelectionDAG &DAG, 3008 EVT &VT, bool is128Bits, bool isVMOV) { 3009 unsigned OpCmode, Imm; 3010 3011 // SplatBitSize is set to the smallest size that splats the vector, so a 3012 // zero vector will always have SplatBitSize == 8. However, NEON modified 3013 // immediate instructions others than VMOV do not support the 8-bit encoding 3014 // of a zero vector, and the default encoding of zero is supposed to be the 3015 // 32-bit version. 3016 if (SplatBits == 0) 3017 SplatBitSize = 32; 3018 3019 switch (SplatBitSize) { 3020 case 8: 3021 if (!isVMOV) 3022 return SDValue(); 3023 // Any 1-byte value is OK. Op=0, Cmode=1110. 3024 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3025 OpCmode = 0xe; 3026 Imm = SplatBits; 3027 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3028 break; 3029 3030 case 16: 3031 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3032 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3033 if ((SplatBits & ~0xff) == 0) { 3034 // Value = 0x00nn: Op=x, Cmode=100x. 3035 OpCmode = 0x8; 3036 Imm = SplatBits; 3037 break; 3038 } 3039 if ((SplatBits & ~0xff00) == 0) { 3040 // Value = 0xnn00: Op=x, Cmode=101x. 
3041 OpCmode = 0xa; 3042 Imm = SplatBits >> 8; 3043 break; 3044 } 3045 return SDValue(); 3046 3047 case 32: 3048 // NEON's 32-bit VMOV supports splat values where: 3049 // * only one byte is nonzero, or 3050 // * the least significant byte is 0xff and the second byte is nonzero, or 3051 // * the least significant 2 bytes are 0xff and the third is nonzero. 3052 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3053 if ((SplatBits & ~0xff) == 0) { 3054 // Value = 0x000000nn: Op=x, Cmode=000x. 3055 OpCmode = 0; 3056 Imm = SplatBits; 3057 break; 3058 } 3059 if ((SplatBits & ~0xff00) == 0) { 3060 // Value = 0x0000nn00: Op=x, Cmode=001x. 3061 OpCmode = 0x2; 3062 Imm = SplatBits >> 8; 3063 break; 3064 } 3065 if ((SplatBits & ~0xff0000) == 0) { 3066 // Value = 0x00nn0000: Op=x, Cmode=010x. 3067 OpCmode = 0x4; 3068 Imm = SplatBits >> 16; 3069 break; 3070 } 3071 if ((SplatBits & ~0xff000000) == 0) { 3072 // Value = 0xnn000000: Op=x, Cmode=011x. 3073 OpCmode = 0x6; 3074 Imm = SplatBits >> 24; 3075 break; 3076 } 3077 3078 if ((SplatBits & ~0xffff) == 0 && 3079 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3080 // Value = 0x0000nnff: Op=x, Cmode=1100. 3081 OpCmode = 0xc; 3082 Imm = SplatBits >> 8; 3083 SplatBits |= 0xff; 3084 break; 3085 } 3086 3087 if ((SplatBits & ~0xffffff) == 0 && 3088 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3089 // Value = 0x00nnffff: Op=x, Cmode=1101. 3090 OpCmode = 0xd; 3091 Imm = SplatBits >> 16; 3092 SplatBits |= 0xffff; 3093 break; 3094 } 3095 3096 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3097 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3098 // VMOV.I32. A (very) minor optimization would be to replicate the value 3099 // and fall through here to test for a valid 64-bit splat. But, then the 3100 // caller would also need to check and handle the change in size. 3101 return SDValue(); 3102 3103 case 64: { 3104 if (!isVMOV) 3105 return SDValue(); 3106 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 3107 uint64_t BitMask = 0xff; 3108 uint64_t Val = 0; 3109 unsigned ImmMask = 1; 3110 Imm = 0; 3111 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3112 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3113 Val |= BitMask; 3114 Imm |= ImmMask; 3115 } else if ((SplatBits & BitMask) != 0) { 3116 return SDValue(); 3117 } 3118 BitMask <<= 8; 3119 ImmMask <<= 1; 3120 } 3121 // Op=1, Cmode=1110. 3122 OpCmode = 0x1e; 3123 SplatBits = Val; 3124 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3125 break; 3126 } 3127 3128 default: 3129 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3130 return SDValue(); 3131 } 3132 3133 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3134 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3135} 3136 3137static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3138 bool &ReverseVEXT, unsigned &Imm) { 3139 unsigned NumElts = VT.getVectorNumElements(); 3140 ReverseVEXT = false; 3141 Imm = M[0]; 3142 3143 // If this is a VEXT shuffle, the immediate value is the index of the first 3144 // element. The other shuffle indices must be the successive elements after 3145 // the first one. 3146 unsigned ExpectedElt = Imm; 3147 for (unsigned i = 1; i < NumElts; ++i) { 3148 // Increment the expected index. If it wraps around, it may still be 3149 // a VEXT but the source vectors must be swapped. 
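    // e.g., for 4 elements, mask <3,4,5,6> is vext #3 of (V1,V2), while
    // <7,0,1,2> wraps around and becomes vext #3 of (V2,V1).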
3150 ExpectedElt += 1; 3151 if (ExpectedElt == NumElts * 2) { 3152 ExpectedElt = 0; 3153 ReverseVEXT = true; 3154 } 3155 3156 if (ExpectedElt != static_cast<unsigned>(M[i])) 3157 return false; 3158 } 3159 3160 // Adjust the index value if the source operands will be swapped. 3161 if (ReverseVEXT) 3162 Imm -= NumElts; 3163 3164 return true; 3165} 3166 3167/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3168/// instruction with the specified blocksize. (The order of the elements 3169/// within each block of the vector is reversed.) 3170static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3171 unsigned BlockSize) { 3172 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3173 "Only possible block sizes for VREV are: 16, 32, 64"); 3174 3175 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3176 if (EltSz == 64) 3177 return false; 3178 3179 unsigned NumElts = VT.getVectorNumElements(); 3180 unsigned BlockElts = M[0] + 1; 3181 3182 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3183 return false; 3184 3185 for (unsigned i = 0; i < NumElts; ++i) { 3186 if ((unsigned) M[i] != 3187 (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3188 return false; 3189 } 3190 3191 return true; 3192} 3193 3194static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3195 unsigned &WhichResult) { 3196 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3197 if (EltSz == 64) 3198 return false; 3199 3200 unsigned NumElts = VT.getVectorNumElements(); 3201 WhichResult = (M[0] == 0 ? 0 : 1); 3202 for (unsigned i = 0; i < NumElts; i += 2) { 3203 if ((unsigned) M[i] != i + WhichResult || 3204 (unsigned) M[i+1] != i + NumElts + WhichResult) 3205 return false; 3206 } 3207 return true; 3208} 3209 3210/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3211/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3212/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3213static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3214 unsigned &WhichResult) { 3215 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3216 if (EltSz == 64) 3217 return false; 3218 3219 unsigned NumElts = VT.getVectorNumElements(); 3220 WhichResult = (M[0] == 0 ? 0 : 1); 3221 for (unsigned i = 0; i < NumElts; i += 2) { 3222 if ((unsigned) M[i] != i + WhichResult || 3223 (unsigned) M[i+1] != i + WhichResult) 3224 return false; 3225 } 3226 return true; 3227} 3228 3229static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3230 unsigned &WhichResult) { 3231 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3232 if (EltSz == 64) 3233 return false; 3234 3235 unsigned NumElts = VT.getVectorNumElements(); 3236 WhichResult = (M[0] == 0 ? 0 : 1); 3237 for (unsigned i = 0; i != NumElts; ++i) { 3238 if ((unsigned) M[i] != 2 * i + WhichResult) 3239 return false; 3240 } 3241 3242 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3243 if (VT.is64BitVector() && EltSz == 32) 3244 return false; 3245 3246 return true; 3247} 3248 3249/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3250/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
3251/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3252static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3253 unsigned &WhichResult) { 3254 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3255 if (EltSz == 64) 3256 return false; 3257 3258 unsigned Half = VT.getVectorNumElements() / 2; 3259 WhichResult = (M[0] == 0 ? 0 : 1); 3260 for (unsigned j = 0; j != 2; ++j) { 3261 unsigned Idx = WhichResult; 3262 for (unsigned i = 0; i != Half; ++i) { 3263 if ((unsigned) M[i + j * Half] != Idx) 3264 return false; 3265 Idx += 2; 3266 } 3267 } 3268 3269 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3270 if (VT.is64BitVector() && EltSz == 32) 3271 return false; 3272 3273 return true; 3274} 3275 3276static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3277 unsigned &WhichResult) { 3278 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3279 if (EltSz == 64) 3280 return false; 3281 3282 unsigned NumElts = VT.getVectorNumElements(); 3283 WhichResult = (M[0] == 0 ? 0 : 1); 3284 unsigned Idx = WhichResult * NumElts / 2; 3285 for (unsigned i = 0; i != NumElts; i += 2) { 3286 if ((unsigned) M[i] != Idx || 3287 (unsigned) M[i+1] != Idx + NumElts) 3288 return false; 3289 Idx += 1; 3290 } 3291 3292 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3293 if (VT.is64BitVector() && EltSz == 32) 3294 return false; 3295 3296 return true; 3297} 3298 3299/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3300/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3301/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3302static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3303 unsigned &WhichResult) { 3304 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3305 if (EltSz == 64) 3306 return false; 3307 3308 unsigned NumElts = VT.getVectorNumElements(); 3309 WhichResult = (M[0] == 0 ? 0 : 1); 3310 unsigned Idx = WhichResult * NumElts / 2; 3311 for (unsigned i = 0; i != NumElts; i += 2) { 3312 if ((unsigned) M[i] != Idx || 3313 (unsigned) M[i+1] != Idx) 3314 return false; 3315 Idx += 1; 3316 } 3317 3318 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3319 if (VT.is64BitVector() && EltSz == 32) 3320 return false; 3321 3322 return true; 3323} 3324 3325// If N is an integer constant that can be moved into a register in one 3326// instruction, return an SDValue of such a constant (will become a MOV 3327// instruction). Otherwise return null. 3328static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3329 const ARMSubtarget *ST, DebugLoc dl) { 3330 uint64_t Val; 3331 if (!isa<ConstantSDNode>(N)) 3332 return SDValue(); 3333 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3334 3335 if (ST->isThumb1Only()) { 3336 if (Val <= 255 || ~Val <= 255) 3337 return DAG.getConstant(Val, MVT::i32); 3338 } else { 3339 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3340 return DAG.getConstant(Val, MVT::i32); 3341 } 3342 return SDValue(); 3343} 3344 3345// If this is a case we can't handle, return null and let the default 3346// expansion code take care of it. 
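// Rough order of attempts below: (1) a constant splat that fits a NEON
// "modified immediate" (VMOV/VMVN), (2) VDUP when all defined lanes use a
// single value, (3) per-lane construction via ARMISD::BUILD_VECTOR for 32-
// and 64-bit elements, and otherwise the generic expansion (typically a
// constant-pool load for all-constant vectors).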
3347static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3348 const ARMSubtarget *ST) { 3349 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3350 DebugLoc dl = Op.getDebugLoc(); 3351 EVT VT = Op.getValueType(); 3352 3353 APInt SplatBits, SplatUndef; 3354 unsigned SplatBitSize; 3355 bool HasAnyUndefs; 3356 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3357 if (SplatBitSize <= 64) { 3358 // Check if an immediate VMOV works. 3359 EVT VmovVT; 3360 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3361 SplatUndef.getZExtValue(), SplatBitSize, 3362 DAG, VmovVT, VT.is128BitVector(), true); 3363 if (Val.getNode()) { 3364 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3365 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3366 } 3367 3368 // Try an immediate VMVN. 3369 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3370 ((1LL << SplatBitSize) - 1)); 3371 Val = isNEONModifiedImm(NegatedImm, 3372 SplatUndef.getZExtValue(), SplatBitSize, 3373 DAG, VmovVT, VT.is128BitVector(), false); 3374 if (Val.getNode()) { 3375 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3376 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3377 } 3378 } 3379 } 3380 3381 // Scan through the operands to see if only one value is used. 3382 unsigned NumElts = VT.getVectorNumElements(); 3383 bool isOnlyLowElement = true; 3384 bool usesOnlyOneValue = true; 3385 bool isConstant = true; 3386 SDValue Value; 3387 for (unsigned i = 0; i < NumElts; ++i) { 3388 SDValue V = Op.getOperand(i); 3389 if (V.getOpcode() == ISD::UNDEF) 3390 continue; 3391 if (i > 0) 3392 isOnlyLowElement = false; 3393 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3394 isConstant = false; 3395 3396 if (!Value.getNode()) 3397 Value = V; 3398 else if (V != Value) 3399 usesOnlyOneValue = false; 3400 } 3401 3402 if (!Value.getNode()) 3403 return DAG.getUNDEF(VT); 3404 3405 if (isOnlyLowElement) 3406 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3407 3408 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3409 3410 if (EnableARMVDUPsplat) { 3411 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3412 // i32 and try again. 3413 if (usesOnlyOneValue && EltSize <= 32) { 3414 if (!isConstant) 3415 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3416 if (VT.getVectorElementType().isFloatingPoint()) { 3417 SmallVector<SDValue, 8> Ops; 3418 for (unsigned i = 0; i < NumElts; ++i) 3419 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, 3420 Op.getOperand(i))); 3421 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, &Ops[0], 3422 NumElts); 3423 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, 3424 LowerBUILD_VECTOR(Val, DAG, ST)); 3425 } 3426 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3427 if (Val.getNode()) 3428 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3429 } 3430 } 3431 3432 // If all elements are constants and the case above didn't get hit, fall back 3433 // to the default expansion, which will generate a load from the constant 3434 // pool. 3435 if (isConstant) 3436 return SDValue(); 3437 3438 if (!EnableARMVDUPsplat) { 3439 // Use VDUP for non-constant splats. 3440 if (usesOnlyOneValue && EltSize <= 32) 3441 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3442 } 3443 3444 // Vectors with 32- or 64-bit elements can be built by directly assigning 3445 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3446 // will be legalized. 
3447 if (EltSize >= 32) { 3448 // Do the expansion with floating-point types, since that is what the VFP 3449 // registers are defined to use, and since i64 is not legal. 3450 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3451 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3452 SmallVector<SDValue, 8> Ops; 3453 for (unsigned i = 0; i < NumElts; ++i) 3454 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); 3455 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3456 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3457 } 3458 3459 return SDValue(); 3460} 3461 3462/// isShuffleMaskLegal - Targets can use this to indicate that they only 3463/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3464/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3465/// are assumed to be legal. 3466bool 3467ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 3468 EVT VT) const { 3469 if (VT.getVectorNumElements() == 4 && 3470 (VT.is128BitVector() || VT.is64BitVector())) { 3471 unsigned PFIndexes[4]; 3472 for (unsigned i = 0; i != 4; ++i) { 3473 if (M[i] < 0) 3474 PFIndexes[i] = 8; 3475 else 3476 PFIndexes[i] = M[i]; 3477 } 3478 3479 // Compute the index in the perfect shuffle table. 3480 unsigned PFTableIndex = 3481 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3482 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3483 unsigned Cost = (PFEntry >> 30); 3484 3485 if (Cost <= 4) 3486 return true; 3487 } 3488 3489 bool ReverseVEXT; 3490 unsigned Imm, WhichResult; 3491 3492 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3493 return (EltSize >= 32 || 3494 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 3495 isVREVMask(M, VT, 64) || 3496 isVREVMask(M, VT, 32) || 3497 isVREVMask(M, VT, 16) || 3498 isVEXTMask(M, VT, ReverseVEXT, Imm) || 3499 isVTRNMask(M, VT, WhichResult) || 3500 isVUZPMask(M, VT, WhichResult) || 3501 isVZIPMask(M, VT, WhichResult) || 3502 isVTRN_v_undef_Mask(M, VT, WhichResult) || 3503 isVUZP_v_undef_Mask(M, VT, WhichResult) || 3504 isVZIP_v_undef_Mask(M, VT, WhichResult)); 3505} 3506 3507/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 3508/// the specified operations to build the shuffle. 
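// Each table entry packs a cost in bits [31:30], the operation in bits
// [29:26], and the 13-bit IDs of the left and right sub-shuffles in bits
// [25:13] and [12:0]; OP_COPY entries terminate the recursion.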
3509static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 3510 SDValue RHS, SelectionDAG &DAG, 3511 DebugLoc dl) { 3512 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3513 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3514 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3515 3516 enum { 3517 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3518 OP_VREV, 3519 OP_VDUP0, 3520 OP_VDUP1, 3521 OP_VDUP2, 3522 OP_VDUP3, 3523 OP_VEXT1, 3524 OP_VEXT2, 3525 OP_VEXT3, 3526 OP_VUZPL, // VUZP, left result 3527 OP_VUZPR, // VUZP, right result 3528 OP_VZIPL, // VZIP, left result 3529 OP_VZIPR, // VZIP, right result 3530 OP_VTRNL, // VTRN, left result 3531 OP_VTRNR // VTRN, right result 3532 }; 3533 3534 if (OpNum == OP_COPY) { 3535 if (LHSID == (1*9+2)*9+3) return LHS; 3536 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3537 return RHS; 3538 } 3539 3540 SDValue OpLHS, OpRHS; 3541 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 3542 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 3543 EVT VT = OpLHS.getValueType(); 3544 3545 switch (OpNum) { 3546 default: llvm_unreachable("Unknown shuffle opcode!"); 3547 case OP_VREV: 3548 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 3549 case OP_VDUP0: 3550 case OP_VDUP1: 3551 case OP_VDUP2: 3552 case OP_VDUP3: 3553 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 3554 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 3555 case OP_VEXT1: 3556 case OP_VEXT2: 3557 case OP_VEXT3: 3558 return DAG.getNode(ARMISD::VEXT, dl, VT, 3559 OpLHS, OpRHS, 3560 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 3561 case OP_VUZPL: 3562 case OP_VUZPR: 3563 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3564 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 3565 case OP_VZIPL: 3566 case OP_VZIPR: 3567 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3568 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 3569 case OP_VTRNL: 3570 case OP_VTRNR: 3571 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3572 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 3573 } 3574} 3575 3576static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3577 SDValue V1 = Op.getOperand(0); 3578 SDValue V2 = Op.getOperand(1); 3579 DebugLoc dl = Op.getDebugLoc(); 3580 EVT VT = Op.getValueType(); 3581 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3582 SmallVector<int, 8> ShuffleMask; 3583 3584 // Convert shuffles that are directly supported on NEON to target-specific 3585 // DAG nodes, instead of keeping them as shuffles and matching them again 3586 // during code selection. This is more efficient and avoids the possibility 3587 // of inconsistencies between legalization and selection. 3588 // FIXME: floating-point vectors should be canonicalized to integer vectors 3589 // of the same time so that they get CSEd properly. 3590 SVN->getMask(ShuffleMask); 3591 3592 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3593 if (EltSize <= 32) { 3594 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 3595 int Lane = SVN->getSplatIndex(); 3596 // If this is undef splat, generate it via "just" vdup, if possible. 
3597 if (Lane == -1) Lane = 0; 3598 3599 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 3600 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 3601 } 3602 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 3603 DAG.getConstant(Lane, MVT::i32)); 3604 } 3605 3606 bool ReverseVEXT; 3607 unsigned Imm; 3608 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 3609 if (ReverseVEXT) 3610 std::swap(V1, V2); 3611 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 3612 DAG.getConstant(Imm, MVT::i32)); 3613 } 3614 3615 if (isVREVMask(ShuffleMask, VT, 64)) 3616 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 3617 if (isVREVMask(ShuffleMask, VT, 32)) 3618 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 3619 if (isVREVMask(ShuffleMask, VT, 16)) 3620 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 3621 3622 // Check for Neon shuffles that modify both input vectors in place. 3623 // If both results are used, i.e., if there are two shuffles with the same 3624 // source operands and with masks corresponding to both results of one of 3625 // these operations, DAG memoization will ensure that a single node is 3626 // used for both shuffles. 3627 unsigned WhichResult; 3628 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 3629 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3630 V1, V2).getValue(WhichResult); 3631 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 3632 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3633 V1, V2).getValue(WhichResult); 3634 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 3635 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3636 V1, V2).getValue(WhichResult); 3637 3638 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3639 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3640 V1, V1).getValue(WhichResult); 3641 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3642 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3643 V1, V1).getValue(WhichResult); 3644 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3645 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3646 V1, V1).getValue(WhichResult); 3647 } 3648 3649 // If the shuffle is not directly supported and it has 4 elements, use 3650 // the PerfectShuffle-generated table to synthesize it from other shuffles. 3651 unsigned NumElts = VT.getVectorNumElements(); 3652 if (NumElts == 4) { 3653 unsigned PFIndexes[4]; 3654 for (unsigned i = 0; i != 4; ++i) { 3655 if (ShuffleMask[i] < 0) 3656 PFIndexes[i] = 8; 3657 else 3658 PFIndexes[i] = ShuffleMask[i]; 3659 } 3660 3661 // Compute the index in the perfect shuffle table. 3662 unsigned PFTableIndex = 3663 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3664 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3665 unsigned Cost = (PFEntry >> 30); 3666 3667 if (Cost <= 4) 3668 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3669 } 3670 3671 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 3672 if (EltSize >= 32) { 3673 // Do the expansion with floating-point types, since that is what the VFP 3674 // registers are defined to use, and since i64 is not legal. 
3675 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3676 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3677 V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); 3678 V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); 3679 SmallVector<SDValue, 8> Ops; 3680 for (unsigned i = 0; i < NumElts; ++i) { 3681 if (ShuffleMask[i] < 0) 3682 Ops.push_back(DAG.getUNDEF(EltVT)); 3683 else 3684 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 3685 ShuffleMask[i] < (int)NumElts ? V1 : V2, 3686 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 3687 MVT::i32))); 3688 } 3689 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3690 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3691 } 3692 3693 return SDValue(); 3694} 3695 3696static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3697 EVT VT = Op.getValueType(); 3698 DebugLoc dl = Op.getDebugLoc(); 3699 SDValue Vec = Op.getOperand(0); 3700 SDValue Lane = Op.getOperand(1); 3701 assert(VT == MVT::i32 && 3702 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3703 "unexpected type for custom-lowering vector extract"); 3704 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3705} 3706 3707static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3708 // The only time a CONCAT_VECTORS operation can have legal types is when 3709 // two 64-bit vectors are concatenated to a 128-bit vector. 3710 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3711 "unexpected CONCAT_VECTORS"); 3712 DebugLoc dl = Op.getDebugLoc(); 3713 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3714 SDValue Op0 = Op.getOperand(0); 3715 SDValue Op1 = Op.getOperand(1); 3716 if (Op0.getOpcode() != ISD::UNDEF) 3717 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3718 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3719 DAG.getIntPtrConstant(0)); 3720 if (Op1.getOpcode() != ISD::UNDEF) 3721 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3722 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3723 DAG.getIntPtrConstant(1)); 3724 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3725} 3726 3727SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 3728 switch (Op.getOpcode()) { 3729 default: llvm_unreachable("Don't know how to custom lower this!"); 3730 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3731 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3732 case ISD::GlobalAddress: 3733 return Subtarget->isTargetDarwin() ? 
                                             LowerGlobalAddressDarwin(Op, DAG) :
                                             LowerGlobalAddressELF(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::SELECT:        return LowerSELECT(Op, DAG);
  case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
  case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
  case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
  case ISD::VASTART:       return LowerVASTART(Op, DAG);
  case ISD::MEMBARRIER:    return LowerMEMBARRIER(Op, DAG, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:    return LowerINT_TO_FP(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:    return LowerFP_TO_INT(Op, DAG);
  case ISD::FCOPYSIGN:     return LowerFCOPYSIGN(Op, DAG);
  case ISD::RETURNADDR:    return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:     return LowerFRAMEADDR(Op, DAG);
  case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG,
                                                               Subtarget);
  case ISD::BIT_CONVERT:   return ExpandBIT_CONVERT(Op.getNode(), DAG);
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:           return LowerShift(Op.getNode(), DAG, Subtarget);
  case ISD::SHL_PARTS:     return LowerShiftLeftParts(Op, DAG);
  case ISD::SRL_PARTS:
  case ISD::SRA_PARTS:     return LowerShiftRightParts(Op, DAG);
  case ISD::CTTZ:          return LowerCTTZ(Op.getNode(), DAG, Subtarget);
  case ISD::VSETCC:        return LowerVSETCC(Op, DAG);
  case ISD::BUILD_VECTOR:  return LowerBUILD_VECTOR(Op, DAG, Subtarget);
  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::FLT_ROUNDS_:   return LowerFLT_ROUNDS_(Op, DAG);
  }
  return SDValue();
}

/// ReplaceNodeResults - Replace the results of node with an illegal result
/// type with new values built out of custom code.
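// This is the type-legalization counterpart of LowerOperation above: i64
// results from the BIT_CONVERT and 64-bit shift expansions are returned here
// so the legalizer can finish breaking them into legal i32 values.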
void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  SDValue Res;
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom expand this!");
    break;
  case ISD::BIT_CONVERT:
    Res = ExpandBIT_CONVERT(N, DAG);
    break;
  case ISD::SRL:
  case ISD::SRA:
    Res = LowerShift(N, DAG, Subtarget);
    break;
  }
  if (Res.getNode())
    Results.push_back(Res);
}

//===----------------------------------------------------------------------===//
//                           ARM Scheduler Hooks
//===----------------------------------------------------------------------===//

MachineBasicBlock *
ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
                                     MachineBasicBlock *BB,
                                     unsigned Size) const {
  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptr = MI->getOperand(1).getReg();
  unsigned oldval = MI->getOperand(2).getReg();
  unsigned newval = MI->getOperand(3).getReg();
  unsigned scratch = BB->getParent()->getRegInfo()
    .createVirtualRegister(ARM::GPRRegisterClass);
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  DebugLoc dl = MI->getDebugLoc();
  bool isThumb2 = Subtarget->isThumb2();

  unsigned ldrOpc, strOpc;
  switch (Size) {
  default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
  case 1:
    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
    break;
  case 2:
    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
    break;
  case 4:
    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
    break;
  }

  MachineFunction *MF = BB->getParent();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It; // insert the new blocks after the current block

  MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MF->insert(It, loop1MBB);
  MF->insert(It, loop2MBB);
  MF->insert(It, exitMBB);

  // Transfer the remainder of BB and its successor edges to exitMBB.
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  // thisMBB:
  //   ...
  //   fallthrough --> loop1MBB
  BB->addSuccessor(loop1MBB);

  // loop1MBB:
  //   ldrex dest, [ptr]
  //   cmp dest, oldval
  //   bne exitMBB
  BB = loop1MBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
                 .addReg(dest).addReg(oldval));
  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
    .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
  BB->addSuccessor(loop2MBB);
  BB->addSuccessor(exitMBB);

  // loop2MBB:
  //   strex scratch, newval, [ptr]
  //   cmp scratch, #0
  //   bne loop1MBB
  BB = loop2MBB;
  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
                 .addReg(ptr));
  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ?
ARM::t2CMPri : ARM::CMPri)) 3873 .addReg(scratch).addImm(0)); 3874 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3875 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3876 BB->addSuccessor(loop1MBB); 3877 BB->addSuccessor(exitMBB); 3878 3879 // exitMBB: 3880 // ... 3881 BB = exitMBB; 3882 3883 MI->eraseFromParent(); // The instruction is gone now. 3884 3885 return BB; 3886} 3887 3888MachineBasicBlock * 3889ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3890 unsigned Size, unsigned BinOpcode) const { 3891 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 3892 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3893 3894 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3895 MachineFunction *MF = BB->getParent(); 3896 MachineFunction::iterator It = BB; 3897 ++It; 3898 3899 unsigned dest = MI->getOperand(0).getReg(); 3900 unsigned ptr = MI->getOperand(1).getReg(); 3901 unsigned incr = MI->getOperand(2).getReg(); 3902 DebugLoc dl = MI->getDebugLoc(); 3903 3904 bool isThumb2 = Subtarget->isThumb2(); 3905 unsigned ldrOpc, strOpc; 3906 switch (Size) { 3907 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3908 case 1: 3909 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3910 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3911 break; 3912 case 2: 3913 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3914 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3915 break; 3916 case 4: 3917 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3918 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3919 break; 3920 } 3921 3922 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3923 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3924 MF->insert(It, loopMBB); 3925 MF->insert(It, exitMBB); 3926 3927 // Transfer the remainder of BB and its successor edges to exitMBB. 3928 exitMBB->splice(exitMBB->begin(), BB, 3929 llvm::next(MachineBasicBlock::iterator(MI)), 3930 BB->end()); 3931 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 3932 3933 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 3934 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3935 unsigned scratch2 = (!BinOpcode) ? incr : 3936 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 3937 3938 // thisMBB: 3939 // ... 3940 // fallthrough --> loopMBB 3941 BB->addSuccessor(loopMBB); 3942 3943 // loopMBB: 3944 // ldrex dest, ptr 3945 // <binop> scratch2, dest, incr 3946 // strex scratch, scratch2, ptr 3947 // cmp scratch, #0 3948 // bne- loopMBB 3949 // fallthrough --> exitMBB 3950 BB = loopMBB; 3951 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3952 if (BinOpcode) { 3953 // operand order needs to go the other way for NAND 3954 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 3955 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3956 addReg(incr).addReg(dest)).addReg(0); 3957 else 3958 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 3959 addReg(dest).addReg(incr)).addReg(0); 3960 } 3961 3962 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 3963 .addReg(ptr)); 3964 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3965 .addReg(scratch).addImm(0)); 3966 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3967 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3968 3969 BB->addSuccessor(loopMBB); 3970 BB->addSuccessor(exitMBB); 3971 3972 // exitMBB: 3973 // ... 
3974 BB = exitMBB; 3975 3976 MI->eraseFromParent(); // The instruction is gone now. 3977 3978 return BB; 3979} 3980 3981static 3982MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 3983 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 3984 E = MBB->succ_end(); I != E; ++I) 3985 if (*I != Succ) 3986 return *I; 3987 llvm_unreachable("Expecting a BB with two successors!"); 3988} 3989 3990MachineBasicBlock * 3991ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 3992 MachineBasicBlock *BB) const { 3993 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3994 DebugLoc dl = MI->getDebugLoc(); 3995 bool isThumb2 = Subtarget->isThumb2(); 3996 switch (MI->getOpcode()) { 3997 default: 3998 MI->dump(); 3999 llvm_unreachable("Unexpected instr type to insert"); 4000 4001 case ARM::ATOMIC_LOAD_ADD_I8: 4002 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4003 case ARM::ATOMIC_LOAD_ADD_I16: 4004 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4005 case ARM::ATOMIC_LOAD_ADD_I32: 4006 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4007 4008 case ARM::ATOMIC_LOAD_AND_I8: 4009 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4010 case ARM::ATOMIC_LOAD_AND_I16: 4011 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4012 case ARM::ATOMIC_LOAD_AND_I32: 4013 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4014 4015 case ARM::ATOMIC_LOAD_OR_I8: 4016 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4017 case ARM::ATOMIC_LOAD_OR_I16: 4018 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4019 case ARM::ATOMIC_LOAD_OR_I32: 4020 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4021 4022 case ARM::ATOMIC_LOAD_XOR_I8: 4023 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4024 case ARM::ATOMIC_LOAD_XOR_I16: 4025 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4026 case ARM::ATOMIC_LOAD_XOR_I32: 4027 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4028 4029 case ARM::ATOMIC_LOAD_NAND_I8: 4030 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4031 case ARM::ATOMIC_LOAD_NAND_I16: 4032 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4033 case ARM::ATOMIC_LOAD_NAND_I32: 4034 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4035 4036 case ARM::ATOMIC_LOAD_SUB_I8: 4037 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 4038 case ARM::ATOMIC_LOAD_SUB_I16: 4039 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 4040 case ARM::ATOMIC_LOAD_SUB_I32: 4041 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 4042 4043 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 4044 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 4045 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 4046 4047 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 4048 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 4049 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 4050 4051 case ARM::tMOVCCr_pseudo: { 4052 // To "insert" a SELECT_CC instruction, we actually have to insert the 4053 // diamond control-flow pattern. 
The incoming instruction knows the 4054 // destination vreg to set, the condition code register to branch on, the 4055 // true/false values to select between, and a branch opcode to use. 4056 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4057 MachineFunction::iterator It = BB; 4058 ++It; 4059 4060 // thisMBB: 4061 // ... 4062 // TrueVal = ... 4063 // cmpTY ccX, r1, r2 4064 // bCC copy1MBB 4065 // fallthrough --> copy0MBB 4066 MachineBasicBlock *thisMBB = BB; 4067 MachineFunction *F = BB->getParent(); 4068 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 4069 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 4070 F->insert(It, copy0MBB); 4071 F->insert(It, sinkMBB); 4072 4073 // Transfer the remainder of BB and its successor edges to sinkMBB. 4074 sinkMBB->splice(sinkMBB->begin(), BB, 4075 llvm::next(MachineBasicBlock::iterator(MI)), 4076 BB->end()); 4077 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 4078 4079 BB->addSuccessor(copy0MBB); 4080 BB->addSuccessor(sinkMBB); 4081 4082 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 4083 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 4084 4085 // copy0MBB: 4086 // %FalseValue = ... 4087 // # fallthrough to sinkMBB 4088 BB = copy0MBB; 4089 4090 // Update machine-CFG edges 4091 BB->addSuccessor(sinkMBB); 4092 4093 // sinkMBB: 4094 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4095 // ... 4096 BB = sinkMBB; 4097 BuildMI(*BB, BB->begin(), dl, 4098 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 4099 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4100 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4101 4102 MI->eraseFromParent(); // The pseudo instruction is gone now. 4103 return BB; 4104 } 4105 4106 case ARM::BCCi64: 4107 case ARM::BCCZi64: { 4108 // Compare both parts that make up the double comparison separately for 4109 // equality. 4110 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 4111 4112 unsigned LHS1 = MI->getOperand(1).getReg(); 4113 unsigned LHS2 = MI->getOperand(2).getReg(); 4114 if (RHSisZero) { 4115 AddDefaultPred(BuildMI(BB, dl, 4116 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4117 .addReg(LHS1).addImm(0)); 4118 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4119 .addReg(LHS2).addImm(0) 4120 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4121 } else { 4122 unsigned RHS1 = MI->getOperand(3).getReg(); 4123 unsigned RHS2 = MI->getOperand(4).getReg(); 4124 AddDefaultPred(BuildMI(BB, dl, 4125 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4126 .addReg(LHS1).addReg(RHS1)); 4127 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4128 .addReg(LHS2).addReg(RHS2) 4129 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4130 } 4131 4132 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 4133 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 4134 if (MI->getOperand(0).getImm() == ARMCC::NE) 4135 std::swap(destMBB, exitMBB); 4136 4137 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4138 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 4139 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 4140 .addMBB(exitMBB); 4141 4142 MI->eraseFromParent(); // The pseudo instruction is gone now. 
4143 return BB; 4144 } 4145 } 4146} 4147 4148//===----------------------------------------------------------------------===// 4149// ARM Optimization Hooks 4150//===----------------------------------------------------------------------===// 4151 4152static 4153SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 4154 TargetLowering::DAGCombinerInfo &DCI) { 4155 SelectionDAG &DAG = DCI.DAG; 4156 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4157 EVT VT = N->getValueType(0); 4158 unsigned Opc = N->getOpcode(); 4159 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 4160 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 4161 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); 4162 ISD::CondCode CC = ISD::SETCC_INVALID; 4163 4164 if (isSlctCC) { 4165 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 4166 } else { 4167 SDValue CCOp = Slct.getOperand(0); 4168 if (CCOp.getOpcode() == ISD::SETCC) 4169 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 4170 } 4171 4172 bool DoXform = false; 4173 bool InvCC = false; 4174 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 4175 "Bad input!"); 4176 4177 if (LHS.getOpcode() == ISD::Constant && 4178 cast<ConstantSDNode>(LHS)->isNullValue()) { 4179 DoXform = true; 4180 } else if (CC != ISD::SETCC_INVALID && 4181 RHS.getOpcode() == ISD::Constant && 4182 cast<ConstantSDNode>(RHS)->isNullValue()) { 4183 std::swap(LHS, RHS); 4184 SDValue Op0 = Slct.getOperand(0); 4185 EVT OpVT = isSlctCC ? Op0.getValueType() : 4186 Op0.getOperand(0).getValueType(); 4187 bool isInt = OpVT.isInteger(); 4188 CC = ISD::getSetCCInverse(CC, isInt); 4189 4190 if (!TLI.isCondCodeLegal(CC, OpVT)) 4191 return SDValue(); // Inverse operator isn't legal. 4192 4193 DoXform = true; 4194 InvCC = true; 4195 } 4196 4197 if (DoXform) { 4198 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 4199 if (isSlctCC) 4200 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 4201 Slct.getOperand(0), Slct.getOperand(1), CC); 4202 SDValue CCOp = Slct.getOperand(0); 4203 if (InvCC) 4204 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 4205 CCOp.getOperand(0), CCOp.getOperand(1), CC); 4206 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4207 CCOp, OtherOp, Result); 4208 } 4209 return SDValue(); 4210} 4211 4212/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 4213/// operands N0 and N1. This is a helper for PerformADDCombine that is 4214/// called with the default operands, and if that fails, with commuted 4215/// operands. 
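// Illustrative example of the select fold below:
//   %s = select i1 %c, i32 0, i32 %k
//   %r = add i32 %x, %s
// becomes
//   %r = select i1 %c, i32 %x, i32 (add %x, %k)
// which drops the zero operand and can later map onto a predicated add or
// CMOV.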
4216static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 4217 TargetLowering::DAGCombinerInfo &DCI) { 4218 SelectionDAG &DAG = DCI.DAG; 4219 4220 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 4221 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 4222 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 4223 if (Result.getNode()) return Result; 4224 } 4225 4226 // fold (add (arm_neon_vabd a, b) c) -> (arm_neon_vaba c, a, b) 4227 EVT VT = N->getValueType(0); 4228 if (N0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && VT.isInteger()) { 4229 unsigned IntNo = cast<ConstantSDNode>(N0.getOperand(0))->getZExtValue(); 4230 if (IntNo == Intrinsic::arm_neon_vabds) 4231 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, 4232 DAG.getConstant(Intrinsic::arm_neon_vabas, MVT::i32), 4233 N1, N0.getOperand(1), N0.getOperand(2)); 4234 if (IntNo == Intrinsic::arm_neon_vabdu) 4235 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), VT, 4236 DAG.getConstant(Intrinsic::arm_neon_vabau, MVT::i32), 4237 N1, N0.getOperand(1), N0.getOperand(2)); 4238 } 4239 4240 return SDValue(); 4241} 4242 4243/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 4244/// 4245static SDValue PerformADDCombine(SDNode *N, 4246 TargetLowering::DAGCombinerInfo &DCI) { 4247 SDValue N0 = N->getOperand(0); 4248 SDValue N1 = N->getOperand(1); 4249 4250 // First try with the default operand order. 4251 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); 4252 if (Result.getNode()) 4253 return Result; 4254 4255 // If that didn't work, try again with the operands commuted. 4256 return PerformADDCombineWithOperands(N, N1, N0, DCI); 4257} 4258 4259/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 4260/// 4261static SDValue PerformSUBCombine(SDNode *N, 4262 TargetLowering::DAGCombinerInfo &DCI) { 4263 SDValue N0 = N->getOperand(0); 4264 SDValue N1 = N->getOperand(1); 4265 4266 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 4267 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 4268 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 4269 if (Result.getNode()) return Result; 4270 } 4271 4272 return SDValue(); 4273} 4274 4275static SDValue PerformMULCombine(SDNode *N, 4276 TargetLowering::DAGCombinerInfo &DCI, 4277 const ARMSubtarget *Subtarget) { 4278 SelectionDAG &DAG = DCI.DAG; 4279 4280 if (Subtarget->isThumb1Only()) 4281 return SDValue(); 4282 4283 if (DAG.getMachineFunction(). 
        getFunction()->hasFnAttr(Attribute::OptimizeForSize))
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();

  uint64_t MulAmt = C->getZExtValue();
  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
  ShiftAmt = ShiftAmt & (32 - 1);
  SDValue V = N->getOperand(0);
  DebugLoc DL = N->getDebugLoc();

  SDValue Res;
  MulAmt >>= ShiftAmt;
  if (isPowerOf2_32(MulAmt - 1)) {
    // (mul x, 2^N + 1) => (add (shl x, N), x)
    Res = DAG.getNode(ISD::ADD, DL, VT,
                      V, DAG.getNode(ISD::SHL, DL, VT,
                                     V, DAG.getConstant(Log2_32(MulAmt-1),
                                                        MVT::i32)));
  } else if (isPowerOf2_32(MulAmt + 1)) {
    // (mul x, 2^N - 1) => (sub (shl x, N), x)
    Res = DAG.getNode(ISD::SUB, DL, VT,
                      DAG.getNode(ISD::SHL, DL, VT,
                                  V, DAG.getConstant(Log2_32(MulAmt+1),
                                                     MVT::i32)),
                      V);
  } else
    return SDValue();

  if (ShiftAmt != 0)
    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
                      DAG.getConstant(ShiftAmt, MVT::i32));

  // Do not add new nodes to DAG combiner worklist.
  DCI.CombineTo(N, Res, false);
  return SDValue();
}

/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
static SDValue PerformORCombine(SDNode *N,
                                TargetLowering::DAGCombinerInfo &DCI,
                                const ARMSubtarget *Subtarget) {
  // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
  // reasonable.

  // BFI is only available on V6T2+
  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  DebugLoc DL = N->getDebugLoc();
  // 1) or (and A, mask), val => ARMbfi A, val, mask
  //      iff (val & mask) == val
  //
  // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
  //  2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2)
  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
  //  2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2)
  //          && CountPopulation_32(mask) == CountPopulation_32(~mask2)
  //  (i.e., copy a bitfield value into another bitfield of the same width)
  if (N0.getOpcode() != ISD::AND)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();


  // The value and the mask need to be constants so we can verify this is
  // actually a bitfield set.  If the mask is 0xffff, we can do better
  // via a movt instruction, so don't use BFI in that case.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!C)
    return SDValue();
  unsigned Mask = C->getZExtValue();
  if (Mask == 0xffff)
    return SDValue();
  SDValue Res;
  // Case (1): or (and A, mask), val => ARMbfi A, val, mask
  if ((C = dyn_cast<ConstantSDNode>(N1))) {
    unsigned Val = C->getZExtValue();
    if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val)
      return SDValue();
    Val >>= CountTrailingZeros_32(~Mask);

    Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
                      DAG.getConstant(Val, MVT::i32),
                      DAG.getConstant(Mask, MVT::i32));

    // Do not add new nodes to DAG combiner worklist.
    DCI.CombineTo(N, Res, false);
  } else if (N1.getOpcode() == ISD::AND) {
    // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask
    C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!C)
      return SDValue();
    unsigned Mask2 = C->getZExtValue();

    if (ARM::isBitFieldInvertedMask(Mask) &&
        ARM::isBitFieldInvertedMask(~Mask2) &&
        (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask == 0xffff || Mask == 0xffff0000))
        return SDValue();
      // 2a
      unsigned lsb = CountTrailingZeros_32(Mask2);
      Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0),
                        DAG.getConstant(lsb, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res,
                        DAG.getConstant(Mask, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
    } else if (ARM::isBitFieldInvertedMask(~Mask) &&
               ARM::isBitFieldInvertedMask(Mask2) &&
               (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) {
      // The pack halfword instruction works better for masks that fit it,
      // so use that when it's available.
      if (Subtarget->hasT2ExtractPack() &&
          (Mask2 == 0xffff || Mask2 == 0xffff0000))
        return SDValue();
      // 2b
      unsigned lsb = CountTrailingZeros_32(Mask);
      Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                        DAG.getConstant(lsb, MVT::i32));
      Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res,
                        DAG.getConstant(Mask2, MVT::i32));
      // Do not add new nodes to DAG combiner worklist.
      DCI.CombineTo(N, Res, false);
    }
  }

  return SDValue();
}

/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  // fmrrd(fmdrr x, y) -> x,y
  SDValue InDouble = N->getOperand(0);
  if (InDouble.getOpcode() == ARMISD::VMOVDRR)
    return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
  return SDValue();
}

/// PerformVDUPLANECombine - Target-specific dag combine xforms for
/// ARMISD::VDUPLANE.
static SDValue PerformVDUPLANECombine(SDNode *N,
                                      TargetLowering::DAGCombinerInfo &DCI) {
  // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is
  // redundant.
  SDValue Op = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BIT_CONVERT)
    Op = Op.getOperand(0);
  if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM)
    return SDValue();

  // Make sure the VMOV element size is not bigger than the VDUPLANE elements.
  unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
  // The canonical VMOV for a zero vector uses a 32-bit element size.
  unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  unsigned EltBits;
  if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
    EltSize = 8;
  if (EltSize > VT.getVectorElementType().getSizeInBits())
    return SDValue();

  SDValue Res = DCI.DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op);
  return DCI.CombineTo(N, Res, false);
}

/// getVShiftImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift operation, where all the elements of the
/// build_vector must have the same constant integer value.
static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
  // Ignore bit_converts.
  while (Op.getOpcode() == ISD::BIT_CONVERT)
    Op = Op.getOperand(0);
  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
  APInt SplatBits, SplatUndef;
  unsigned SplatBitSize;
  bool HasAnyUndefs;
  if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize,
                                      HasAnyUndefs, ElementBits) ||
      SplatBitSize > ElementBits)
    return false;
  Cnt = SplatBits.getSExtValue();
  return true;
}

/// isVShiftLImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift left operation.  That value must be in the range:
///   0 <= Value < ElementBits for a left shift; or
///   0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation.  For a shift opcode, the value
/// is positive, but for an intrinsic the value count must be negative.  The
/// absolute value must be in the range:
///   1 <= |Value| <= ElementBits for a right shift; or
///   1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (! getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
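  // Illustrative example (editorial sketch, not from the original source):
  // for a call such as
  //   %s = call <4 x i32> @llvm.arm.neon.vshifts.v4i32(<4 x i32> %a,
  //                          <4 x i32> <i32 3, i32 3, i32 3, i32 3>)
  // the splatted build_vector is recognized by isVShiftLImm and the intrinsic
  // is rewritten below to an ARMISD::VSHL node carrying the immediate 3,
  // which later selects to a NEON shift-by-immediate instruction.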
4531 case Intrinsic::arm_neon_vshifts: 4532 case Intrinsic::arm_neon_vshiftu: 4533 case Intrinsic::arm_neon_vshiftls: 4534 case Intrinsic::arm_neon_vshiftlu: 4535 case Intrinsic::arm_neon_vshiftn: 4536 case Intrinsic::arm_neon_vrshifts: 4537 case Intrinsic::arm_neon_vrshiftu: 4538 case Intrinsic::arm_neon_vrshiftn: 4539 case Intrinsic::arm_neon_vqshifts: 4540 case Intrinsic::arm_neon_vqshiftu: 4541 case Intrinsic::arm_neon_vqshiftsu: 4542 case Intrinsic::arm_neon_vqshiftns: 4543 case Intrinsic::arm_neon_vqshiftnu: 4544 case Intrinsic::arm_neon_vqshiftnsu: 4545 case Intrinsic::arm_neon_vqrshiftns: 4546 case Intrinsic::arm_neon_vqrshiftnu: 4547 case Intrinsic::arm_neon_vqrshiftnsu: { 4548 EVT VT = N->getOperand(1).getValueType(); 4549 int64_t Cnt; 4550 unsigned VShiftOpc = 0; 4551 4552 switch (IntNo) { 4553 case Intrinsic::arm_neon_vshifts: 4554 case Intrinsic::arm_neon_vshiftu: 4555 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 4556 VShiftOpc = ARMISD::VSHL; 4557 break; 4558 } 4559 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 4560 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 4561 ARMISD::VSHRs : ARMISD::VSHRu); 4562 break; 4563 } 4564 return SDValue(); 4565 4566 case Intrinsic::arm_neon_vshiftls: 4567 case Intrinsic::arm_neon_vshiftlu: 4568 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 4569 break; 4570 llvm_unreachable("invalid shift count for vshll intrinsic"); 4571 4572 case Intrinsic::arm_neon_vrshifts: 4573 case Intrinsic::arm_neon_vrshiftu: 4574 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 4575 break; 4576 return SDValue(); 4577 4578 case Intrinsic::arm_neon_vqshifts: 4579 case Intrinsic::arm_neon_vqshiftu: 4580 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4581 break; 4582 return SDValue(); 4583 4584 case Intrinsic::arm_neon_vqshiftsu: 4585 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4586 break; 4587 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 4588 4589 case Intrinsic::arm_neon_vshiftn: 4590 case Intrinsic::arm_neon_vrshiftn: 4591 case Intrinsic::arm_neon_vqshiftns: 4592 case Intrinsic::arm_neon_vqshiftnu: 4593 case Intrinsic::arm_neon_vqshiftnsu: 4594 case Intrinsic::arm_neon_vqrshiftns: 4595 case Intrinsic::arm_neon_vqrshiftnu: 4596 case Intrinsic::arm_neon_vqrshiftnsu: 4597 // Narrowing shifts require an immediate right shift. 4598 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 4599 break; 4600 llvm_unreachable("invalid shift count for narrowing vector shift " 4601 "intrinsic"); 4602 4603 default: 4604 llvm_unreachable("unhandled vector shift"); 4605 } 4606 4607 switch (IntNo) { 4608 case Intrinsic::arm_neon_vshifts: 4609 case Intrinsic::arm_neon_vshiftu: 4610 // Opcode already set above. 4611 break; 4612 case Intrinsic::arm_neon_vshiftls: 4613 case Intrinsic::arm_neon_vshiftlu: 4614 if (Cnt == VT.getVectorElementType().getSizeInBits()) 4615 VShiftOpc = ARMISD::VSHLLi; 4616 else 4617 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
4618 ARMISD::VSHLLs : ARMISD::VSHLLu); 4619 break; 4620 case Intrinsic::arm_neon_vshiftn: 4621 VShiftOpc = ARMISD::VSHRN; break; 4622 case Intrinsic::arm_neon_vrshifts: 4623 VShiftOpc = ARMISD::VRSHRs; break; 4624 case Intrinsic::arm_neon_vrshiftu: 4625 VShiftOpc = ARMISD::VRSHRu; break; 4626 case Intrinsic::arm_neon_vrshiftn: 4627 VShiftOpc = ARMISD::VRSHRN; break; 4628 case Intrinsic::arm_neon_vqshifts: 4629 VShiftOpc = ARMISD::VQSHLs; break; 4630 case Intrinsic::arm_neon_vqshiftu: 4631 VShiftOpc = ARMISD::VQSHLu; break; 4632 case Intrinsic::arm_neon_vqshiftsu: 4633 VShiftOpc = ARMISD::VQSHLsu; break; 4634 case Intrinsic::arm_neon_vqshiftns: 4635 VShiftOpc = ARMISD::VQSHRNs; break; 4636 case Intrinsic::arm_neon_vqshiftnu: 4637 VShiftOpc = ARMISD::VQSHRNu; break; 4638 case Intrinsic::arm_neon_vqshiftnsu: 4639 VShiftOpc = ARMISD::VQSHRNsu; break; 4640 case Intrinsic::arm_neon_vqrshiftns: 4641 VShiftOpc = ARMISD::VQRSHRNs; break; 4642 case Intrinsic::arm_neon_vqrshiftnu: 4643 VShiftOpc = ARMISD::VQRSHRNu; break; 4644 case Intrinsic::arm_neon_vqrshiftnsu: 4645 VShiftOpc = ARMISD::VQRSHRNsu; break; 4646 } 4647 4648 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4649 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 4650 } 4651 4652 case Intrinsic::arm_neon_vshiftins: { 4653 EVT VT = N->getOperand(1).getValueType(); 4654 int64_t Cnt; 4655 unsigned VShiftOpc = 0; 4656 4657 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 4658 VShiftOpc = ARMISD::VSLI; 4659 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 4660 VShiftOpc = ARMISD::VSRI; 4661 else { 4662 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 4663 } 4664 4665 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4666 N->getOperand(1), N->getOperand(2), 4667 DAG.getConstant(Cnt, MVT::i32)); 4668 } 4669 4670 case Intrinsic::arm_neon_vqrshifts: 4671 case Intrinsic::arm_neon_vqrshiftu: 4672 // No immediate versions of these to check for. 4673 break; 4674 } 4675 4676 return SDValue(); 4677} 4678 4679/// PerformShiftCombine - Checks for immediate versions of vector shifts and 4680/// lowers them. As with the vector shift intrinsics, this is done during DAG 4681/// combining instead of DAG legalizing because the build_vectors for 64-bit 4682/// vector element shift counts are generally not legal, and it is hard to see 4683/// their values after they get legalized to loads from a constant pool. 4684static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 4685 const ARMSubtarget *ST) { 4686 EVT VT = N->getValueType(0); 4687 4688 // Nothing to be done for scalar shifts. 4689 if (! VT.isVector()) 4690 return SDValue(); 4691 4692 assert(ST->hasNEON() && "unexpected vector shift"); 4693 int64_t Cnt; 4694 4695 switch (N->getOpcode()) { 4696 default: llvm_unreachable("unexpected shift opcode"); 4697 4698 case ISD::SHL: 4699 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 4700 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 4701 DAG.getConstant(Cnt, MVT::i32)); 4702 break; 4703 4704 case ISD::SRA: 4705 case ISD::SRL: 4706 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 4707 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
4708 ARMISD::VSHRs : ARMISD::VSHRu); 4709 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 4710 DAG.getConstant(Cnt, MVT::i32)); 4711 } 4712 } 4713 return SDValue(); 4714} 4715 4716/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 4717/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 4718static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 4719 const ARMSubtarget *ST) { 4720 SDValue N0 = N->getOperand(0); 4721 4722 // Check for sign- and zero-extensions of vector extract operations of 8- 4723 // and 16-bit vector elements. NEON supports these directly. They are 4724 // handled during DAG combining because type legalization will promote them 4725 // to 32-bit types and it is messy to recognize the operations after that. 4726 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 4727 SDValue Vec = N0.getOperand(0); 4728 SDValue Lane = N0.getOperand(1); 4729 EVT VT = N->getValueType(0); 4730 EVT EltVT = N0.getValueType(); 4731 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4732 4733 if (VT == MVT::i32 && 4734 (EltVT == MVT::i8 || EltVT == MVT::i16) && 4735 TLI.isTypeLegal(Vec.getValueType())) { 4736 4737 unsigned Opc = 0; 4738 switch (N->getOpcode()) { 4739 default: llvm_unreachable("unexpected opcode"); 4740 case ISD::SIGN_EXTEND: 4741 Opc = ARMISD::VGETLANEs; 4742 break; 4743 case ISD::ZERO_EXTEND: 4744 case ISD::ANY_EXTEND: 4745 Opc = ARMISD::VGETLANEu; 4746 break; 4747 } 4748 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 4749 } 4750 } 4751 4752 return SDValue(); 4753} 4754 4755/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 4756/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 4757static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 4758 const ARMSubtarget *ST) { 4759 // If the target supports NEON, try to use vmax/vmin instructions for f32 4760 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 4761 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 4762 // a NaN; only do the transformation when it matches that behavior. 4763 4764 // For now only do this when using NEON for FP operations; if using VFP, it 4765 // is not obvious that the benefit outweighs the cost of switching to the 4766 // NEON pipeline. 4767 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 4768 N->getValueType(0) != MVT::f32) 4769 return SDValue(); 4770 4771 SDValue CondLHS = N->getOperand(0); 4772 SDValue CondRHS = N->getOperand(1); 4773 SDValue LHS = N->getOperand(2); 4774 SDValue RHS = N->getOperand(3); 4775 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 4776 4777 unsigned Opcode = 0; 4778 bool IsReversed; 4779 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 4780 IsReversed = false; // x CC y ? x : y 4781 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 4782 IsReversed = true ; // x CC y ? y : x 4783 } else { 4784 return SDValue(); 4785 } 4786 4787 bool IsUnordered; 4788 switch (CC) { 4789 default: break; 4790 case ISD::SETOLT: 4791 case ISD::SETOLE: 4792 case ISD::SETLT: 4793 case ISD::SETLE: 4794 case ISD::SETULT: 4795 case ISD::SETULE: 4796 // If LHS is NaN, an ordered comparison will be false and the result will 4797 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 4798 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
4799 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 4800 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4801 break; 4802 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 4803 // will return -0, so vmin can only be used for unsafe math or if one of 4804 // the operands is known to be nonzero. 4805 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 4806 !UnsafeFPMath && 4807 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4808 break; 4809 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 4810 break; 4811 4812 case ISD::SETOGT: 4813 case ISD::SETOGE: 4814 case ISD::SETGT: 4815 case ISD::SETGE: 4816 case ISD::SETUGT: 4817 case ISD::SETUGE: 4818 // If LHS is NaN, an ordered comparison will be false and the result will 4819 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 4820 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 4821 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 4822 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4823 break; 4824 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 4825 // will return +0, so vmax can only be used for unsafe math or if one of 4826 // the operands is known to be nonzero. 4827 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 4828 !UnsafeFPMath && 4829 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4830 break; 4831 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 4832 break; 4833 } 4834 4835 if (!Opcode) 4836 return SDValue(); 4837 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 4838} 4839 4840SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 4841 DAGCombinerInfo &DCI) const { 4842 switch (N->getOpcode()) { 4843 default: break; 4844 case ISD::ADD: return PerformADDCombine(N, DCI); 4845 case ISD::SUB: return PerformSUBCombine(N, DCI); 4846 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 4847 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 4848 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 4849 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 4850 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 4851 case ISD::SHL: 4852 case ISD::SRA: 4853 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 4854 case ISD::SIGN_EXTEND: 4855 case ISD::ZERO_EXTEND: 4856 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 4857 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 4858 } 4859 return SDValue(); 4860} 4861 4862bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 4863 if (!Subtarget->hasV6Ops()) 4864 // Pre-v6 does not support unaligned mem access. 4865 return false; 4866 4867 // v6+ may or may not support unaligned mem access depending on the system 4868 // configuration. 4869 // FIXME: This is pretty conservative. Should we provide cmdline option to 4870 // control the behaviour? 4871 if (!Subtarget->isTargetDarwin()) 4872 return false; 4873 4874 switch (VT.getSimpleVT().SimpleTy) { 4875 default: 4876 return false; 4877 case MVT::i8: 4878 case MVT::i16: 4879 case MVT::i32: 4880 return true; 4881 // FIXME: VLD1 etc with standard alignment is legal. 
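  // Illustrative note (editorial, not from the original source): when this
  // hook returns false, e.g. for an i32 load with "align 1" on a pre-v6 or
  // non-Darwin target, the generic legalizer expands the access into smaller
  // aligned loads/stores plus shifts; returning true lets an ordinary
  // unaligned LDR/STR be emitted directly.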
4882 } 4883} 4884 4885static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 4886 if (V < 0) 4887 return false; 4888 4889 unsigned Scale = 1; 4890 switch (VT.getSimpleVT().SimpleTy) { 4891 default: return false; 4892 case MVT::i1: 4893 case MVT::i8: 4894 // Scale == 1; 4895 break; 4896 case MVT::i16: 4897 // Scale == 2; 4898 Scale = 2; 4899 break; 4900 case MVT::i32: 4901 // Scale == 4; 4902 Scale = 4; 4903 break; 4904 } 4905 4906 if ((V & (Scale - 1)) != 0) 4907 return false; 4908 V /= Scale; 4909 return V == (V & ((1LL << 5) - 1)); 4910} 4911 4912static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 4913 const ARMSubtarget *Subtarget) { 4914 bool isNeg = false; 4915 if (V < 0) { 4916 isNeg = true; 4917 V = - V; 4918 } 4919 4920 switch (VT.getSimpleVT().SimpleTy) { 4921 default: return false; 4922 case MVT::i1: 4923 case MVT::i8: 4924 case MVT::i16: 4925 case MVT::i32: 4926 // + imm12 or - imm8 4927 if (isNeg) 4928 return V == (V & ((1LL << 8) - 1)); 4929 return V == (V & ((1LL << 12) - 1)); 4930 case MVT::f32: 4931 case MVT::f64: 4932 // Same as ARM mode. FIXME: NEON? 4933 if (!Subtarget->hasVFP2()) 4934 return false; 4935 if ((V & 3) != 0) 4936 return false; 4937 V >>= 2; 4938 return V == (V & ((1LL << 8) - 1)); 4939 } 4940} 4941 4942/// isLegalAddressImmediate - Return true if the integer value can be used 4943/// as the offset of the target addressing mode for load / store of the 4944/// given type. 4945static bool isLegalAddressImmediate(int64_t V, EVT VT, 4946 const ARMSubtarget *Subtarget) { 4947 if (V == 0) 4948 return true; 4949 4950 if (!VT.isSimple()) 4951 return false; 4952 4953 if (Subtarget->isThumb1Only()) 4954 return isLegalT1AddressImmediate(V, VT); 4955 else if (Subtarget->isThumb2()) 4956 return isLegalT2AddressImmediate(V, VT, Subtarget); 4957 4958 // ARM mode. 4959 if (V < 0) 4960 V = - V; 4961 switch (VT.getSimpleVT().SimpleTy) { 4962 default: return false; 4963 case MVT::i1: 4964 case MVT::i8: 4965 case MVT::i32: 4966 // +- imm12 4967 return V == (V & ((1LL << 12) - 1)); 4968 case MVT::i16: 4969 // +- imm8 4970 return V == (V & ((1LL << 8) - 1)); 4971 case MVT::f32: 4972 case MVT::f64: 4973 if (!Subtarget->hasVFP2()) // FIXME: NEON? 4974 return false; 4975 if ((V & 3) != 0) 4976 return false; 4977 V >>= 2; 4978 return V == (V & ((1LL << 8) - 1)); 4979 } 4980} 4981 4982bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 4983 EVT VT) const { 4984 int Scale = AM.Scale; 4985 if (Scale < 0) 4986 return false; 4987 4988 switch (VT.getSimpleVT().SimpleTy) { 4989 default: return false; 4990 case MVT::i1: 4991 case MVT::i8: 4992 case MVT::i16: 4993 case MVT::i32: 4994 if (Scale == 1) 4995 return true; 4996 // r + r << imm 4997 Scale = Scale & ~1; 4998 return Scale == 2 || Scale == 4 || Scale == 8; 4999 case MVT::i64: 5000 // r + r 5001 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5002 return true; 5003 return false; 5004 case MVT::isVoid: 5005 // Note, we allow "void" uses (basically, uses that aren't loads or 5006 // stores), because arm allows folding a scale into many arithmetic 5007 // operations. This should be made more precise and revisited later. 5008 5009 // Allow r << imm, but the imm has to be a multiple of two. 5010 if (Scale & 1) return false; 5011 return isPowerOf2_32(Scale); 5012 } 5013} 5014 5015/// isLegalAddressingMode - Return true if the addressing mode represented 5016/// by AM is legal for this target, for a load/store of the specified type. 
5017bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 5018 const Type *Ty) const { 5019 EVT VT = getValueType(Ty, true); 5020 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 5021 return false; 5022 5023 // Can never fold addr of global into load/store. 5024 if (AM.BaseGV) 5025 return false; 5026 5027 switch (AM.Scale) { 5028 case 0: // no scale reg, must be "r+i" or "r", or "i". 5029 break; 5030 case 1: 5031 if (Subtarget->isThumb1Only()) 5032 return false; 5033 // FALL THROUGH. 5034 default: 5035 // ARM doesn't support any R+R*scale+imm addr modes. 5036 if (AM.BaseOffs) 5037 return false; 5038 5039 if (!VT.isSimple()) 5040 return false; 5041 5042 if (Subtarget->isThumb2()) 5043 return isLegalT2ScaledAddressingMode(AM, VT); 5044 5045 int Scale = AM.Scale; 5046 switch (VT.getSimpleVT().SimpleTy) { 5047 default: return false; 5048 case MVT::i1: 5049 case MVT::i8: 5050 case MVT::i32: 5051 if (Scale < 0) Scale = -Scale; 5052 if (Scale == 1) 5053 return true; 5054 // r + r << imm 5055 return isPowerOf2_32(Scale & ~1); 5056 case MVT::i16: 5057 case MVT::i64: 5058 // r + r 5059 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5060 return true; 5061 return false; 5062 5063 case MVT::isVoid: 5064 // Note, we allow "void" uses (basically, uses that aren't loads or 5065 // stores), because arm allows folding a scale into many arithmetic 5066 // operations. This should be made more precise and revisited later. 5067 5068 // Allow r << imm, but the imm has to be a multiple of two. 5069 if (Scale & 1) return false; 5070 return isPowerOf2_32(Scale); 5071 } 5072 break; 5073 } 5074 return true; 5075} 5076 5077/// isLegalICmpImmediate - Return true if the specified immediate is legal 5078/// icmp immediate, that is the target has icmp instructions which can compare 5079/// a register against the immediate without having to materialize the 5080/// immediate into a register. 
5081bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 5082 if (!Subtarget->isThumb()) 5083 return ARM_AM::getSOImmVal(Imm) != -1; 5084 if (Subtarget->isThumb2()) 5085 return ARM_AM::getT2SOImmVal(Imm) != -1; 5086 return Imm >= 0 && Imm <= 255; 5087} 5088 5089static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 5090 bool isSEXTLoad, SDValue &Base, 5091 SDValue &Offset, bool &isInc, 5092 SelectionDAG &DAG) { 5093 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5094 return false; 5095 5096 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 5097 // AddressingMode 3 5098 Base = Ptr->getOperand(0); 5099 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5100 int RHSC = (int)RHS->getZExtValue(); 5101 if (RHSC < 0 && RHSC > -256) { 5102 assert(Ptr->getOpcode() == ISD::ADD); 5103 isInc = false; 5104 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5105 return true; 5106 } 5107 } 5108 isInc = (Ptr->getOpcode() == ISD::ADD); 5109 Offset = Ptr->getOperand(1); 5110 return true; 5111 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 5112 // AddressingMode 2 5113 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5114 int RHSC = (int)RHS->getZExtValue(); 5115 if (RHSC < 0 && RHSC > -0x1000) { 5116 assert(Ptr->getOpcode() == ISD::ADD); 5117 isInc = false; 5118 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5119 Base = Ptr->getOperand(0); 5120 return true; 5121 } 5122 } 5123 5124 if (Ptr->getOpcode() == ISD::ADD) { 5125 isInc = true; 5126 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 5127 if (ShOpcVal != ARM_AM::no_shift) { 5128 Base = Ptr->getOperand(1); 5129 Offset = Ptr->getOperand(0); 5130 } else { 5131 Base = Ptr->getOperand(0); 5132 Offset = Ptr->getOperand(1); 5133 } 5134 return true; 5135 } 5136 5137 isInc = (Ptr->getOpcode() == ISD::ADD); 5138 Base = Ptr->getOperand(0); 5139 Offset = Ptr->getOperand(1); 5140 return true; 5141 } 5142 5143 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 5144 return false; 5145} 5146 5147static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 5148 bool isSEXTLoad, SDValue &Base, 5149 SDValue &Offset, bool &isInc, 5150 SelectionDAG &DAG) { 5151 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5152 return false; 5153 5154 Base = Ptr->getOperand(0); 5155 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5156 int RHSC = (int)RHS->getZExtValue(); 5157 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 5158 assert(Ptr->getOpcode() == ISD::ADD); 5159 isInc = false; 5160 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5161 return true; 5162 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 5163 isInc = Ptr->getOpcode() == ISD::ADD; 5164 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 5165 return true; 5166 } 5167 } 5168 5169 return false; 5170} 5171 5172/// getPreIndexedAddressParts - returns true by value, base pointer and 5173/// offset pointer and addressing mode by reference if the node's address 5174/// can be legally represented as pre-indexed load / store address. 
5175bool 5176ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 5177 SDValue &Offset, 5178 ISD::MemIndexedMode &AM, 5179 SelectionDAG &DAG) const { 5180 if (Subtarget->isThumb1Only()) 5181 return false; 5182 5183 EVT VT; 5184 SDValue Ptr; 5185 bool isSEXTLoad = false; 5186 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5187 Ptr = LD->getBasePtr(); 5188 VT = LD->getMemoryVT(); 5189 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5190 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5191 Ptr = ST->getBasePtr(); 5192 VT = ST->getMemoryVT(); 5193 } else 5194 return false; 5195 5196 bool isInc; 5197 bool isLegal = false; 5198 if (Subtarget->isThumb2()) 5199 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5200 Offset, isInc, DAG); 5201 else 5202 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5203 Offset, isInc, DAG); 5204 if (!isLegal) 5205 return false; 5206 5207 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 5208 return true; 5209} 5210 5211/// getPostIndexedAddressParts - returns true by value, base pointer and 5212/// offset pointer and addressing mode by reference if this node can be 5213/// combined with a load / store to form a post-indexed load / store. 5214bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 5215 SDValue &Base, 5216 SDValue &Offset, 5217 ISD::MemIndexedMode &AM, 5218 SelectionDAG &DAG) const { 5219 if (Subtarget->isThumb1Only()) 5220 return false; 5221 5222 EVT VT; 5223 SDValue Ptr; 5224 bool isSEXTLoad = false; 5225 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5226 VT = LD->getMemoryVT(); 5227 Ptr = LD->getBasePtr(); 5228 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5229 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5230 VT = ST->getMemoryVT(); 5231 Ptr = ST->getBasePtr(); 5232 } else 5233 return false; 5234 5235 bool isInc; 5236 bool isLegal = false; 5237 if (Subtarget->isThumb2()) 5238 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5239 isInc, DAG); 5240 else 5241 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5242 isInc, DAG); 5243 if (!isLegal) 5244 return false; 5245 5246 if (Ptr != Base) { 5247 // Swap base ptr and offset to catch more post-index load / store when 5248 // it's legal. In Thumb2 mode, offset must be an immediate. 5249 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 5250 !Subtarget->isThumb2()) 5251 std::swap(Base, Offset); 5252 5253 // Post-indexed load / store update the base pointer. 5254 if (Ptr != Base) 5255 return false; 5256 } 5257 5258 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 5259 return true; 5260} 5261 5262void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 5263 const APInt &Mask, 5264 APInt &KnownZero, 5265 APInt &KnownOne, 5266 const SelectionDAG &DAG, 5267 unsigned Depth) const { 5268 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 5269 switch (Op.getOpcode()) { 5270 default: break; 5271 case ARMISD::CMOV: { 5272 // Bits are known zero/one if known on the LHS and RHS. 
5273 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 5274 if (KnownZero == 0 && KnownOne == 0) return; 5275 5276 APInt KnownZeroRHS, KnownOneRHS; 5277 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 5278 KnownZeroRHS, KnownOneRHS, Depth+1); 5279 KnownZero &= KnownZeroRHS; 5280 KnownOne &= KnownOneRHS; 5281 return; 5282 } 5283 } 5284} 5285 5286//===----------------------------------------------------------------------===// 5287// ARM Inline Assembly Support 5288//===----------------------------------------------------------------------===// 5289 5290/// getConstraintType - Given a constraint letter, return the type of 5291/// constraint it is for this target. 5292ARMTargetLowering::ConstraintType 5293ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 5294 if (Constraint.size() == 1) { 5295 switch (Constraint[0]) { 5296 default: break; 5297 case 'l': return C_RegisterClass; 5298 case 'w': return C_RegisterClass; 5299 } 5300 } 5301 return TargetLowering::getConstraintType(Constraint); 5302} 5303 5304std::pair<unsigned, const TargetRegisterClass*> 5305ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5306 EVT VT) const { 5307 if (Constraint.size() == 1) { 5308 // GCC ARM Constraint Letters 5309 switch (Constraint[0]) { 5310 case 'l': 5311 if (Subtarget->isThumb()) 5312 return std::make_pair(0U, ARM::tGPRRegisterClass); 5313 else 5314 return std::make_pair(0U, ARM::GPRRegisterClass); 5315 case 'r': 5316 return std::make_pair(0U, ARM::GPRRegisterClass); 5317 case 'w': 5318 if (VT == MVT::f32) 5319 return std::make_pair(0U, ARM::SPRRegisterClass); 5320 if (VT.getSizeInBits() == 64) 5321 return std::make_pair(0U, ARM::DPRRegisterClass); 5322 if (VT.getSizeInBits() == 128) 5323 return std::make_pair(0U, ARM::QPRRegisterClass); 5324 break; 5325 } 5326 } 5327 if (StringRef("{cc}").equals_lower(Constraint)) 5328 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 5329 5330 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5331} 5332 5333std::vector<unsigned> ARMTargetLowering:: 5334getRegClassForInlineAsmConstraint(const std::string &Constraint, 5335 EVT VT) const { 5336 if (Constraint.size() != 1) 5337 return std::vector<unsigned>(); 5338 5339 switch (Constraint[0]) { // GCC ARM Constraint Letters 5340 default: break; 5341 case 'l': 5342 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5343 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5344 0); 5345 case 'r': 5346 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5347 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5348 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 5349 ARM::R12, ARM::LR, 0); 5350 case 'w': 5351 if (VT == MVT::f32) 5352 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 5353 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 5354 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 5355 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 5356 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 5357 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 5358 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 5359 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 5360 if (VT.getSizeInBits() == 64) 5361 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 5362 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 5363 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 5364 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 5365 if (VT.getSizeInBits() == 128) 5366 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 5367 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 5368 break; 5369 } 5370 5371 return std::vector<unsigned>(); 5372} 
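// Illustrative note (editorial, not from the original source): with the
// constraint handling above, GCC-style inline assembly such as
//   float r;
//   asm("vadd.f32 %0, %1, %2" : "=w"(r) : "w"(a), "w"(b));
// gets its float operands allocated to S registers ('w' with an f32 type),
// while the 'l' constraint restricts integer operands to R0-R7 when
// compiling in Thumb mode.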
5373 5374/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5375/// vector. If it is invalid, don't add anything to Ops. 5376void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 5377 char Constraint, 5378 std::vector<SDValue>&Ops, 5379 SelectionDAG &DAG) const { 5380 SDValue Result(0, 0); 5381 5382 switch (Constraint) { 5383 default: break; 5384 case 'I': case 'J': case 'K': case 'L': 5385 case 'M': case 'N': case 'O': 5386 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 5387 if (!C) 5388 return; 5389 5390 int64_t CVal64 = C->getSExtValue(); 5391 int CVal = (int) CVal64; 5392 // None of these constraints allow values larger than 32 bits. Check 5393 // that the value fits in an int. 5394 if (CVal != CVal64) 5395 return; 5396 5397 switch (Constraint) { 5398 case 'I': 5399 if (Subtarget->isThumb1Only()) { 5400 // This must be a constant between 0 and 255, for ADD 5401 // immediates. 5402 if (CVal >= 0 && CVal <= 255) 5403 break; 5404 } else if (Subtarget->isThumb2()) { 5405 // A constant that can be used as an immediate value in a 5406 // data-processing instruction. 5407 if (ARM_AM::getT2SOImmVal(CVal) != -1) 5408 break; 5409 } else { 5410 // A constant that can be used as an immediate value in a 5411 // data-processing instruction. 5412 if (ARM_AM::getSOImmVal(CVal) != -1) 5413 break; 5414 } 5415 return; 5416 5417 case 'J': 5418 if (Subtarget->isThumb()) { // FIXME thumb2 5419 // This must be a constant between -255 and -1, for negated ADD 5420 // immediates. This can be used in GCC with an "n" modifier that 5421 // prints the negated value, for use with SUB instructions. It is 5422 // not useful otherwise but is implemented for compatibility. 5423 if (CVal >= -255 && CVal <= -1) 5424 break; 5425 } else { 5426 // This must be a constant between -4095 and 4095. It is not clear 5427 // what this constraint is intended for. Implemented for 5428 // compatibility with GCC. 5429 if (CVal >= -4095 && CVal <= 4095) 5430 break; 5431 } 5432 return; 5433 5434 case 'K': 5435 if (Subtarget->isThumb1Only()) { 5436 // A 32-bit value where only one byte has a nonzero value. Exclude 5437 // zero to match GCC. This constraint is used by GCC internally for 5438 // constants that can be loaded with a move/shift combination. 5439 // It is not useful otherwise but is implemented for compatibility. 5440 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 5441 break; 5442 } else if (Subtarget->isThumb2()) { 5443 // A constant whose bitwise inverse can be used as an immediate 5444 // value in a data-processing instruction. This can be used in GCC 5445 // with a "B" modifier that prints the inverted value, for use with 5446 // BIC and MVN instructions. It is not useful otherwise but is 5447 // implemented for compatibility. 5448 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 5449 break; 5450 } else { 5451 // A constant whose bitwise inverse can be used as an immediate 5452 // value in a data-processing instruction. This can be used in GCC 5453 // with a "B" modifier that prints the inverted value, for use with 5454 // BIC and MVN instructions. It is not useful otherwise but is 5455 // implemented for compatibility. 5456 if (ARM_AM::getSOImmVal(~CVal) != -1) 5457 break; 5458 } 5459 return; 5460 5461 case 'L': 5462 if (Subtarget->isThumb1Only()) { 5463 // This must be a constant between -7 and 7, 5464 // for 3-operand ADD/SUB immediate instructions. 
5465 if (CVal >= -7 && CVal < 7) 5466 break; 5467 } else if (Subtarget->isThumb2()) { 5468 // A constant whose negation can be used as an immediate value in a 5469 // data-processing instruction. This can be used in GCC with an "n" 5470 // modifier that prints the negated value, for use with SUB 5471 // instructions. It is not useful otherwise but is implemented for 5472 // compatibility. 5473 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 5474 break; 5475 } else { 5476 // A constant whose negation can be used as an immediate value in a 5477 // data-processing instruction. This can be used in GCC with an "n" 5478 // modifier that prints the negated value, for use with SUB 5479 // instructions. It is not useful otherwise but is implemented for 5480 // compatibility. 5481 if (ARM_AM::getSOImmVal(-CVal) != -1) 5482 break; 5483 } 5484 return; 5485 5486 case 'M': 5487 if (Subtarget->isThumb()) { // FIXME thumb2 5488 // This must be a multiple of 4 between 0 and 1020, for 5489 // ADD sp + immediate. 5490 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 5491 break; 5492 } else { 5493 // A power of two or a constant between 0 and 32. This is used in 5494 // GCC for the shift amount on shifted register operands, but it is 5495 // useful in general for any shift amounts. 5496 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 5497 break; 5498 } 5499 return; 5500 5501 case 'N': 5502 if (Subtarget->isThumb()) { // FIXME thumb2 5503 // This must be a constant between 0 and 31, for shift amounts. 5504 if (CVal >= 0 && CVal <= 31) 5505 break; 5506 } 5507 return; 5508 5509 case 'O': 5510 if (Subtarget->isThumb()) { // FIXME thumb2 5511 // This must be a multiple of 4 between -508 and 508, for 5512 // ADD/SUB sp = sp + immediate. 5513 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 5514 break; 5515 } 5516 return; 5517 } 5518 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 5519 break; 5520 } 5521 5522 if (Result.getNode()) { 5523 Ops.push_back(Result); 5524 return; 5525 } 5526 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5527} 5528 5529bool 5530ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 5531 // The ARM target isn't yet aware of offsets. 5532 return false; 5533} 5534 5535int ARM::getVFPf32Imm(const APFloat &FPImm) { 5536 APInt Imm = FPImm.bitcastToAPInt(); 5537 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 5538 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 5539 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 5540 5541 // We can handle 4 bits of mantissa. 5542 // mantissa = (16+UInt(e:f:g:h))/16. 5543 if (Mantissa & 0x7ffff) 5544 return -1; 5545 Mantissa >>= 19; 5546 if ((Mantissa & 0xf) != Mantissa) 5547 return -1; 5548 5549 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 5550 if (Exp < -3 || Exp > 4) 5551 return -1; 5552 Exp = ((Exp+3) & 0x7) ^ 4; 5553 5554 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 5555} 5556 5557int ARM::getVFPf64Imm(const APFloat &FPImm) { 5558 APInt Imm = FPImm.bitcastToAPInt(); 5559 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 5560 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 5561 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 5562 5563 // We can handle 4 bits of mantissa. 5564 // mantissa = (16+UInt(e:f:g:h))/16. 
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;
  // There can be 1's on either or both "outsides"; all the "inside"
  // bits must be 0's.
  unsigned int lsb = 0, msb = 31;
  while (v & (1 << msb)) --msb;
  while (v & (1 << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1 << i))
      return false;
  }
  return true;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}
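// Illustrative note (editorial, not from the original source): the encodings
// accepted above have the form (-1)^s * (16 + imm4)/16 * 2^exp with exp in
// [-3, 4], so constants such as 1.0, 0.5, 2.0 or 3.0 pass isFPImmLegal and
// can be materialized with a single VMOV immediate, while a value like 0.1
// fails the mantissa check and is loaded from the constant pool instead.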