ARMISelLowering.cpp revision 1dd5a2f4e127a99914359cf39f19b3a9916d6be1
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");

// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
                   cl::desc("Generate tail calls (TEMPORARY OPTION)."),
                   cl::init(false));
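// Illustrative note (not from the original source): cl::opt flags like the
// one above surface as command-line options on the LLVM tools, e.g.
// "llc -arm-tail-calls" opts in to tail-call generation while the feature
// is still experimental.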
// This option should go away when Machine LICM is smart enough to hoist a
// reg-to-reg VDUP.
static cl::opt<bool>
EnableARMVDUPsplat("arm-vdup-splat", cl::Hidden,
                   cl::desc("Generate VDUP for integer constant splats (TEMPORARY OPTION)."),
                   cl::init(false));

static cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
                   cl::desc("Generate calls via indirect call instructions"),
                   cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
                cl::desc("Enable / disable ARM interworking (for debugging only)"),
                cl::init(true));

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  if (ElemTy == MVT::i8 || ElemTy == MVT::i16)
    setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
  }
  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }
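  // Illustrative note (not from the original source): "Promote" here means
  // the operation is rewritten on a wider legal type of the same bit width.
  // For example, an AND on v8i8 can be performed as an AND on v2i32 (the
  // PromotedBitwiseVT for 64-bit D-register types), since bitwise operations
  // are insensitive to lane boundaries.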
  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
    : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
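      // Illustrative note (not from the original source): each comparison
      // libcall returns a nonzero value when its predicate holds, so
      // setCmpLibcallCC records how to interpret the return value. For
      // example, an ordered-equal (OEQ) compare calls __eqdf2vfp and then
      // tests "result != 0" (SETNE); the "ordered" O_F64 case reuses the
      // unordered helper and inverts the sense with SETEQ.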
      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
"__aeabi_fdiv"); 287 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 288 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 289 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 290 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 291 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 292 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 293 294 // Single-precision floating-point comparison helper functions 295 // RTABI chapter 4.1.2, Table 5 296 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 297 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 298 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 299 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 300 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 301 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 302 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 303 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 304 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 305 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 306 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 307 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 308 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 309 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 310 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 311 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 312 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 313 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 314 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 315 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 316 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 317 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 318 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 319 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 320 321 // Floating-point to integer conversions. 322 // RTABI chapter 4.1.2, Table 6 323 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 324 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 325 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 326 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 327 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 328 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 329 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 330 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 331 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 332 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 333 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 335 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 338 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 339 340 // Conversions between floating types. 341 // RTABI chapter 4.1.2, Table 7 342 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 343 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 344 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 345 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 346 347 // Integer to floating-point conversions. 
    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);

    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);
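    // Illustrative note (not from the original source): the DR types above
    // are 64-bit vectors that live in NEON D registers (e.g. v8i8, v2f32),
    // while the QR types are 128-bit vectors that live in Q registers
    // (e.g. v16i8, v4f32). Registering them makes these types legal, so the
    // legalizer stops breaking such vectors apart.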
    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);

    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im,  MVT::i1,  Legal);
      setIndexedLoadAction(im,  MVT::i8,  Legal);
      setIndexedLoadAction(im,  MVT::i16, Legal);
      setIndexedLoadAction(im,  MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1,  Legal);
      setIndexedStoreAction(im, MVT::i8,  Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }
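  // Illustrative note (not from the original source): the four indexed
  // flavors are pre/post increment/decrement. They map onto ARM's writeback
  // addressing modes, e.g. a pre-indexed load "ldr r0, [r1, #4]!" updates r1
  // before the access, while a post-indexed "ldr r0, [r1], #4" updates it
  // after.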
  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL,       MVT::i64, Expand);
    setOperationAction(ISD::MULHU,     MVT::i32, Expand);
    setOperationAction(ISD::MULHS,     MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL,   MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL,  MVT::i32, Expand);
  setOperationAction(ISD::CTTZ,  MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV,  MVT::i32, Expand);
    setOperationAction(ISD::UDIV,  MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM,  MVT::i32, Expand);
  setOperationAction(ISD::UREM,  MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART,            MVT::Other, Custom);
  setOperationAction(ISD::VAARG,              MVT::Other, Expand);
  setOperationAction(ISD::VACOPY,             MVT::Other, Expand);
  setOperationAction(ISD::VAEND,              MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE,          MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE,       MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION,        MVT::i32,   Expand);
  // FIXME: Shouldn't need this, since no register is used, but the legalizer
  // doesn't yet know how to not do that for SjLj.
  setExceptionSelectorRegister(ARM::R0);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb1Only())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP,      MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8,  Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP,      MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR,   MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8,  Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR
    // iff target supports vfp2.
    setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }
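  // Illustrative note (not from the original source): VMOVRRD moves a 64-bit
  // VFP D register into a pair of core registers and VMOVDRR does the
  // reverse, so an i64<->f64 bitcast becomes a register move instead of a
  // round trip through memory.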
  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
  }

  setOperationAction(ISD::SETCC,     MVT::i32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f32, Expand);
  setOperationAction(ISD::SETCC,     MVT::f64, Expand);
  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND,    MVT::Other, Expand);
  setOperationAction(ISD::BR_CC,     MVT::i32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f32,   Custom);
  setOperationAction(ISD::BR_CC,     MVT::f64,   Custom);
  setOperationAction(ISD::BR_JT,     MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow
  setOperationAction(ISD::FSIN,      MVT::f64, Expand);
  setOperationAction(ISD::FSIN,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f32, Expand);
  setOperationAction(ISD::FCOS,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f64, Expand);
  setOperationAction(ISD::FREM,      MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW,      MVT::f64, Expand);
  setOperationAction(ISD::FPOW,      MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD  - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops())
    setTargetDAGCombine(ISD::OR);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  maxStoresPerMemcpy = 1;   // temporary - rewrite interface to use type
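  // Illustrative note (not from the original source): maxStoresPerMemcpy
  // bounds how many stores the backend may emit when expanding a small
  // memcpy inline; with a limit of 1, anything larger than a single store
  // stays a call to memcpy.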
  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;
}

std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}

const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case ARMISD::Wrapper:       return "ARMISD::Wrapper";
  case ARMISD::WrapperJT:     return "ARMISD::WrapperJT";
  case ARMISD::CALL:          return "ARMISD::CALL";
  case ARMISD::CALL_PRED:     return "ARMISD::CALL_PRED";
  case ARMISD::CALL_NOLINK:   return "ARMISD::CALL_NOLINK";
  case ARMISD::tCALL:         return "ARMISD::tCALL";
  case ARMISD::BRCOND:        return "ARMISD::BRCOND";
  case ARMISD::BR_JT:         return "ARMISD::BR_JT";
  case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
  case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
  case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
  case ARMISD::CMP:           return "ARMISD::CMP";
  case ARMISD::CMPZ:          return "ARMISD::CMPZ";
  case ARMISD::CMPFP:         return "ARMISD::CMPFP";
  case ARMISD::CMPFPw0:       return "ARMISD::CMPFPw0";
  case ARMISD::BCC_i64:       return "ARMISD::BCC_i64";
  case ARMISD::FMSTAT:        return "ARMISD::FMSTAT";
  case ARMISD::CMOV:          return "ARMISD::CMOV";
  case ARMISD::CNEG:          return "ARMISD::CNEG";

  case ARMISD::RBIT:          return "ARMISD::RBIT";

  case ARMISD::FTOSI:         return "ARMISD::FTOSI";
  case ARMISD::FTOUI:         return "ARMISD::FTOUI";
  case ARMISD::SITOF:         return "ARMISD::SITOF";
  case ARMISD::UITOF:         return "ARMISD::UITOF";

  case ARMISD::SRL_FLAG:      return "ARMISD::SRL_FLAG";
  case ARMISD::SRA_FLAG:      return "ARMISD::SRA_FLAG";
  case ARMISD::RRX:           return "ARMISD::RRX";

  case ARMISD::VMOVRRD:       return "ARMISD::VMOVRRD";
  case ARMISD::VMOVDRR:       return "ARMISD::VMOVDRR";

  case ARMISD::EH_SJLJ_SETJMP:  return "ARMISD::EH_SJLJ_SETJMP";
  case ARMISD::EH_SJLJ_LONGJMP: return "ARMISD::EH_SJLJ_LONGJMP";

  case ARMISD::TC_RETURN:     return "ARMISD::TC_RETURN";

  case ARMISD::THREAD_POINTER: return "ARMISD::THREAD_POINTER";

  case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";

  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
  case ARMISD::SYNCBARRIER:   return "ARMISD::SYNCBARRIER";

  case ARMISD::VCEQ:          return "ARMISD::VCEQ";
  case ARMISD::VCGE:          return "ARMISD::VCGE";
  case ARMISD::VCGEU:         return "ARMISD::VCGEU";
  case ARMISD::VCGT:          return "ARMISD::VCGT";
  case ARMISD::VCGTU:         return "ARMISD::VCGTU";
  case ARMISD::VTST:          return "ARMISD::VTST";

  case ARMISD::VSHL:          return "ARMISD::VSHL";
  case ARMISD::VSHRs:         return "ARMISD::VSHRs";
  case ARMISD::VSHRu:         return "ARMISD::VSHRu";
  case ARMISD::VSHLLs:        return "ARMISD::VSHLLs";
  case ARMISD::VSHLLu:        return "ARMISD::VSHLLu";
  case ARMISD::VSHLLi:        return "ARMISD::VSHLLi";
  case ARMISD::VSHRN:         return "ARMISD::VSHRN";
  case ARMISD::VRSHRs:        return "ARMISD::VRSHRs";
  case ARMISD::VRSHRu:        return "ARMISD::VRSHRu";
  case ARMISD::VRSHRN:        return "ARMISD::VRSHRN";
  case ARMISD::VQSHLs:        return "ARMISD::VQSHLs";
  case ARMISD::VQSHLu:        return "ARMISD::VQSHLu";
  case ARMISD::VQSHLsu:       return "ARMISD::VQSHLsu";
  case ARMISD::VQSHRNs:       return "ARMISD::VQSHRNs";
  case ARMISD::VQSHRNu:       return "ARMISD::VQSHRNu";
  case ARMISD::VQSHRNsu:      return "ARMISD::VQSHRNsu";
  case ARMISD::VQRSHRNs:      return "ARMISD::VQRSHRNs";
  case ARMISD::VQRSHRNu:      return "ARMISD::VQRSHRNu";
  case ARMISD::VQRSHRNsu:     return "ARMISD::VQRSHRNsu";
  case ARMISD::VGETLANEu:     return "ARMISD::VGETLANEu";
  case ARMISD::VGETLANEs:     return "ARMISD::VGETLANEs";
  case ARMISD::VMOVIMM:       return "ARMISD::VMOVIMM";
  case ARMISD::VMVNIMM:       return "ARMISD::VMVNIMM";
  case ARMISD::VDUP:          return "ARMISD::VDUP";
  case ARMISD::VDUPLANE:      return "ARMISD::VDUPLANE";
  case ARMISD::VEXT:          return "ARMISD::VEXT";
  case ARMISD::VREV64:        return "ARMISD::VREV64";
  case ARMISD::VREV32:        return "ARMISD::VREV32";
  case ARMISD::VREV16:        return "ARMISD::VREV16";
  case ARMISD::VZIP:          return "ARMISD::VZIP";
  case ARMISD::VUZP:          return "ARMISD::VUZP";
  case ARMISD::VTRN:          return "ARMISD::VTRN";
  case ARMISD::VMULLs:        return "ARMISD::VMULLs";
  case ARMISD::VMULLu:        return "ARMISD::VMULLu";
  case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
  case ARMISD::FMAX:          return "ARMISD::FMAX";
  case ARMISD::FMIN:          return "ARMISD::FMIN";
  case ARMISD::BFI:           return "ARMISD::BFI";
  }
}

/// getRegClassFor - Return the register class that should be used for the
/// specified value type.
TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const {
  // Map v4i64 to QQ registers but do not make the type legal. Similarly map
  // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to
  // load / store 4 to 8 consecutive D registers.
  if (Subtarget->hasNEON()) {
    if (VT == MVT::v4i64)
      return ARM::QQPRRegisterClass;
    else if (VT == MVT::v8i64)
      return ARM::QQQQPRRegisterClass;
  }
  return TargetLowering::getRegClassFor(VT);
}

// Create a fast isel object.
FastISel *
ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const {
  return ARM::createFastISel(funcInfo);
}

/// getFunctionAlignment - Return the Log2 alignment of this function.
unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const {
  return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2;
}

/// getMaximalGlobalOffset - Returns the maximal possible offset which can
/// be used for loads / stores from the global.
unsigned ARMTargetLowering::getMaximalGlobalOffset() const {
  return (Subtarget->isThumb1Only() ? 127 : 4095);
}
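// Illustrative note (not from the original source): 4095 is the 12-bit
// immediate offset limit of ARM-mode LDR/STR; Thumb1 load/store encodings
// have much smaller immediate fields, hence the conservative 127 bound.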
127 : 4095); 853} 854 855Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 856 unsigned NumVals = N->getNumValues(); 857 if (!NumVals) 858 return Sched::RegPressure; 859 860 for (unsigned i = 0; i != NumVals; ++i) { 861 EVT VT = N->getValueType(i); 862 if (VT.isFloatingPoint() || VT.isVector()) 863 return Sched::Latency; 864 } 865 866 if (!N->isMachineOpcode()) 867 return Sched::RegPressure; 868 869 // Load are scheduled for latency even if there instruction itinerary 870 // is not available. 871 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 872 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 873 if (TID.mayLoad()) 874 return Sched::Latency; 875 876 if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2) 877 return Sched::Latency; 878 return Sched::RegPressure; 879} 880 881unsigned 882ARMTargetLowering::getRegPressureLimit(const TargetRegisterClass *RC, 883 MachineFunction &MF) const { 884 switch (RC->getID()) { 885 default: 886 return 0; 887 case ARM::tGPRRegClassID: 888 return RegInfo->hasFP(MF) ? 4 : 5; 889 case ARM::GPRRegClassID: { 890 unsigned FP = RegInfo->hasFP(MF) ? 1 : 0; 891 return 10 - FP - (Subtarget->isR9Reserved() ? 1 : 0); 892 } 893 case ARM::SPRRegClassID: // Currently not used as 'rep' register class. 894 case ARM::DPRRegClassID: 895 return 32 - 10; 896 } 897} 898 899//===----------------------------------------------------------------------===// 900// Lowering Code 901//===----------------------------------------------------------------------===// 902 903/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 904static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 905 switch (CC) { 906 default: llvm_unreachable("Unknown condition code!"); 907 case ISD::SETNE: return ARMCC::NE; 908 case ISD::SETEQ: return ARMCC::EQ; 909 case ISD::SETGT: return ARMCC::GT; 910 case ISD::SETGE: return ARMCC::GE; 911 case ISD::SETLT: return ARMCC::LT; 912 case ISD::SETLE: return ARMCC::LE; 913 case ISD::SETUGT: return ARMCC::HI; 914 case ISD::SETUGE: return ARMCC::HS; 915 case ISD::SETULT: return ARMCC::LO; 916 case ISD::SETULE: return ARMCC::LS; 917 } 918} 919 920/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}

//===----------------------------------------------------------------------===//
//                      Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::C:
  case CallingConv::Fast:
    // Use target triple & subtarget features to do actual dispatch.
    if (Subtarget->isAAPCS_ABI()) {
      if (Subtarget->hasVFP2() &&
          FloatABIType == FloatABI::Hard && !isVarArg)
        return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
      else
        return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
    } else
      return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS: CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
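      // Illustrative note (not from the original source): with a soft-float
      // return convention an f64 comes back split across two i32 registers
      // (e.g. r0/r1); the two halves are copied out below and glued back
      // into a double with VMOVDRR. A v2f64 result repeats this once per
      // f64 half.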
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       MachinePointerInfo(0), MachinePointerInfo(0));
}
/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal())
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);

  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet    = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                                                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;
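  // Illustrative note (not from the original source): the lowered call is
  // bracketed by CALLSEQ_START / CALLSEQ_END nodes, which later become the
  // SP adjustments that reserve and release the outgoing-argument area; for
  // sibcalls NumBytes is 0 because the caller's own argument area is reused.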
  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads.  In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibCall) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
      const GlobalValue *GV = G->getGlobal();
      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) {
      const char *Sym = S->getSymbol();

      // Create a constant pool entry for the callee address
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       Sym, ARMPCLabelIndex, 0);
      // Get the address of the callee into a register
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
    }
  } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    const GlobalValue *GV = G->getGlobal();
    isDirect = true;
    bool isExt = GV->isDeclaration() || GV->isWeakForLinker();
    bool isStub = (isExt && Subtarget->isTargetDarwin()) &&
                   getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // ARM call to a local ARM function is predicable.
    isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking);
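    // Illustrative note (not from the original source): before ARMv5T the BL
    // instruction cannot switch between ARM and Thumb state, so an
    // interworking call must go through BX with the callee's address in a
    // register; that is why the Thumb1 path below materializes the address
    // from the constant pool instead of emitting a direct branch-and-link.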
    // tBX takes a register source operand.
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
                                                           ARMPCLabelIndex,
                                                           ARMCP::CPValue, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      // On ELF targets for PIC code, direct calls should go through the PLT
      unsigned OpFlags = 0;
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags);
    }
  } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    isDirect = true;
    bool isStub = Subtarget->isTargetDarwin() &&
                  getTargetMachine().getRelocationModel() != Reloc::Static;
    isARMFunc = !Subtarget->isThumb() || isStub;
    // tBX takes a register source operand.
    const char *Sym = S->getSymbol();
    if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
      unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
      ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
                                                       Sym, ARMPCLabelIndex, 4);
      SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
      CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
      Callee = DAG.getLoad(getPointerTy(), dl,
                           DAG.getEntryNode(), CPAddr,
                           MachinePointerInfo::getConstantPool(),
                           false, false, 0);
      SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
      Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
                           getPointerTy(), Callee, PICLabel);
    } else {
      unsigned OpFlags = 0;
      // On ELF targets for PIC code, direct calls should go through the PLT
      if (Subtarget->isTargetELF() &&
          getTargetMachine().getRelocationModel() == Reloc::PIC_)
        OpFlags = ARMII::MO_PLT;
      Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags);
    }
  }

  // FIXME: handle tail calls differently.
  unsigned CallOpc;
  if (Subtarget->isThumb()) {
    if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps())
      CallOpc = ARMISD::CALL_NOLINK;
    else
      CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL;
  } else {
    CallOpc = (isDirect || Subtarget->hasV5TOps())
      ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL)
      : ARMISD::CALL_NOLINK;
  }

  std::vector<SDValue> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
  if (isTailCall)
    return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size());
1404 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1405 InFlag = Chain.getValue(1); 1406 1407 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1408 DAG.getIntPtrConstant(0, true), InFlag); 1409 if (!Ins.empty()) 1410 InFlag = Chain.getValue(1); 1411 1412 // Handle result values, copying them out of physregs into vregs that we 1413 // return. 1414 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1415 dl, DAG, InVals); 1416} 1417 1418/// MatchingStackOffset - Return true if the given stack call argument is 1419/// already available at the same relative position in the caller's 1420/// incoming argument stack. 1421static 1422bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1423 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1424 const ARMInstrInfo *TII) { 1425 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1426 int FI = INT_MAX; 1427 if (Arg.getOpcode() == ISD::CopyFromReg) { 1428 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1429 if (!VR || TargetRegisterInfo::isPhysicalRegister(VR)) 1430 return false; 1431 MachineInstr *Def = MRI->getVRegDef(VR); 1432 if (!Def) 1433 return false; 1434 if (!Flags.isByVal()) { 1435 if (!TII->isLoadFromStackSlot(Def, FI)) 1436 return false; 1437 } else { 1438 return false; 1439 } 1440 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1441 if (Flags.isByVal()) 1442 // ByVal argument is passed in as a pointer but it's now being 1443 // dereferenced. e.g. 1444 // define @foo(%struct.X* %A) { 1445 // tail call @bar(%struct.X* byval %A) 1446 // } 1447 return false; 1448 SDValue Ptr = Ld->getBasePtr(); 1449 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1450 if (!FINode) 1451 return false; 1452 FI = FINode->getIndex(); 1453 } else 1454 return false; 1455 1456 assert(FI != INT_MAX); 1457 if (!MFI->isFixedObjectIndex(FI)) 1458 return false; 1459 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1460} 1461 1462/// IsEligibleForTailCallOptimization - Check whether the call is eligible 1463/// for tail call optimization. Targets which want to do tail call 1464/// optimization should implement this function. 1465bool 1466ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1467 CallingConv::ID CalleeCC, 1468 bool isVarArg, 1469 bool isCalleeStructRet, 1470 bool isCallerStructRet, 1471 const SmallVectorImpl<ISD::OutputArg> &Outs, 1472 const SmallVectorImpl<SDValue> &OutVals, 1473 const SmallVectorImpl<ISD::InputArg> &Ins, 1474 SelectionDAG& DAG) const { 1475 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1476 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1477 bool CCMatch = CallerCC == CalleeCC; 1478 1479 // Look for obvious safe cases to perform tail call optimization that do not 1480 // require ABI changes. This is what gcc calls sibcall. 1481 1482 // Do not sibcall optimize vararg calls unless the call site passes no 1483 // arguments. 1484 if (isVarArg && !Outs.empty()) 1485 return false; 1486 1487 // Also avoid sibcall optimization if either caller or callee uses struct 1488 // return semantics. 1489 if (isCalleeStructRet || isCallerStructRet) 1490 return false; 1491 1492 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1493 // emitEpilogue is not ready for them. 1494 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1495 // LR.
This means if we need to reload LR, it takes an extra instruction, 1496 // which outweighs the value of the tail call; but here we don't know yet 1497 // whether LR is going to be used. Probably the right approach is to 1498 // generate the tail call here and turn it back into CALL/RET in 1499 // emitEpilogue if LR is used. 1500 if (Subtarget->isThumb1Only()) 1501 return false; 1502 1503 // For the moment, we can only do this to functions defined in this 1504 // compilation, or to indirect calls. A Thumb B to an ARM function, 1505 // or vice versa, is not easily fixed up in the linker unlike BL. 1506 // (We could do this by loading the address of the callee into a register; 1507 // that is an extra instruction over the direct call and burns a register 1508 // as well, so is not likely to be a win.) 1509 1510 // It might be safe to remove this restriction on non-Darwin. 1511 1512 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1513 // but we need to make sure there are enough registers; the only valid 1514 // registers are the 4 used for parameters. We don't currently do this 1515 // case. 1516 if (isa<ExternalSymbolSDNode>(Callee)) 1517 return false; 1518 1519 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1520 const GlobalValue *GV = G->getGlobal(); 1521 if (GV->isDeclaration() || GV->isWeakForLinker()) 1522 return false; 1523 } 1524 1525 // If the calling conventions do not match, then we'd better make sure the 1526 // results are returned in the same way as the caller expects. 1527 if (!CCMatch) { 1528 SmallVector<CCValAssign, 16> RVLocs1; 1529 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1530 RVLocs1, *DAG.getContext()); 1531 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1532 1533 SmallVector<CCValAssign, 16> RVLocs2; 1534 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1535 RVLocs2, *DAG.getContext()); 1536 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1537 1538 if (RVLocs1.size() != RVLocs2.size()) 1539 return false; 1540 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1541 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1542 return false; 1543 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1544 return false; 1545 if (RVLocs1[i].isRegLoc()) { 1546 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1547 return false; 1548 } else { 1549 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1550 return false; 1551 } 1552 } 1553 } 1554 1555 // If the callee takes no arguments then go on to check the results of the 1556 // call. 1557 if (!Outs.empty()) { 1558 // Check if stack adjustment is needed. For now, do not do this if any 1559 // argument is passed on the stack. 1560 SmallVector<CCValAssign, 16> ArgLocs; 1561 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1562 ArgLocs, *DAG.getContext()); 1563 CCInfo.AnalyzeCallOperands(Outs, 1564 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1565 if (CCInfo.getNextStackOffset()) { 1566 MachineFunction &MF = DAG.getMachineFunction(); 1567 1568 // Check if the arguments are already laid out in the same way as 1569 // the caller's fixed stack objects.
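// For example (illustrative IR), in
//   define i32 @f(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
//     %r = tail call i32 @g(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e)
//     ret i32 %r
//   }
// the fifth argument is passed on the stack, and the sibcall is only safe
// if %e is already sitting in the caller's matching incoming stack slot
// (checked by MatchingStackOffset below).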
1570 MachineFrameInfo *MFI = MF.getFrameInfo(); 1571 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1572 const ARMInstrInfo *TII = 1573 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1574 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1575 i != e; 1576 ++i, ++realArgIdx) { 1577 CCValAssign &VA = ArgLocs[i]; 1578 EVT RegVT = VA.getLocVT(); 1579 SDValue Arg = OutVals[realArgIdx]; 1580 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1581 if (VA.getLocInfo() == CCValAssign::Indirect) 1582 return false; 1583 if (VA.needsCustom()) { 1584 // f64 and vector types are split into multiple registers or 1585 // register/stack-slot combinations. The types will not match 1586 // the registers; give up on memory f64 refs until we figure 1587 // out what to do about this. 1588 if (!VA.isRegLoc()) 1589 return false; 1590 if (!ArgLocs[++i].isRegLoc()) 1591 return false; 1592 if (RegVT == MVT::v2f64) { 1593 if (!ArgLocs[++i].isRegLoc()) 1594 return false; 1595 if (!ArgLocs[++i].isRegLoc()) 1596 return false; 1597 } 1598 } else if (!VA.isRegLoc()) { 1599 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1600 MFI, MRI, TII)) 1601 return false; 1602 } 1603 } 1604 } 1605 } 1606 1607 return true; 1608} 1609 1610SDValue 1611ARMTargetLowering::LowerReturn(SDValue Chain, 1612 CallingConv::ID CallConv, bool isVarArg, 1613 const SmallVectorImpl<ISD::OutputArg> &Outs, 1614 const SmallVectorImpl<SDValue> &OutVals, 1615 DebugLoc dl, SelectionDAG &DAG) const { 1616 1617 // CCValAssign - represent the assignment of the return value to a location. 1618 SmallVector<CCValAssign, 16> RVLocs; 1619 1620 // CCState - Info about the registers and stack slots. 1621 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1622 *DAG.getContext()); 1623 1624 // Analyze outgoing return values. 1625 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1626 isVarArg)); 1627 1628 // If this is the first return lowered for this function, add 1629 // the regs to the liveout set for the function. 1630 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1631 for (unsigned i = 0; i != RVLocs.size(); ++i) 1632 if (RVLocs[i].isRegLoc()) 1633 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1634 } 1635 1636 SDValue Flag; 1637 1638 // Copy the result values into the output registers. 1639 for (unsigned i = 0, realRVLocIdx = 0; 1640 i != RVLocs.size(); 1641 ++i, ++realRVLocIdx) { 1642 CCValAssign &VA = RVLocs[i]; 1643 assert(VA.isRegLoc() && "Can only return in registers!"); 1644 1645 SDValue Arg = OutVals[realRVLocIdx]; 1646 1647 switch (VA.getLocInfo()) { 1648 default: llvm_unreachable("Unknown loc info!"); 1649 case CCValAssign::Full: break; 1650 case CCValAssign::BCvt: 1651 Arg = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getLocVT(), Arg); 1652 break; 1653 } 1654 1655 if (VA.needsCustom()) { 1656 if (VA.getLocVT() == MVT::v2f64) { 1657 // Extract the first half and return it in two registers. 
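// (Schematically: lane 0 of the v2f64 goes out as R0/R1 via VMOVRRD and
// lane 1 as R2/R3, assuming the standard GPR assignment for the calling
// convention in use.)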
1658 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1659 DAG.getConstant(0, MVT::i32)); 1660 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1661 DAG.getVTList(MVT::i32, MVT::i32), Half); 1662 1663 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1664 Flag = Chain.getValue(1); 1665 VA = RVLocs[++i]; // skip ahead to next loc 1666 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1667 HalfGPRs.getValue(1), Flag); 1668 Flag = Chain.getValue(1); 1669 VA = RVLocs[++i]; // skip ahead to next loc 1670 1671 // Extract the 2nd half and fall through to handle it as an f64 value. 1672 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1673 DAG.getConstant(1, MVT::i32)); 1674 } 1675 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1676 // available. 1677 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1678 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1679 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1680 Flag = Chain.getValue(1); 1681 VA = RVLocs[++i]; // skip ahead to next loc 1682 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1683 Flag); 1684 } else 1685 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1686 1687 // Guarantee that all emitted copies are 1688 // stuck together so the scheduler cannot separate or reorder them. 1689 Flag = Chain.getValue(1); 1690 } 1691 1692 SDValue result; 1693 if (Flag.getNode()) 1694 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1695 else // Return Void 1696 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1697 1698 return result; 1699} 1700 1701// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1702// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1703// one of the above mentioned nodes. It has to be wrapped because otherwise 1704// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1705// be used to form addressing modes. These wrapped nodes will be selected 1706// into MOVi. 1707static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1708 EVT PtrVT = Op.getValueType(); 1709 // FIXME there is no actual debug info here 1710 DebugLoc dl = Op.getDebugLoc(); 1711 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1712 SDValue Res; 1713 if (CP->isMachineConstantPoolEntry()) 1714 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1715 CP->getAlignment()); 1716 else 1717 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1718 CP->getAlignment()); 1719 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1720} 1721 1722unsigned ARMTargetLowering::getJumpTableEncoding() const { 1723 return MachineJumpTableInfo::EK_Inline; 1724} 1725 1726SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1727 SelectionDAG &DAG) const { 1728 MachineFunction &MF = DAG.getMachineFunction(); 1729 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1730 unsigned ARMPCLabelIndex = 0; 1731 DebugLoc DL = Op.getDebugLoc(); 1732 EVT PtrVT = getPointerTy(); 1733 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1734 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1735 SDValue CPAddr; 1736 if (RelocM == Reloc::Static) { 1737 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1738 } else { 1739 unsigned PCAdj = Subtarget->isThumb() ?
4 : 8; 1740 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1741 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1742 ARMCP::CPBlockAddress, 1743 PCAdj); 1744 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1745 } 1746 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1747 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1748 MachinePointerInfo::getConstantPool(), 1749 false, false, 0); 1750 if (RelocM == Reloc::Static) 1751 return Result; 1752 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1753 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1754} 1755 1756// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1757SDValue 1758ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1759 SelectionDAG &DAG) const { 1760 DebugLoc dl = GA->getDebugLoc(); 1761 EVT PtrVT = getPointerTy(); 1762 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1763 MachineFunction &MF = DAG.getMachineFunction(); 1764 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1765 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1766 ARMConstantPoolValue *CPV = 1767 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1768 ARMCP::CPValue, PCAdj, "tlsgd", true); 1769 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1770 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1771 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1772 MachinePointerInfo::getConstantPool(), 1773 false, false, 0); 1774 SDValue Chain = Argument.getValue(1); 1775 1776 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1777 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1778 1779 // call __tls_get_addr. 1780 ArgListTy Args; 1781 ArgListEntry Entry; 1782 Entry.Node = Argument; 1783 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1784 Args.push_back(Entry); 1785 // FIXME: is there useful debug info available here? 1786 std::pair<SDValue, SDValue> CallResult = 1787 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1788 false, false, false, false, 1789 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1790 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1791 return CallResult.first; 1792} 1793 1794// Lower ISD::GlobalTLSAddress using the "initial exec" or 1795// "local exec" model. 1796SDValue 1797ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1798 SelectionDAG &DAG) const { 1799 const GlobalValue *GV = GA->getGlobal(); 1800 DebugLoc dl = GA->getDebugLoc(); 1801 SDValue Offset; 1802 SDValue Chain = DAG.getEntryNode(); 1803 EVT PtrVT = getPointerTy(); 1804 // Get the Thread Pointer 1805 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1806 1807 if (GV->isDeclaration()) { 1808 MachineFunction &MF = DAG.getMachineFunction(); 1809 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1810 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1811 // Initial exec model. 1812 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 1813 ARMConstantPoolValue *CPV = 1814 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1815 ARMCP::CPValue, PCAdj, "gottpoff", true); 1816 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1817 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1818 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1819 MachinePointerInfo::getConstantPool(), 1820 false, false, 0); 1821 Chain = Offset.getValue(1); 1822 1823 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1824 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 1825 1826 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1827 MachinePointerInfo::getConstantPool(), 1828 false, false, 0); 1829 } else { 1830 // local exec model 1831 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, "tpoff"); 1832 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1833 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1834 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1835 MachinePointerInfo::getConstantPool(), 1836 false, false, 0); 1837 } 1838 1839 // The address of the thread-local variable is the sum of the thread 1840 // pointer and the offset of the variable. 1841 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 1842} 1843 1844SDValue 1845ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 1846 // TODO: implement the "local dynamic" model 1847 assert(Subtarget->isTargetELF() && 1848 "TLS not implemented for non-ELF targets"); 1849 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1850 // If the relocation model is PIC, use the "General Dynamic" TLS Model; 1851 // otherwise use the "Local Exec" TLS Model. 1852 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 1853 return LowerToTLSGeneralDynamicModel(GA, DAG); 1854 else 1855 return LowerToTLSExecModels(GA, DAG); 1856} 1857 1858SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 1859 SelectionDAG &DAG) const { 1860 EVT PtrVT = getPointerTy(); 1861 DebugLoc dl = Op.getDebugLoc(); 1862 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1863 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1864 if (RelocM == Reloc::PIC_) { 1865 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 1866 ARMConstantPoolValue *CPV = 1867 new ARMConstantPoolValue(GV, UseGOTOFF ? "GOTOFF" : "GOT"); 1868 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1869 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1870 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1871 CPAddr, 1872 MachinePointerInfo::getConstantPool(), 1873 false, false, 0); 1874 SDValue Chain = Result.getValue(1); 1875 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1876 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1877 if (!UseGOTOFF) 1878 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 1879 MachinePointerInfo::getGOT(), false, false, 0); 1880 return Result; 1881 } else { 1882 // If we have T2 ops, we can materialize the address directly via movt/movw 1883 // pair. This is always cheaper.
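// (Illustrative sequence:
//    movw r0, :lower16:sym
//    movt r0, :upper16:sym
//  versus a constant-pool load, which costs a load plus pool space.)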
1884 if (Subtarget->useMovt()) { 1885 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 1886 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 1887 } else { 1888 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1889 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1890 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1891 MachinePointerInfo::getConstantPool(), 1892 false, false, 0); 1893 } 1894 } 1895} 1896 1897SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 1898 SelectionDAG &DAG) const { 1899 MachineFunction &MF = DAG.getMachineFunction(); 1900 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1901 unsigned ARMPCLabelIndex = 0; 1902 EVT PtrVT = getPointerTy(); 1903 DebugLoc dl = Op.getDebugLoc(); 1904 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1905 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1906 SDValue CPAddr; 1907 if (RelocM == Reloc::Static) 1908 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 1909 else { 1910 ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1911 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 1912 ARMConstantPoolValue *CPV = 1913 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 1914 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1915 } 1916 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1917 1918 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1919 MachinePointerInfo::getConstantPool(), 1920 false, false, 0); 1921 SDValue Chain = Result.getValue(1); 1922 1923 if (RelocM == Reloc::PIC_) { 1924 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1925 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1926 } 1927 1928 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 1929 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 1930 false, false, 0); 1931 1932 return Result; 1933} 1934 1935SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 1936 SelectionDAG &DAG) const { 1937 assert(Subtarget->isTargetELF() && 1938 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 1939 MachineFunction &MF = DAG.getMachineFunction(); 1940 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1941 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1942 EVT PtrVT = getPointerTy(); 1943 DebugLoc dl = Op.getDebugLoc(); 1944 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1945 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1946 "_GLOBAL_OFFSET_TABLE_", 1947 ARMPCLabelIndex, PCAdj); 1948 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1949 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1950 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 1951 MachinePointerInfo::getConstantPool(), 1952 false, false, 0); 1953 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1954 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 1955} 1956 1957SDValue 1958ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 1959 DebugLoc dl = Op.getDebugLoc(); 1960 SDValue Val = DAG.getConstant(0, MVT::i32); 1961 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 1962 Op.getOperand(1), Val); 1963} 1964 1965SDValue 1966ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 1967 DebugLoc dl = Op.getDebugLoc(); 1968 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 1969 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 1970} 1971 1972SDValue 1973ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 1974 const ARMSubtarget *Subtarget) const { 1975 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 1976 DebugLoc dl = Op.getDebugLoc(); 1977 switch (IntNo) { 1978 default: return SDValue(); // Don't custom lower most intrinsics. 1979 case Intrinsic::arm_thread_pointer: { 1980 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1981 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1982 } 1983 case Intrinsic::eh_sjlj_lsda: { 1984 MachineFunction &MF = DAG.getMachineFunction(); 1985 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1986 unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId(); 1987 EVT PtrVT = getPointerTy(); 1988 DebugLoc dl = Op.getDebugLoc(); 1989 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1990 SDValue CPAddr; 1991 unsigned PCAdj = (RelocM != Reloc::PIC_) 1992 ? 0 : (Subtarget->isThumb() ? 4 : 8); 1993 ARMConstantPoolValue *CPV = 1994 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 1995 ARMCP::CPLSDA, PCAdj); 1996 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1997 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1998 SDValue Result = 1999 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2000 MachinePointerInfo::getConstantPool(), 2001 false, false, 0); 2002 2003 if (RelocM == Reloc::PIC_) { 2004 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2005 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2006 } 2007 return Result; 2008 } 2009 } 2010} 2011 2012static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2013 const ARMSubtarget *Subtarget) { 2014 DebugLoc dl = Op.getDebugLoc(); 2015 SDValue Op5 = Op.getOperand(5); 2016 unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue(); 2017 // Some subtargets which have dmb and dsb instructions can handle barriers 2018 // directly. Some ARMv6 CPUs can support them with the help of the mcr 2019 // instruction. Thumb1 and pre-v6 ARM mode use a libcall instead and should 2020 // never get here. 2021 unsigned Opc = isDeviceBarrier ?
ARMISD::SYNCBARRIER : ARMISD::MEMBARRIER; 2022 if (Subtarget->hasDataBarrier()) 2023 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0)); 2024 else { 2025 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb1Only() && 2026 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2027 return DAG.getNode(Opc, dl, MVT::Other, Op.getOperand(0), 2028 DAG.getConstant(0, MVT::i32)); 2029 } 2030} 2031 2032static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2033 MachineFunction &MF = DAG.getMachineFunction(); 2034 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2035 2036 // vastart just stores the address of the VarArgsFrameIndex slot into the 2037 // memory location argument. 2038 DebugLoc dl = Op.getDebugLoc(); 2039 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2040 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2041 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2042 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2043 MachinePointerInfo(SV), false, false, 0); 2044} 2045 2046SDValue 2047ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2048 SDValue &Root, SelectionDAG &DAG, 2049 DebugLoc dl) const { 2050 MachineFunction &MF = DAG.getMachineFunction(); 2051 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2052 2053 TargetRegisterClass *RC; 2054 if (AFI->isThumb1OnlyFunction()) 2055 RC = ARM::tGPRRegisterClass; 2056 else 2057 RC = ARM::GPRRegisterClass; 2058 2059 // Transform the arguments stored in physical registers into virtual ones. 2060 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2061 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2062 2063 SDValue ArgValue2; 2064 if (NextVA.isMemLoc()) { 2065 MachineFrameInfo *MFI = MF.getFrameInfo(); 2066 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2067 2068 // Create load node to retrieve arguments from the stack. 2069 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2070 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2071 MachinePointerInfo::getFixedStack(FI), 2072 false, false, 0); 2073 } else { 2074 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2075 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2076 } 2077 2078 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2079} 2080 2081SDValue 2082ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2083 CallingConv::ID CallConv, bool isVarArg, 2084 const SmallVectorImpl<ISD::InputArg> 2085 &Ins, 2086 DebugLoc dl, SelectionDAG &DAG, 2087 SmallVectorImpl<SDValue> &InVals) 2088 const { 2089 2090 MachineFunction &MF = DAG.getMachineFunction(); 2091 MachineFrameInfo *MFI = MF.getFrameInfo(); 2092 2093 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2094 2095 // Assign locations to all of the incoming arguments. 2096 SmallVector<CCValAssign, 16> ArgLocs; 2097 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2098 *DAG.getContext()); 2099 CCInfo.AnalyzeFormalArguments(Ins, 2100 CCAssignFnForNode(CallConv, /* Return*/ false, 2101 isVarArg)); 2102 2103 SmallVector<SDValue, 16> ArgValues; 2104 2105 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2106 CCValAssign &VA = ArgLocs[i]; 2107 2108 // Arguments stored in registers. 2109 if (VA.isRegLoc()) { 2110 EVT RegVT = VA.getLocVT(); 2111 2112 SDValue ArgValue; 2113 if (VA.needsCustom()) { 2114 // f64 and vector types are split up into multiple registers or 2115 // combinations of registers and stack slots. 
2116 if (VA.getLocVT() == MVT::v2f64) { 2117 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2118 Chain, DAG, dl); 2119 VA = ArgLocs[++i]; // skip ahead to next loc 2120 SDValue ArgValue2; 2121 if (VA.isMemLoc()) { 2122 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2123 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2124 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2125 MachinePointerInfo::getFixedStack(FI), 2126 false, false, 0); 2127 } else { 2128 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2129 Chain, DAG, dl); 2130 } 2131 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2132 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2133 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2134 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2135 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2136 } else 2137 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2138 2139 } else { 2140 TargetRegisterClass *RC; 2141 2142 if (RegVT == MVT::f32) 2143 RC = ARM::SPRRegisterClass; 2144 else if (RegVT == MVT::f64) 2145 RC = ARM::DPRRegisterClass; 2146 else if (RegVT == MVT::v2f64) 2147 RC = ARM::QPRRegisterClass; 2148 else if (RegVT == MVT::i32) 2149 RC = (AFI->isThumb1OnlyFunction() ? 2150 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2151 else 2152 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2153 2154 // Transform the arguments in physical registers into virtual ones. 2155 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2156 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2157 } 2158 2159 // If this is an 8 or 16-bit value, it is really passed promoted 2160 // to 32 bits. Insert an assert[sz]ext to capture this, then 2161 // truncate to the right size. 2162 switch (VA.getLocInfo()) { 2163 default: llvm_unreachable("Unknown loc info!"); 2164 case CCValAssign::Full: break; 2165 case CCValAssign::BCvt: 2166 ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, VA.getValVT(), ArgValue); 2167 break; 2168 case CCValAssign::SExt: 2169 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2170 DAG.getValueType(VA.getValVT())); 2171 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2172 break; 2173 case CCValAssign::ZExt: 2174 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2175 DAG.getValueType(VA.getValVT())); 2176 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2177 break; 2178 } 2179 2180 InVals.push_back(ArgValue); 2181 2182 } else { // VA.isRegLoc() 2183 2184 // sanity check 2185 assert(VA.isMemLoc()); 2186 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2187 2188 unsigned ArgSize = VA.getLocVT().getSizeInBits()/8; 2189 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), true); 2190 2191 // Create load nodes to retrieve arguments from the stack. 
2192 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2193 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2194 MachinePointerInfo::getFixedStack(FI), 2195 false, false, 0)); 2196 } 2197 } 2198 2199 // varargs 2200 if (isVarArg) { 2201 static const unsigned GPRArgRegs[] = { 2202 ARM::R0, ARM::R1, ARM::R2, ARM::R3 2203 }; 2204 2205 unsigned NumGPRs = CCInfo.getFirstUnallocated 2206 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2207 2208 unsigned Align = MF.getTarget().getFrameInfo()->getStackAlignment(); 2209 unsigned VARegSize = (4 - NumGPRs) * 4; 2210 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2211 unsigned ArgOffset = CCInfo.getNextStackOffset(); 2212 if (VARegSaveSize) { 2213 // If this function is vararg, store any remaining integer argument regs 2214 // to their spots on the stack so that they may be loaded by dereferencing 2215 // the result of va_next. 2216 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2217 AFI->setVarArgsFrameIndex( 2218 MFI->CreateFixedObject(VARegSaveSize, 2219 ArgOffset + VARegSaveSize - VARegSize, 2220 true)); 2221 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2222 getPointerTy()); 2223 2224 SmallVector<SDValue, 4> MemOps; 2225 for (; NumGPRs < 4; ++NumGPRs) { 2226 TargetRegisterClass *RC; 2227 if (AFI->isThumb1OnlyFunction()) 2228 RC = ARM::tGPRRegisterClass; 2229 else 2230 RC = ARM::GPRRegisterClass; 2231 2232 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 2233 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2234 SDValue Store = 2235 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2236 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2237 false, false, 0); 2238 MemOps.push_back(Store); 2239 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2240 DAG.getConstant(4, getPointerTy())); 2241 } 2242 if (!MemOps.empty()) 2243 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2244 &MemOps[0], MemOps.size()); 2245 } else 2246 // This will point to the next argument passed via stack. 2247 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2248 } 2249 2250 return Chain; 2251} 2252 2253/// isFloatingPointZero - Return true if this is +0.0. 2254static bool isFloatingPointZero(SDValue Op) { 2255 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2256 return CFP->getValueAPF().isPosZero(); 2257 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2258 // Maybe this has already been legalized into the constant pool? 2259 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2260 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2261 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2262 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2263 return CFP->getValueAPF().isPosZero(); 2264 } 2265 } 2266 return false; 2267} 2268 2269/// Returns the appropriate ARM CMP (cmp) and corresponding condition code for 2270/// the given operands. 2271SDValue 2272ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2273 SDValue &ARMcc, SelectionDAG &DAG, 2274 DebugLoc dl) const { 2275 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2276 unsigned C = RHSC->getZExtValue(); 2277 if (!isLegalICmpImmediate(C)) { 2278 // Constant does not fit, try adjusting it by one? 2279 switch (CC) { 2280 default: break; 2281 case ISD::SETLT: 2282 case ISD::SETGE: 2283 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2284 CC = (CC == ISD::SETLT) ?
ISD::SETLE : ISD::SETGT; 2285 RHS = DAG.getConstant(C-1, MVT::i32); 2286 } 2287 break; 2288 case ISD::SETULT: 2289 case ISD::SETUGE: 2290 if (C != 0 && isLegalICmpImmediate(C-1)) { 2291 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2292 RHS = DAG.getConstant(C-1, MVT::i32); 2293 } 2294 break; 2295 case ISD::SETLE: 2296 case ISD::SETGT: 2297 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2298 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2299 RHS = DAG.getConstant(C+1, MVT::i32); 2300 } 2301 break; 2302 case ISD::SETULE: 2303 case ISD::SETUGT: 2304 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2305 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2306 RHS = DAG.getConstant(C+1, MVT::i32); 2307 } 2308 break; 2309 } 2310 } 2311 } 2312 2313 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2314 ARMISD::NodeType CompareType; 2315 switch (CondCode) { 2316 default: 2317 CompareType = ARMISD::CMP; 2318 break; 2319 case ARMCC::EQ: 2320 case ARMCC::NE: 2321 // Uses only Z Flag 2322 CompareType = ARMISD::CMPZ; 2323 break; 2324 } 2325 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2326 return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); 2327} 2328 2329/// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 2330SDValue 2331ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2332 DebugLoc dl) const { 2333 SDValue Cmp; 2334 if (!isFloatingPointZero(RHS)) 2335 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); 2336 else 2337 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Flag, LHS); 2338 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); 2339} 2340 2341SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2342 SDValue Cond = Op.getOperand(0); 2343 SDValue SelectTrue = Op.getOperand(1); 2344 SDValue SelectFalse = Op.getOperand(2); 2345 DebugLoc dl = Op.getDebugLoc(); 2346 2347 // Convert: 2348 // 2349 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2350 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2351 // 2352 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2353 const ConstantSDNode *CMOVTrue = 2354 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2355 const ConstantSDNode *CMOVFalse = 2356 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2357 2358 if (CMOVTrue && CMOVFalse) { 2359 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2360 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2361 2362 SDValue True; 2363 SDValue False; 2364 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2365 True = SelectTrue; 2366 False = SelectFalse; 2367 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2368 True = SelectFalse; 2369 False = SelectTrue; 2370 } 2371 2372 if (True.getNode() && False.getNode()) { 2373 EVT VT = Cond.getValueType(); 2374 SDValue ARMcc = Cond.getOperand(2); 2375 SDValue CCR = Cond.getOperand(3); 2376 SDValue Cmp = Cond.getOperand(4); 2377 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2378 } 2379 } 2380 } 2381 2382 return DAG.getSelectCC(dl, Cond, 2383 DAG.getConstant(0, Cond.getValueType()), 2384 SelectTrue, SelectFalse, ISD::SETNE); 2385} 2386 2387SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2388 EVT VT = Op.getValueType(); 2389 SDValue LHS = Op.getOperand(0); 2390 SDValue RHS = Op.getOperand(1); 2391 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2392 SDValue TrueVal = Op.getOperand(2); 2393 SDValue FalseVal = Op.getOperand(3); 2394 DebugLoc dl = Op.getDebugLoc(); 2395 2396 if
(LHS.getValueType() == MVT::i32) { 2397 SDValue ARMcc; 2398 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2399 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2400 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2401 } 2402 2403 ARMCC::CondCodes CondCode, CondCode2; 2404 FPCCToARMCC(CC, CondCode, CondCode2); 2405 2406 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2407 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2408 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2409 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2410 ARMcc, CCR, Cmp); 2411 if (CondCode2 != ARMCC::AL) { 2412 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2413 // FIXME: Needs another CMP because flag can have but one use. 2414 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2415 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2416 Result, TrueVal, ARMcc2, CCR, Cmp2); 2417 } 2418 return Result; 2419} 2420 2421/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2422/// to morph to an integer compare sequence. 2423static bool canChangeToInt(SDValue Op, bool &SeenZero, 2424 const ARMSubtarget *Subtarget) { 2425 SDNode *N = Op.getNode(); 2426 if (!N->hasOneUse()) 2427 // Otherwise it requires moving the value from fp to integer registers. 2428 return false; 2429 if (!N->getNumValues()) 2430 return false; 2431 EVT VT = Op.getValueType(); 2432 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2433 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2434 // vmrs are very slow, e.g. cortex-a8. 2435 return false; 2436 2437 if (isFloatingPointZero(Op)) { 2438 SeenZero = true; 2439 return true; 2440 } 2441 return ISD::isNormalLoad(N); 2442} 2443 2444static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2445 if (isFloatingPointZero(Op)) 2446 return DAG.getConstant(0, MVT::i32); 2447 2448 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2449 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2450 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2451 Ld->isVolatile(), Ld->isNonTemporal(), 2452 Ld->getAlignment()); 2453 2454 llvm_unreachable("Unknown VFP cmp argument!"); 2455} 2456 2457static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2458 SDValue &RetVal1, SDValue &RetVal2) { 2459 if (isFloatingPointZero(Op)) { 2460 RetVal1 = DAG.getConstant(0, MVT::i32); 2461 RetVal2 = DAG.getConstant(0, MVT::i32); 2462 return; 2463 } 2464 2465 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2466 SDValue Ptr = Ld->getBasePtr(); 2467 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2468 Ld->getChain(), Ptr, 2469 Ld->getPointerInfo(), 2470 Ld->isVolatile(), Ld->isNonTemporal(), 2471 Ld->getAlignment()); 2472 2473 EVT PtrType = Ptr.getValueType(); 2474 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2475 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2476 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2477 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2478 Ld->getChain(), NewPtr, 2479 Ld->getPointerInfo().getWithOffset(4), 2480 Ld->isVolatile(), Ld->isNonTemporal(), 2481 NewAlign); 2482 return; 2483 } 2484 2485 llvm_unreachable("Unknown VFP cmp argument!"); 2486} 2487 2488/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2489/// f32 and even f64 comparisons to integer ones. 
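/// For example, when neither operand can be NaN, (setoeq f32 %a, %b) can be
/// checked (roughly) as an integer equality of the raw bit patterns,
///   (seteq (bitcast %a to i32), (bitcast %b to i32)),
/// and an f64 compare becomes a pair of i32 compares (ARMISD::BCC_i64).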
2490SDValue 2491ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2492 SDValue Chain = Op.getOperand(0); 2493 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2494 SDValue LHS = Op.getOperand(2); 2495 SDValue RHS = Op.getOperand(3); 2496 SDValue Dest = Op.getOperand(4); 2497 DebugLoc dl = Op.getDebugLoc(); 2498 2499 bool SeenZero = false; 2500 if (canChangeToInt(LHS, SeenZero, Subtarget) && 2501 canChangeToInt(RHS, SeenZero, Subtarget) && 2502 // If one of the operands is zero, it's safe to ignore the NaN case since 2503 // we only care about equality comparisons. 2504 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { 2505 // If unsafe fp math optimization is enabled and there are no other uses of 2506 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2507 // to an integer comparison. 2508 if (CC == ISD::SETOEQ) 2509 CC = ISD::SETEQ; 2510 else if (CC == ISD::SETUNE) 2511 CC = ISD::SETNE; 2512 2513 SDValue ARMcc; 2514 if (LHS.getValueType() == MVT::f32) { 2515 LHS = bitcastf32Toi32(LHS, DAG); 2516 RHS = bitcastf32Toi32(RHS, DAG); 2517 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2518 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2519 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2520 Chain, Dest, ARMcc, CCR, Cmp); 2521 } 2522 2523 SDValue LHS1, LHS2; 2524 SDValue RHS1, RHS2; 2525 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2526 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2527 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2528 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2529 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2530 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2531 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2532 } 2533 2534 return SDValue(); 2535} 2536 2537SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2538 SDValue Chain = Op.getOperand(0); 2539 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2540 SDValue LHS = Op.getOperand(2); 2541 SDValue RHS = Op.getOperand(3); 2542 SDValue Dest = Op.getOperand(4); 2543 DebugLoc dl = Op.getDebugLoc(); 2544 2545 if (LHS.getValueType() == MVT::i32) { 2546 SDValue ARMcc; 2547 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2548 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2549 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2550 Chain, Dest, ARMcc, CCR, Cmp); 2551 } 2552 2553 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2554 2555 if (UnsafeFPMath && 2556 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2557 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2558 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2559 if (Result.getNode()) 2560 return Result; 2561 } 2562 2563 ARMCC::CondCodes CondCode, CondCode2; 2564 FPCCToARMCC(CC, CondCode, CondCode2); 2565 2566 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2567 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2568 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2569 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); 2570 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2571 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2572 if (CondCode2 != ARMCC::AL) { 2573 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2574 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2575 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2576 } 2577 return Res; 2578} 2579 2580SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2581 SDValue
Chain = Op.getOperand(0); 2582 SDValue Table = Op.getOperand(1); 2583 SDValue Index = Op.getOperand(2); 2584 DebugLoc dl = Op.getDebugLoc(); 2585 2586 EVT PTy = getPointerTy(); 2587 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2588 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2589 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2590 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2591 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2592 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2593 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2594 if (Subtarget->isThumb2()) { 2595 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2596 // which does another jump to the destination. This also makes it easier 2597 // to translate it to TBB / TBH later. 2598 // FIXME: This might not work if the function is extremely large. 2599 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2600 Addr, Op.getOperand(2), JTI, UId); 2601 } 2602 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2603 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2604 MachinePointerInfo::getJumpTable(), 2605 false, false, 0); 2606 Chain = Addr.getValue(1); 2607 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2608 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2609 } else { 2610 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2611 MachinePointerInfo::getJumpTable(), false, false, 0); 2612 Chain = Addr.getValue(1); 2613 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2614 } 2615} 2616 2617static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2618 DebugLoc dl = Op.getDebugLoc(); 2619 unsigned Opc; 2620 2621 switch (Op.getOpcode()) { 2622 default: 2623 assert(0 && "Invalid opcode!"); 2624 case ISD::FP_TO_SINT: 2625 Opc = ARMISD::FTOSI; 2626 break; 2627 case ISD::FP_TO_UINT: 2628 Opc = ARMISD::FTOUI; 2629 break; 2630 } 2631 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2632 return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); 2633} 2634 2635static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2636 EVT VT = Op.getValueType(); 2637 DebugLoc dl = Op.getDebugLoc(); 2638 unsigned Opc; 2639 2640 switch (Op.getOpcode()) { 2641 default: 2642 assert(0 && "Invalid opcode!"); 2643 case ISD::SINT_TO_FP: 2644 Opc = ARMISD::SITOF; 2645 break; 2646 case ISD::UINT_TO_FP: 2647 Opc = ARMISD::UITOF; 2648 break; 2649 } 2650 2651 Op = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Op.getOperand(0)); 2652 return DAG.getNode(Opc, dl, VT, Op); 2653} 2654 2655SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 2656 // Implement fcopysign with a fabs and a conditional fneg. 
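// That is, roughly: copysign(x, y) => (y < 0.0) ? -fabs(x) : fabs(x),
// using a VFP compare of y against +0.0 and ARMISD::CNEG on the LT condition.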
2657 SDValue Tmp0 = Op.getOperand(0); 2658 SDValue Tmp1 = Op.getOperand(1); 2659 DebugLoc dl = Op.getDebugLoc(); 2660 EVT VT = Op.getValueType(); 2661 EVT SrcVT = Tmp1.getValueType(); 2662 SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); 2663 SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32); 2664 SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); 2665 SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl); 2666 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2667 return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp); 2668} 2669 2670SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 2671 MachineFunction &MF = DAG.getMachineFunction(); 2672 MachineFrameInfo *MFI = MF.getFrameInfo(); 2673 MFI->setReturnAddressIsTaken(true); 2674 2675 EVT VT = Op.getValueType(); 2676 DebugLoc dl = Op.getDebugLoc(); 2677 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2678 if (Depth) { 2679 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 2680 SDValue Offset = DAG.getConstant(4, MVT::i32); 2681 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 2682 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 2683 MachinePointerInfo(), false, false, 0); 2684 } 2685 2686 // Return LR, which contains the return address. Mark it an implicit live-in. 2687 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 2688 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 2689} 2690 2691SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 2692 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 2693 MFI->setFrameAddressIsTaken(true); 2694 2695 EVT VT = Op.getValueType(); 2696 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 2697 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2698 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 2699 ? ARM::R7 : ARM::R11; 2700 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 2701 while (Depth--) 2702 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 2703 MachinePointerInfo(), 2704 false, false, 0); 2705 return FrameAddr; 2706} 2707 2708/// ExpandBIT_CONVERT - If the target supports VFP, this function is called to 2709/// expand a bit convert where either the source or destination type is i64 to 2710/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 2711/// operand type is illegal (e.g., v2f32 for a target that doesn't support 2712/// vectors), since the legalizer won't know what to do with that. 2713static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) { 2714 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 2715 DebugLoc dl = N->getDebugLoc(); 2716 SDValue Op = N->getOperand(0); 2717 2718 // This function is only supposed to be called for i64 types, either as the 2719 // source or destination of the bit convert. 2720 EVT SrcVT = Op.getValueType(); 2721 EVT DstVT = N->getValueType(0); 2722 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 2723 "ExpandBIT_CONVERT called for non-i64 type"); 2724 2725 // Turn i64->f64 into VMOVDRR. 
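// E.g. (f64 (bit_convert (i64 x))) becomes VMOVDRR(lo32(x), hi32(x)),
// moving the two 32-bit halves from GPRs into a D register (lo32/hi32 are
// informal notation for the EXTRACT_ELEMENT halves below).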
2726 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 2727 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2728 DAG.getConstant(0, MVT::i32)); 2729 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 2730 DAG.getConstant(1, MVT::i32)); 2731 return DAG.getNode(ISD::BIT_CONVERT, dl, DstVT, 2732 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 2733 } 2734 2735 // Turn f64->i64 into VMOVRRD. 2736 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 2737 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 2738 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 2739 // Merge the pieces into a single i64 value. 2740 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 2741 } 2742 2743 return SDValue(); 2744} 2745 2746/// getZeroVector - Returns a vector of the specified type with all zero elements. 2747/// Zero vectors are used to represent vector negation and in those cases 2748/// will be implemented with the NEON VNEG instruction. However, VNEG does 2749/// not support i64 elements, so sometimes the zero vectors will need to be 2750/// explicitly constructed. Regardless, use a canonical VMOV to create the 2751/// zero vector. 2752static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 2753 assert(VT.isVector() && "Expected a vector type"); 2754 // The canonical modified immediate encoding of a zero vector is....0! 2755 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 2756 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 2757 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 2758 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 2759} 2760 2761/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 2762/// i32 values and takes a 2 x i32 value to shift plus a shift amount. 2763SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 2764 SelectionDAG &DAG) const { 2765 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2766 EVT VT = Op.getValueType(); 2767 unsigned VTBits = VT.getSizeInBits(); 2768 DebugLoc dl = Op.getDebugLoc(); 2769 SDValue ShOpLo = Op.getOperand(0); 2770 SDValue ShOpHi = Op.getOperand(1); 2771 SDValue ShAmt = Op.getOperand(2); 2772 SDValue ARMcc; 2773 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 2774 2775 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 2776 2777 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2778 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2779 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 2780 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2781 DAG.getConstant(VTBits, MVT::i32)); 2782 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 2783 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2784 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 2785 2786 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2787 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2788 ARMcc, DAG, dl); 2789 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 2790 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 2791 CCR, Cmp); 2792 2793 SDValue Ops[2] = { Lo, Hi }; 2794 return DAG.getMergeValues(Ops, 2, dl); 2795} 2796 2797/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 2798/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
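/// Schematically, for 32-bit parts and shift amount n:
///   Lo = lo << n
///   Hi = (n >= 32) ? lo << (n - 32)
///                  : (hi << n) | (lo >> (32 - n))
/// with the Hi choice made by a CMOV on the sign of (n - 32).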
2799SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, 2800 SelectionDAG &DAG) const { 2801 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 2802 EVT VT = Op.getValueType(); 2803 unsigned VTBits = VT.getSizeInBits(); 2804 DebugLoc dl = Op.getDebugLoc(); 2805 SDValue ShOpLo = Op.getOperand(0); 2806 SDValue ShOpHi = Op.getOperand(1); 2807 SDValue ShAmt = Op.getOperand(2); 2808 SDValue ARMcc; 2809 2810 assert(Op.getOpcode() == ISD::SHL_PARTS); 2811 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 2812 DAG.getConstant(VTBits, MVT::i32), ShAmt); 2813 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); 2814 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 2815 DAG.getConstant(VTBits, MVT::i32)); 2816 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); 2817 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); 2818 2819 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 2820 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2821 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 2822 ARMcc, DAG, dl); 2823 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); 2824 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc, 2825 CCR, Cmp); 2826 2827 SDValue Ops[2] = { Lo, Hi }; 2828 return DAG.getMergeValues(Ops, 2, dl); 2829} 2830 2831SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 2832 SelectionDAG &DAG) const { 2833 // The rounding mode is in bits 23:22 of the FPSCR. 2834 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 2835 // The formula we use to implement this is (((FPSCR + (1 << 22)) >> 22) & 3) 2836 // so that the shift and the AND get folded into a bitfield extract. 2837 DebugLoc dl = Op.getDebugLoc(); 2838 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, 2839 DAG.getConstant(Intrinsic::arm_get_fpscr, 2840 MVT::i32)); 2841 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR, 2842 DAG.getConstant(1U << 22, MVT::i32)); 2843 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds, 2844 DAG.getConstant(22, MVT::i32)); 2845 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE, 2846 DAG.getConstant(3, MVT::i32)); 2847} 2848 2849static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, 2850 const ARMSubtarget *ST) { 2851 EVT VT = N->getValueType(0); 2852 DebugLoc dl = N->getDebugLoc(); 2853 2854 if (!ST->hasV6T2Ops()) 2855 return SDValue(); 2856 2857 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0)); 2858 return DAG.getNode(ISD::CTLZ, dl, VT, rbit); 2859} 2860 2861static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, 2862 const ARMSubtarget *ST) { 2863 EVT VT = N->getValueType(0); 2864 DebugLoc dl = N->getDebugLoc(); 2865 2866 // Lower vector shifts on NEON to use VSHL. 2867 if (VT.isVector()) { 2868 assert(ST->hasNEON() && "unexpected vector shift"); 2869 2870 // Left shifts translate directly to the vshiftu intrinsic. 2871 if (N->getOpcode() == ISD::SHL) 2872 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2873 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32), 2874 N->getOperand(0), N->getOperand(1)); 2875 2876 assert((N->getOpcode() == ISD::SRA || 2877 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode"); 2878 2879 // NEON uses the same intrinsics for both left and right shifts. For 2880 // right shifts, the shift amounts are negative, so negate the vector of 2881 // shift amounts.
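// E.g. (srl v, <n,n,n,n>) is emitted as vshiftu(v, <-n,-n,-n,-n>): the
// intrinsic interprets a negative shift count as a right shift.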
2882 EVT ShiftVT = N->getOperand(1).getValueType(); 2883 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 2884 getZeroVector(ShiftVT, DAG, dl), 2885 N->getOperand(1)); 2886 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 2887 Intrinsic::arm_neon_vshifts : 2888 Intrinsic::arm_neon_vshiftu); 2889 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 2890 DAG.getConstant(vshiftInt, MVT::i32), 2891 N->getOperand(0), NegatedCount); 2892 } 2893 2894 // We can get here for a node like i32 = ISD::SHL i32, i64 2895 if (VT != MVT::i64) 2896 return SDValue(); 2897 2898 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 2899 "Unknown shift to lower!"); 2900 2901 // We only lower SRA, SRL of 1 here, all others use generic lowering. 2902 if (!isa<ConstantSDNode>(N->getOperand(1)) || 2903 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 2904 return SDValue(); 2905 2906 // If we are in Thumb1 mode, we don't have RRX. 2907 if (ST->isThumb1Only()) return SDValue(); 2908 2909 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 2910 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2911 DAG.getConstant(0, MVT::i32)); 2912 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 2913 DAG.getConstant(1, MVT::i32)); 2914 2915 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 2916 // captures the shifted-out bit in the carry flag. 2917 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 2918 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Flag), &Hi, 1); 2919 2920 // The low part is an ARMISD::RRX operand, which shifts the carry in. 2921 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 2922 2923 // Merge the pieces into a single i64 value. 2924 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 2925} 2926 2927static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 2928 SDValue TmpOp0, TmpOp1; 2929 bool Invert = false; 2930 bool Swap = false; 2931 unsigned Opc = 0; 2932 2933 SDValue Op0 = Op.getOperand(0); 2934 SDValue Op1 = Op.getOperand(1); 2935 SDValue CC = Op.getOperand(2); 2936 EVT VT = Op.getValueType(); 2937 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 2938 DebugLoc dl = Op.getDebugLoc(); 2939 2940 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 2941 switch (SetCCOpcode) { 2942 default: llvm_unreachable("Illegal FP comparison"); break; 2943 case ISD::SETUNE: 2944 case ISD::SETNE: Invert = true; // Fallthrough 2945 case ISD::SETOEQ: 2946 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2947 case ISD::SETOLT: 2948 case ISD::SETLT: Swap = true; // Fallthrough 2949 case ISD::SETOGT: 2950 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2951 case ISD::SETOLE: 2952 case ISD::SETLE: Swap = true; // Fallthrough 2953 case ISD::SETOGE: 2954 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2955 case ISD::SETUGE: Swap = true; // Fallthrough 2956 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 2957 case ISD::SETUGT: Swap = true; // Fallthrough 2958 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 2959 case ISD::SETUEQ: Invert = true; // Fallthrough 2960 case ISD::SETONE: 2961 // Expand this to (OLT | OGT).
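// (setone a, b) is true iff the operands are ordered and unequal, which is
// exactly VCGT(b, a) | VCGT(a, b); SETUEQ falls through from above and is
// handled by inverting this combined result.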
2962 TmpOp0 = Op0; 2963 TmpOp1 = Op1; 2964 Opc = ISD::OR; 2965 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2966 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 2967 break; 2968 case ISD::SETUO: Invert = true; // Fallthrough 2969 case ISD::SETO: 2970 // Expand this to (OLT | OGE). 2971 TmpOp0 = Op0; 2972 TmpOp1 = Op1; 2973 Opc = ISD::OR; 2974 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 2975 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 2976 break; 2977 } 2978 } else { 2979 // Integer comparisons. 2980 switch (SetCCOpcode) { 2981 default: llvm_unreachable("Illegal integer comparison"); break; 2982 case ISD::SETNE: Invert = true; // Fallthrough 2983 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 2984 case ISD::SETLT: Swap = true; // Fallthrough 2985 case ISD::SETGT: Opc = ARMISD::VCGT; break; 2986 case ISD::SETLE: Swap = true; // Fallthrough 2987 case ISD::SETGE: Opc = ARMISD::VCGE; break; 2988 case ISD::SETULT: Swap = true; // Fallthrough 2989 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 2990 case ISD::SETULE: Swap = true; // Fallthrough 2991 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 2992 } 2993 2994 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 2995 if (Opc == ARMISD::VCEQ) { 2996 2997 SDValue AndOp; 2998 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 2999 AndOp = Op0; 3000 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3001 AndOp = Op1; 3002 3003 // Ignore bitconvert. 3004 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BIT_CONVERT) 3005 AndOp = AndOp.getOperand(0); 3006 3007 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3008 Opc = ARMISD::VTST; 3009 Op0 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(0)); 3010 Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, VT, AndOp.getOperand(1)); 3011 Invert = !Invert; 3012 } 3013 } 3014 } 3015 3016 if (Swap) 3017 std::swap(Op0, Op1); 3018 3019 SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3020 3021 if (Invert) 3022 Result = DAG.getNOT(dl, Result, VT); 3023 3024 return Result; 3025} 3026 3027/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3028/// valid vector constant for a NEON instruction with a "modified immediate" 3029/// operand (e.g., VMOV). If so, return the encoded value. 3030static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3031 unsigned SplatBitSize, SelectionDAG &DAG, 3032 EVT &VT, bool is128Bits, bool isVMOV) { 3033 unsigned OpCmode, Imm; 3034 3035 // SplatBitSize is set to the smallest size that splats the vector, so a 3036 // zero vector will always have SplatBitSize == 8. However, NEON modified 3037 // immediate instructions other than VMOV do not support the 8-bit encoding 3038 // of a zero vector, and the default encoding of zero is supposed to be the 3039 // 32-bit version. 3040 if (SplatBits == 0) 3041 SplatBitSize = 32; 3042 3043 switch (SplatBitSize) { 3044 case 8: 3045 if (!isVMOV) 3046 return SDValue(); 3047 // Any 1-byte value is OK. Op=0, Cmode=1110. 3048 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3049 OpCmode = 0xe; 3050 Imm = SplatBits; 3051 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3052 break; 3053 3054 case 16: 3055 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3056 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3057 if ((SplatBits & ~0xff) == 0) { 3058 // Value = 0x00nn: Op=x, Cmode=100x. 3059 OpCmode = 0x8; 3060 Imm = SplatBits; 3061 break; 3062 } 3063 if ((SplatBits & ~0xff00) == 0) { 3064 // Value = 0xnn00: Op=x, Cmode=101x.
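// e.g., a 16-bit splat of 0x1200 takes this path and is encoded with Imm = 0x12.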
3065 OpCmode = 0xa; 3066 Imm = SplatBits >> 8; 3067 break; 3068 } 3069 return SDValue(); 3070 3071 case 32: 3072 // NEON's 32-bit VMOV supports splat values where: 3073 // * only one byte is nonzero, or 3074 // * the least significant byte is 0xff and the second byte is nonzero, or 3075 // * the least significant 2 bytes are 0xff and the third is nonzero. 3076 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3077 if ((SplatBits & ~0xff) == 0) { 3078 // Value = 0x000000nn: Op=x, Cmode=000x. 3079 OpCmode = 0; 3080 Imm = SplatBits; 3081 break; 3082 } 3083 if ((SplatBits & ~0xff00) == 0) { 3084 // Value = 0x0000nn00: Op=x, Cmode=001x. 3085 OpCmode = 0x2; 3086 Imm = SplatBits >> 8; 3087 break; 3088 } 3089 if ((SplatBits & ~0xff0000) == 0) { 3090 // Value = 0x00nn0000: Op=x, Cmode=010x. 3091 OpCmode = 0x4; 3092 Imm = SplatBits >> 16; 3093 break; 3094 } 3095 if ((SplatBits & ~0xff000000) == 0) { 3096 // Value = 0xnn000000: Op=x, Cmode=011x. 3097 OpCmode = 0x6; 3098 Imm = SplatBits >> 24; 3099 break; 3100 } 3101 3102 if ((SplatBits & ~0xffff) == 0 && 3103 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3104 // Value = 0x0000nnff: Op=x, Cmode=1100. 3105 OpCmode = 0xc; 3106 Imm = SplatBits >> 8; 3107 SplatBits |= 0xff; 3108 break; 3109 } 3110 3111 if ((SplatBits & ~0xffffff) == 0 && 3112 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3113 // Value = 0x00nnffff: Op=x, Cmode=1101. 3114 OpCmode = 0xd; 3115 Imm = SplatBits >> 16; 3116 SplatBits |= 0xffff; 3117 break; 3118 } 3119 3120 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3121 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3122 // VMOV.I32. A (very) minor optimization would be to replicate the value 3123 // and fall through here to test for a valid 64-bit splat. But, then the 3124 // caller would also need to check and handle the change in size. 3125 return SDValue(); 3126 3127 case 64: { 3128 if (!isVMOV) 3129 return SDValue(); 3130 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 3131 uint64_t BitMask = 0xff; 3132 uint64_t Val = 0; 3133 unsigned ImmMask = 1; 3134 Imm = 0; 3135 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3136 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3137 Val |= BitMask; 3138 Imm |= ImmMask; 3139 } else if ((SplatBits & BitMask) != 0) { 3140 return SDValue(); 3141 } 3142 BitMask <<= 8; 3143 ImmMask <<= 1; 3144 } 3145 // Op=1, Cmode=1110. 3146 OpCmode = 0x1e; 3147 SplatBits = Val; 3148 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3149 break; 3150 } 3151 3152 default: 3153 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3154 return SDValue(); 3155 } 3156 3157 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3158 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3159} 3160 3161static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3162 bool &ReverseVEXT, unsigned &Imm) { 3163 unsigned NumElts = VT.getVectorNumElements(); 3164 ReverseVEXT = false; 3165 3166 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3167 if (M[0] < 0) 3168 return false; 3169 3170 Imm = M[0]; 3171 3172 // If this is a VEXT shuffle, the immediate value is the index of the first 3173 // element. The other shuffle indices must be the successive elements after 3174 // the first one. 3175 unsigned ExpectedElt = Imm; 3176 for (unsigned i = 1; i < NumElts; ++i) { 3177 // Increment the expected index. If it wraps around, it may still be 3178 // a VEXT but the source vectors must be swapped. 
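// e.g., for <4 x i16> the mask <6, 7, 0, 1> selects elements 6 and 7 of the
// concatenation V1V2 followed by elements 0 and 1, so it is VEXT(V2, V1)
// with the immediate adjusted to 6 - 4 = 2 below.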
3179 ExpectedElt += 1; 3180 if (ExpectedElt == NumElts * 2) { 3181 ExpectedElt = 0; 3182 ReverseVEXT = true; 3183 } 3184 3185 if (M[i] < 0) continue; // ignore UNDEF indices 3186 if (ExpectedElt != static_cast<unsigned>(M[i])) 3187 return false; 3188 } 3189 3190 // Adjust the index value if the source operands will be swapped. 3191 if (ReverseVEXT) 3192 Imm -= NumElts; 3193 3194 return true; 3195} 3196 3197/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3198/// instruction with the specified blocksize. (The order of the elements 3199/// within each block of the vector is reversed.) 3200static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3201 unsigned BlockSize) { 3202 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3203 "Only possible block sizes for VREV are: 16, 32, 64"); 3204 3205 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3206 if (EltSz == 64) 3207 return false; 3208 3209 unsigned NumElts = VT.getVectorNumElements(); 3210 unsigned BlockElts = M[0] + 1; 3211 // If the first shuffle index is UNDEF, be optimistic. 3212 if (M[0] < 0) 3213 BlockElts = BlockSize / EltSz; 3214 3215 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3216 return false; 3217 3218 for (unsigned i = 0; i < NumElts; ++i) { 3219 if (M[i] < 0) continue; // ignore UNDEF indices 3220 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3221 return false; 3222 } 3223 3224 return true; 3225} 3226 3227static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3228 unsigned &WhichResult) { 3229 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3230 if (EltSz == 64) 3231 return false; 3232 3233 unsigned NumElts = VT.getVectorNumElements(); 3234 WhichResult = (M[0] == 0 ? 0 : 1); 3235 for (unsigned i = 0; i < NumElts; i += 2) { 3236 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3237 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3238 return false; 3239 } 3240 return true; 3241} 3242 3243/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3244/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3245/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3246static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3247 unsigned &WhichResult) { 3248 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3249 if (EltSz == 64) 3250 return false; 3251 3252 unsigned NumElts = VT.getVectorNumElements(); 3253 WhichResult = (M[0] == 0 ? 0 : 1); 3254 for (unsigned i = 0; i < NumElts; i += 2) { 3255 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3256 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3257 return false; 3258 } 3259 return true; 3260} 3261 3262static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3263 unsigned &WhichResult) { 3264 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3265 if (EltSz == 64) 3266 return false; 3267 3268 unsigned NumElts = VT.getVectorNumElements(); 3269 WhichResult = (M[0] == 0 ? 0 : 1); 3270 for (unsigned i = 0; i != NumElts; ++i) { 3271 if (M[i] < 0) continue; // ignore UNDEF indices 3272 if ((unsigned) M[i] != 2 * i + WhichResult) 3273 return false; 3274 } 3275 3276 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3277 if (VT.is64BitVector() && EltSz == 32) 3278 return false; 3279 3280 return true; 3281} 3282 3283/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3284/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 
3285/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3286static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3287 unsigned &WhichResult) { 3288 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3289 if (EltSz == 64) 3290 return false; 3291 3292 unsigned Half = VT.getVectorNumElements() / 2; 3293 WhichResult = (M[0] == 0 ? 0 : 1); 3294 for (unsigned j = 0; j != 2; ++j) { 3295 unsigned Idx = WhichResult; 3296 for (unsigned i = 0; i != Half; ++i) { 3297 int MIdx = M[i + j * Half]; 3298 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3299 return false; 3300 Idx += 2; 3301 } 3302 } 3303 3304 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3305 if (VT.is64BitVector() && EltSz == 32) 3306 return false; 3307 3308 return true; 3309} 3310 3311static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3312 unsigned &WhichResult) { 3313 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3314 if (EltSz == 64) 3315 return false; 3316 3317 unsigned NumElts = VT.getVectorNumElements(); 3318 WhichResult = (M[0] == 0 ? 0 : 1); 3319 unsigned Idx = WhichResult * NumElts / 2; 3320 for (unsigned i = 0; i != NumElts; i += 2) { 3321 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3322 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3323 return false; 3324 Idx += 1; 3325 } 3326 3327 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3328 if (VT.is64BitVector() && EltSz == 32) 3329 return false; 3330 3331 return true; 3332} 3333 3334/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3335/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3336/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3337static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3338 unsigned &WhichResult) { 3339 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3340 if (EltSz == 64) 3341 return false; 3342 3343 unsigned NumElts = VT.getVectorNumElements(); 3344 WhichResult = (M[0] == 0 ? 0 : 1); 3345 unsigned Idx = WhichResult * NumElts / 2; 3346 for (unsigned i = 0; i != NumElts; i += 2) { 3347 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3348 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 3349 return false; 3350 Idx += 1; 3351 } 3352 3353 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3354 if (VT.is64BitVector() && EltSz == 32) 3355 return false; 3356 3357 return true; 3358} 3359 3360// If N is an integer constant that can be moved into a register in one 3361// instruction, return an SDValue of such a constant (will become a MOV 3362// instruction). Otherwise return null. 3363static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3364 const ARMSubtarget *ST, DebugLoc dl) { 3365 uint64_t Val; 3366 if (!isa<ConstantSDNode>(N)) 3367 return SDValue(); 3368 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3369 3370 if (ST->isThumb1Only()) { 3371 if (Val <= 255 || ~Val <= 255) 3372 return DAG.getConstant(Val, MVT::i32); 3373 } else { 3374 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3375 return DAG.getConstant(Val, MVT::i32); 3376 } 3377 return SDValue(); 3378} 3379 3380// If this is a case we can't handle, return null and let the default 3381// expansion code take care of it. 
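// The cases handled below are, in order: constant splats that fit a NEON
// modified-immediate VMOV or VMVN; build_vectors where only the low element
// is defined (SCALAR_TO_VECTOR); splats of one value (VDUP); and vectors
// with 32- or 64-bit elements (ARMISD::BUILD_VECTOR). For example, a v4i32
// splat of 0x00ff0000 is matched by the immediate VMOV check (Op=0,
// Cmode=010x) and needs no constant-pool load.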
3382static SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3383 const ARMSubtarget *ST) { 3384 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3385 DebugLoc dl = Op.getDebugLoc(); 3386 EVT VT = Op.getValueType(); 3387 3388 APInt SplatBits, SplatUndef; 3389 unsigned SplatBitSize; 3390 bool HasAnyUndefs; 3391 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3392 if (SplatBitSize <= 64) { 3393 // Check if an immediate VMOV works. 3394 EVT VmovVT; 3395 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3396 SplatUndef.getZExtValue(), SplatBitSize, 3397 DAG, VmovVT, VT.is128BitVector(), true); 3398 if (Val.getNode()) { 3399 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3400 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3401 } 3402 3403 // Try an immediate VMVN. 3404 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3405 ((1LL << SplatBitSize) - 1)); 3406 Val = isNEONModifiedImm(NegatedImm, 3407 SplatUndef.getZExtValue(), SplatBitSize, 3408 DAG, VmovVT, VT.is128BitVector(), false); 3409 if (Val.getNode()) { 3410 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3411 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Vmov); 3412 } 3413 } 3414 } 3415 3416 // Scan through the operands to see if only one value is used. 3417 unsigned NumElts = VT.getVectorNumElements(); 3418 bool isOnlyLowElement = true; 3419 bool usesOnlyOneValue = true; 3420 bool isConstant = true; 3421 SDValue Value; 3422 for (unsigned i = 0; i < NumElts; ++i) { 3423 SDValue V = Op.getOperand(i); 3424 if (V.getOpcode() == ISD::UNDEF) 3425 continue; 3426 if (i > 0) 3427 isOnlyLowElement = false; 3428 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3429 isConstant = false; 3430 3431 if (!Value.getNode()) 3432 Value = V; 3433 else if (V != Value) 3434 usesOnlyOneValue = false; 3435 } 3436 3437 if (!Value.getNode()) 3438 return DAG.getUNDEF(VT); 3439 3440 if (isOnlyLowElement) 3441 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3442 3443 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3444 3445 if (EnableARMVDUPsplat) { 3446 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3447 // i32 and try again. 3448 if (usesOnlyOneValue && EltSize <= 32) { 3449 if (!isConstant) 3450 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3451 if (VT.getVectorElementType().isFloatingPoint()) { 3452 SmallVector<SDValue, 8> Ops; 3453 for (unsigned i = 0; i < NumElts; ++i) 3454 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, 3455 Op.getOperand(i))); // Use an integer vector with the same element count as VT, not a 3456 // hardcoded v4i32, so v2f32 splats are handled correctly as well. EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], 3457 NumElts); 3458 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, 3459 LowerBUILD_VECTOR(Val, DAG, ST)); 3460 } 3461 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3462 if (Val.getNode()) 3463 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3464 } 3465 } 3466 3467 // If all elements are constants and the case above didn't get hit, fall back 3468 // to the default expansion, which will generate a load from the constant 3469 // pool. 3470 if (isConstant) 3471 return SDValue(); 3472 3473 if (!EnableARMVDUPsplat) { 3474 // Use VDUP for non-constant splats. 3475 if (usesOnlyOneValue && EltSize <= 32) 3476 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3477 } 3478 3479 // Vectors with 32- or 64-bit elements can be built by directly assigning 3480 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3481 // will be legalized.
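// e.g., (v2f64 build_vector X, Y) becomes an ARMISD::BUILD_VECTOR whose f64
// operands can be placed directly in the two D subregisters of a Q register,
// with no per-element insertion.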
3482 if (EltSize >= 32) { 3483 // Do the expansion with floating-point types, since that is what the VFP 3484 // registers are defined to use, and since i64 is not legal. 3485 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3486 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3487 SmallVector<SDValue, 8> Ops; 3488 for (unsigned i = 0; i < NumElts; ++i) 3489 Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, dl, EltVT, Op.getOperand(i))); 3490 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3491 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3492 } 3493 3494 return SDValue(); 3495} 3496 3497/// isShuffleMaskLegal - Targets can use this to indicate that they only 3498/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 3499/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 3500/// are assumed to be legal. 3501bool 3502ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 3503 EVT VT) const { 3504 if (VT.getVectorNumElements() == 4 && 3505 (VT.is128BitVector() || VT.is64BitVector())) { 3506 unsigned PFIndexes[4]; 3507 for (unsigned i = 0; i != 4; ++i) { 3508 if (M[i] < 0) 3509 PFIndexes[i] = 8; 3510 else 3511 PFIndexes[i] = M[i]; 3512 } 3513 3514 // Compute the index in the perfect shuffle table. 3515 unsigned PFTableIndex = 3516 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3517 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3518 unsigned Cost = (PFEntry >> 30); 3519 3520 if (Cost <= 4) 3521 return true; 3522 } 3523 3524 bool ReverseVEXT; 3525 unsigned Imm, WhichResult; 3526 3527 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3528 return (EltSize >= 32 || 3529 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 3530 isVREVMask(M, VT, 64) || 3531 isVREVMask(M, VT, 32) || 3532 isVREVMask(M, VT, 16) || 3533 isVEXTMask(M, VT, ReverseVEXT, Imm) || 3534 isVTRNMask(M, VT, WhichResult) || 3535 isVUZPMask(M, VT, WhichResult) || 3536 isVZIPMask(M, VT, WhichResult) || 3537 isVTRN_v_undef_Mask(M, VT, WhichResult) || 3538 isVUZP_v_undef_Mask(M, VT, WhichResult) || 3539 isVZIP_v_undef_Mask(M, VT, WhichResult)); 3540} 3541 3542/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 3543/// the specified operations to build the shuffle. 
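// Each 32-bit PFEntry packs: the cost in bits [31:30], the opcode in bits
// [29:26], and the left/right operand IDs in bits [25:13] and [12:0]. An
// operand ID is a base-9 encoding of a 4-element mask (a digit of 8 means
// "don't care"); e.g. an ID of (1*9+2)*9+3 encodes the mask <0,1,2,3>, the
// identity copy of the first input.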
3544static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 3545 SDValue RHS, SelectionDAG &DAG, 3546 DebugLoc dl) { 3547 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3548 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3549 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3550 3551 enum { 3552 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3553 OP_VREV, 3554 OP_VDUP0, 3555 OP_VDUP1, 3556 OP_VDUP2, 3557 OP_VDUP3, 3558 OP_VEXT1, 3559 OP_VEXT2, 3560 OP_VEXT3, 3561 OP_VUZPL, // VUZP, left result 3562 OP_VUZPR, // VUZP, right result 3563 OP_VZIPL, // VZIP, left result 3564 OP_VZIPR, // VZIP, right result 3565 OP_VTRNL, // VTRN, left result 3566 OP_VTRNR // VTRN, right result 3567 }; 3568 3569 if (OpNum == OP_COPY) { 3570 if (LHSID == (1*9+2)*9+3) return LHS; 3571 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3572 return RHS; 3573 } 3574 3575 SDValue OpLHS, OpRHS; 3576 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 3577 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 3578 EVT VT = OpLHS.getValueType(); 3579 3580 switch (OpNum) { 3581 default: llvm_unreachable("Unknown shuffle opcode!"); 3582 case OP_VREV: 3583 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 3584 case OP_VDUP0: 3585 case OP_VDUP1: 3586 case OP_VDUP2: 3587 case OP_VDUP3: 3588 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 3589 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 3590 case OP_VEXT1: 3591 case OP_VEXT2: 3592 case OP_VEXT3: 3593 return DAG.getNode(ARMISD::VEXT, dl, VT, 3594 OpLHS, OpRHS, 3595 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 3596 case OP_VUZPL: 3597 case OP_VUZPR: 3598 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3599 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 3600 case OP_VZIPL: 3601 case OP_VZIPR: 3602 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3603 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 3604 case OP_VTRNL: 3605 case OP_VTRNR: 3606 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3607 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 3608 } 3609} 3610 3611static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { 3612 SDValue V1 = Op.getOperand(0); 3613 SDValue V2 = Op.getOperand(1); 3614 DebugLoc dl = Op.getDebugLoc(); 3615 EVT VT = Op.getValueType(); 3616 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode()); 3617 SmallVector<int, 8> ShuffleMask; 3618 3619 // Convert shuffles that are directly supported on NEON to target-specific 3620 // DAG nodes, instead of keeping them as shuffles and matching them again 3621 // during code selection. This is more efficient and avoids the possibility 3622 // of inconsistencies between legalization and selection. 3623 // FIXME: floating-point vectors should be canonicalized to integer vectors 3624 // of the same size so that they get CSEd properly. 3625 SVN->getMask(ShuffleMask); 3626 3627 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3628 if (EltSize <= 32) { 3629 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { 3630 int Lane = SVN->getSplatIndex(); 3631 // If this is an undef splat, generate it via "just" vdup, if possible.
3632 if (Lane == -1) Lane = 0; 3633 3634 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { 3635 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); 3636 } 3637 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, 3638 DAG.getConstant(Lane, MVT::i32)); 3639 } 3640 3641 bool ReverseVEXT; 3642 unsigned Imm; 3643 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) { 3644 if (ReverseVEXT) 3645 std::swap(V1, V2); 3646 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2, 3647 DAG.getConstant(Imm, MVT::i32)); 3648 } 3649 3650 if (isVREVMask(ShuffleMask, VT, 64)) 3651 return DAG.getNode(ARMISD::VREV64, dl, VT, V1); 3652 if (isVREVMask(ShuffleMask, VT, 32)) 3653 return DAG.getNode(ARMISD::VREV32, dl, VT, V1); 3654 if (isVREVMask(ShuffleMask, VT, 16)) 3655 return DAG.getNode(ARMISD::VREV16, dl, VT, V1); 3656 3657 // Check for Neon shuffles that modify both input vectors in place. 3658 // If both results are used, i.e., if there are two shuffles with the same 3659 // source operands and with masks corresponding to both results of one of 3660 // these operations, DAG memoization will ensure that a single node is 3661 // used for both shuffles. 3662 unsigned WhichResult; 3663 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 3664 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3665 V1, V2).getValue(WhichResult); 3666 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 3667 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3668 V1, V2).getValue(WhichResult); 3669 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 3670 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3671 V1, V2).getValue(WhichResult); 3672 3673 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3674 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 3675 V1, V1).getValue(WhichResult); 3676 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3677 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 3678 V1, V1).getValue(WhichResult); 3679 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 3680 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 3681 V1, V1).getValue(WhichResult); 3682 } 3683 3684 // If the shuffle is not directly supported and it has 4 elements, use 3685 // the PerfectShuffle-generated table to synthesize it from other shuffles. 3686 unsigned NumElts = VT.getVectorNumElements(); 3687 if (NumElts == 4) { 3688 unsigned PFIndexes[4]; 3689 for (unsigned i = 0; i != 4; ++i) { 3690 if (ShuffleMask[i] < 0) 3691 PFIndexes[i] = 8; 3692 else 3693 PFIndexes[i] = ShuffleMask[i]; 3694 } 3695 3696 // Compute the index in the perfect shuffle table. 3697 unsigned PFTableIndex = 3698 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3699 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3700 unsigned Cost = (PFEntry >> 30); 3701 3702 if (Cost <= 4) 3703 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 3704 } 3705 3706 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 3707 if (EltSize >= 32) { 3708 // Do the expansion with floating-point types, since that is what the VFP 3709 // registers are defined to use, and since i64 is not legal. 
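// e.g., a v2i64 shuffle with mask <1, 2> becomes a v2f64
// ARMISD::BUILD_VECTOR of the f64 extracts V1[1] and V2[0].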
3710 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3711 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3712 V1 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V1); 3713 V2 = DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, V2); 3714 SmallVector<SDValue, 8> Ops; 3715 for (unsigned i = 0; i < NumElts; ++i) { 3716 if (ShuffleMask[i] < 0) 3717 Ops.push_back(DAG.getUNDEF(EltVT)); 3718 else 3719 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 3720 ShuffleMask[i] < (int)NumElts ? V1 : V2, 3721 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 3722 MVT::i32))); 3723 } 3724 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3725 return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Val); 3726 } 3727 3728 return SDValue(); 3729} 3730 3731static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 3732 EVT VT = Op.getValueType(); 3733 DebugLoc dl = Op.getDebugLoc(); 3734 SDValue Vec = Op.getOperand(0); 3735 SDValue Lane = Op.getOperand(1); 3736 assert(VT == MVT::i32 && 3737 Vec.getValueType().getVectorElementType().getSizeInBits() < 32 && 3738 "unexpected type for custom-lowering vector extract"); 3739 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 3740} 3741 3742static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 3743 // The only time a CONCAT_VECTORS operation can have legal types is when 3744 // two 64-bit vectors are concatenated to a 128-bit vector. 3745 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 3746 "unexpected CONCAT_VECTORS"); 3747 DebugLoc dl = Op.getDebugLoc(); 3748 SDValue Val = DAG.getUNDEF(MVT::v2f64); 3749 SDValue Op0 = Op.getOperand(0); 3750 SDValue Op1 = Op.getOperand(1); 3751 if (Op0.getOpcode() != ISD::UNDEF) 3752 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3753 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op0), 3754 DAG.getIntPtrConstant(0)); 3755 if (Op1.getOpcode() != ISD::UNDEF) 3756 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 3757 DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, Op1), 3758 DAG.getIntPtrConstant(1)); 3759 return DAG.getNode(ISD::BIT_CONVERT, dl, Op.getValueType(), Val); 3760} 3761 3762/// SkipExtension - For a node that is either a SIGN_EXTEND, ZERO_EXTEND, or 3763/// an extending load, return the unextended value. 3764static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 3765 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 3766 return N->getOperand(0); 3767 LoadSDNode *LD = cast<LoadSDNode>(N); 3768 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 3769 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 3770 LD->isNonTemporal(), LD->getAlignment()); 3771} 3772 3773static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 3774 // Multiplications are only custom-lowered for 128-bit vectors so that 3775 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
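// e.g., (v8i16 mul (sext v8i8 X), (sext v8i8 Y)) is lowered to
// (VMULLs X, Y), a single VMULL.S8 that widens while it multiplies, rather
// than extending both operands and using VMUL.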
3776 EVT VT = Op.getValueType(); 3777 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 3778 SDNode *N0 = Op.getOperand(0).getNode(); 3779 SDNode *N1 = Op.getOperand(1).getNode(); 3780 unsigned NewOpc = 0; 3781 if ((N0->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N0)) && 3782 (N1->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N1))) { 3783 NewOpc = ARMISD::VMULLs; 3784 } else if ((N0->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N0)) && 3785 (N1->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N1))) { 3786 NewOpc = ARMISD::VMULLu; 3787 } else if (VT.getSimpleVT().SimpleTy == MVT::v2i64) { 3788 // Fall through to expand this. It is not legal. 3789 return SDValue(); 3790 } else { 3791 // Other vector multiplications are legal. 3792 return Op; 3793 } 3794 3795 // Legalize to a VMULL instruction. 3796 DebugLoc DL = Op.getDebugLoc(); 3797 SDValue Op0 = SkipExtension(N0, DAG); 3798 SDValue Op1 = SkipExtension(N1, DAG); 3799 3800 assert(Op0.getValueType().is64BitVector() && 3801 Op1.getValueType().is64BitVector() && 3802 "unexpected types for extended operands to VMULL"); 3803 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 3804} 3805 3806SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 3807 switch (Op.getOpcode()) { 3808 default: llvm_unreachable("Don't know how to custom lower this!"); 3809 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3810 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 3811 case ISD::GlobalAddress: 3812 return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : 3813 LowerGlobalAddressELF(Op, DAG); 3814 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3815 case ISD::SELECT: return LowerSELECT(Op, DAG); 3816 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3817 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 3818 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 3819 case ISD::VASTART: return LowerVASTART(Op, DAG); 3820 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 3821 case ISD::SINT_TO_FP: 3822 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 3823 case ISD::FP_TO_SINT: 3824 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 3825 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 3826 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 3827 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 3828 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 3829 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 3830 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 3831 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 3832 Subtarget); 3833 case ISD::BIT_CONVERT: return ExpandBIT_CONVERT(Op.getNode(), DAG); 3834 case ISD::SHL: 3835 case ISD::SRL: 3836 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 3837 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 3838 case ISD::SRL_PARTS: 3839 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 3840 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 3841 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 3842 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 3843 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3844 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 3845 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 3846 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 3847 case ISD::MUL: return 
LowerMUL(Op, DAG); 3848 } 3849 return SDValue(); 3850} 3851 3852/// ReplaceNodeResults - Replace the results of node with an illegal result 3853/// type with new values built out of custom code. 3854void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 3855 SmallVectorImpl<SDValue>&Results, 3856 SelectionDAG &DAG) const { 3857 SDValue Res; 3858 switch (N->getOpcode()) { 3859 default: 3860 llvm_unreachable("Don't know how to custom expand this!"); 3861 break; 3862 case ISD::BIT_CONVERT: 3863 Res = ExpandBIT_CONVERT(N, DAG); 3864 break; 3865 case ISD::SRL: 3866 case ISD::SRA: 3867 Res = LowerShift(N, DAG, Subtarget); 3868 break; 3869 } 3870 if (Res.getNode()) 3871 Results.push_back(Res); 3872} 3873 3874//===----------------------------------------------------------------------===// 3875// ARM Scheduler Hooks 3876//===----------------------------------------------------------------------===// 3877 3878MachineBasicBlock * 3879ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 3880 MachineBasicBlock *BB, 3881 unsigned Size) const { 3882 unsigned dest = MI->getOperand(0).getReg(); 3883 unsigned ptr = MI->getOperand(1).getReg(); 3884 unsigned oldval = MI->getOperand(2).getReg(); 3885 unsigned newval = MI->getOperand(3).getReg(); 3886 unsigned scratch = BB->getParent()->getRegInfo() 3887 .createVirtualRegister(ARM::GPRRegisterClass); 3888 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3889 DebugLoc dl = MI->getDebugLoc(); 3890 bool isThumb2 = Subtarget->isThumb2(); 3891 3892 unsigned ldrOpc, strOpc; 3893 switch (Size) { 3894 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 3895 case 1: 3896 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3897 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3898 break; 3899 case 2: 3900 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3901 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3902 break; 3903 case 4: 3904 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3905 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3906 break; 3907 } 3908 3909 MachineFunction *MF = BB->getParent(); 3910 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3911 MachineFunction::iterator It = BB; 3912 ++It; // insert the new blocks after the current block 3913 3914 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3915 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 3916 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 3917 MF->insert(It, loop1MBB); 3918 MF->insert(It, loop2MBB); 3919 MF->insert(It, exitMBB); 3920 3921 // Transfer the remainder of BB and its successor edges to exitMBB. 3922 exitMBB->splice(exitMBB->begin(), BB, 3923 llvm::next(MachineBasicBlock::iterator(MI)), 3924 BB->end()); 3925 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 3926 3927 // thisMBB: 3928 // ... 3929 // fallthrough --> loop1MBB 3930 BB->addSuccessor(loop1MBB); 3931 3932 // loop1MBB: 3933 // ldrex dest, [ptr] 3934 // cmp dest, oldval 3935 // bne exitMBB 3936 BB = loop1MBB; 3937 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 3938 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 3939 .addReg(dest).addReg(oldval)); 3940 BuildMI(BB, dl, TII->get(isThumb2 ?
ARM::t2Bcc : ARM::Bcc)) 3941 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3942 BB->addSuccessor(loop2MBB); 3943 BB->addSuccessor(exitMBB); 3944 3945 // loop2MBB: 3946 // strex scratch, newval, [ptr] 3947 // cmp scratch, #0 3948 // bne loop1MBB 3949 BB = loop2MBB; 3950 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 3951 .addReg(ptr)); 3952 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 3953 .addReg(scratch).addImm(0)); 3954 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 3955 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 3956 BB->addSuccessor(loop1MBB); 3957 BB->addSuccessor(exitMBB); 3958 3959 // exitMBB: 3960 // ... 3961 BB = exitMBB; 3962 3963 MI->eraseFromParent(); // The instruction is gone now. 3964 3965 return BB; 3966} 3967 3968MachineBasicBlock * 3969ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 3970 unsigned Size, unsigned BinOpcode) const { 3971 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 3972 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 3973 3974 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 3975 MachineFunction *MF = BB->getParent(); 3976 MachineFunction::iterator It = BB; 3977 ++It; 3978 3979 unsigned dest = MI->getOperand(0).getReg(); 3980 unsigned ptr = MI->getOperand(1).getReg(); 3981 unsigned incr = MI->getOperand(2).getReg(); 3982 DebugLoc dl = MI->getDebugLoc(); 3983 3984 bool isThumb2 = Subtarget->isThumb2(); 3985 unsigned ldrOpc, strOpc; 3986 switch (Size) { 3987 default: llvm_unreachable("unsupported size for AtomicBinary!"); 3988 case 1: 3989 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 3990 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 3991 break; 3992 case 2: 3993 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 3994 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 3995 break; 3996 case 4: 3997 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 3998 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 3999 break; 4000 } 4001 4002 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4003 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4004 MF->insert(It, loopMBB); 4005 MF->insert(It, exitMBB); 4006 4007 // Transfer the remainder of BB and its successor edges to exitMBB. 4008 exitMBB->splice(exitMBB->begin(), BB, 4009 llvm::next(MachineBasicBlock::iterator(MI)), 4010 BB->end()); 4011 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4012 4013 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 4014 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 4015 unsigned scratch2 = (!BinOpcode) ? incr : 4016 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 4017 4018 // thisMBB: 4019 // ... 4020 // fallthrough --> loopMBB 4021 BB->addSuccessor(loopMBB); 4022 4023 // loopMBB: 4024 // ldrex dest, ptr 4025 // <binop> scratch2, dest, incr 4026 // strex scratch, scratch2, ptr 4027 // cmp scratch, #0 4028 // bne- loopMBB 4029 // fallthrough --> exitMBB 4030 BB = loopMBB; 4031 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 4032 if (BinOpcode) { 4033 // operand order needs to go the other way for NAND 4034 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 4035 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 4036 addReg(incr).addReg(dest)).addReg(0); 4037 else 4038 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4039 addReg(dest).addReg(incr)).addReg(0); 4040 } 4041 4042 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 4043 .addReg(ptr)); 4044 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4045 .addReg(scratch).addImm(0)); 4046 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4047 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 4048 4049 BB->addSuccessor(loopMBB); 4050 BB->addSuccessor(exitMBB); 4051 4052 // exitMBB: 4053 // ... 4054 BB = exitMBB; 4055 4056 MI->eraseFromParent(); // The instruction is gone now. 4057 4058 return BB; 4059} 4060 4061static 4062MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 4063 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 4064 E = MBB->succ_end(); I != E; ++I) 4065 if (*I != Succ) 4066 return *I; 4067 llvm_unreachable("Expecting a BB with two successors!"); 4068} 4069 4070MachineBasicBlock * 4071ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 4072 MachineBasicBlock *BB) const { 4073 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4074 DebugLoc dl = MI->getDebugLoc(); 4075 bool isThumb2 = Subtarget->isThumb2(); 4076 switch (MI->getOpcode()) { 4077 default: 4078 MI->dump(); 4079 llvm_unreachable("Unexpected instr type to insert"); 4080 4081 case ARM::ATOMIC_LOAD_ADD_I8: 4082 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4083 case ARM::ATOMIC_LOAD_ADD_I16: 4084 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4085 case ARM::ATOMIC_LOAD_ADD_I32: 4086 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4087 4088 case ARM::ATOMIC_LOAD_AND_I8: 4089 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4090 case ARM::ATOMIC_LOAD_AND_I16: 4091 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4092 case ARM::ATOMIC_LOAD_AND_I32: 4093 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4094 4095 case ARM::ATOMIC_LOAD_OR_I8: 4096 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4097 case ARM::ATOMIC_LOAD_OR_I16: 4098 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4099 case ARM::ATOMIC_LOAD_OR_I32: 4100 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4101 4102 case ARM::ATOMIC_LOAD_XOR_I8: 4103 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4104 case ARM::ATOMIC_LOAD_XOR_I16: 4105 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4106 case ARM::ATOMIC_LOAD_XOR_I32: 4107 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4108 4109 case ARM::ATOMIC_LOAD_NAND_I8: 4110 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4111 case ARM::ATOMIC_LOAD_NAND_I16: 4112 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4113 case ARM::ATOMIC_LOAD_NAND_I32: 4114 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4115 4116 case ARM::ATOMIC_LOAD_SUB_I8: 4117 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 4118 case ARM::ATOMIC_LOAD_SUB_I16: 4119 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 4120 case ARM::ATOMIC_LOAD_SUB_I32: 4121 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 4122 4123 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 4124 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 4125 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 4126 4127 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 4128 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 4129 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 4130 4131 case ARM::tMOVCCr_pseudo: { 4132 // To "insert" a SELECT_CC instruction, we actually have to insert the 4133 // diamond control-flow pattern. The incoming instruction knows the 4134 // destination vreg to set, the condition code register to branch on, the 4135 // true/false values to select between, and a branch opcode to use. 4136 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4137 MachineFunction::iterator It = BB; 4138 ++It; 4139 4140 // thisMBB: 4141 // ... 4142 // TrueVal = ... 4143 // cmpTY ccX, r1, r2 4144 // bCC copy1MBB 4145 // fallthrough --> copy0MBB 4146 MachineBasicBlock *thisMBB = BB; 4147 MachineFunction *F = BB->getParent(); 4148 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 4149 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 4150 F->insert(It, copy0MBB); 4151 F->insert(It, sinkMBB); 4152 4153 // Transfer the remainder of BB and its successor edges to sinkMBB. 4154 sinkMBB->splice(sinkMBB->begin(), BB, 4155 llvm::next(MachineBasicBlock::iterator(MI)), 4156 BB->end()); 4157 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 4158 4159 BB->addSuccessor(copy0MBB); 4160 BB->addSuccessor(sinkMBB); 4161 4162 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 4163 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 4164 4165 // copy0MBB: 4166 // %FalseValue = ... 4167 // # fallthrough to sinkMBB 4168 BB = copy0MBB; 4169 4170 // Update machine-CFG edges 4171 BB->addSuccessor(sinkMBB); 4172 4173 // sinkMBB: 4174 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 4175 // ... 4176 BB = sinkMBB; 4177 BuildMI(*BB, BB->begin(), dl, 4178 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 4179 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 4180 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 4181 4182 MI->eraseFromParent(); // The pseudo instruction is gone now. 4183 return BB; 4184 } 4185 4186 case ARM::BCCi64: 4187 case ARM::BCCZi64: { 4188 // Compare both parts that make up the double comparison separately for 4189 // equality. 4190 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 4191 4192 unsigned LHS1 = MI->getOperand(1).getReg(); 4193 unsigned LHS2 = MI->getOperand(2).getReg(); 4194 if (RHSisZero) { 4195 AddDefaultPred(BuildMI(BB, dl, 4196 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4197 .addReg(LHS1).addImm(0)); 4198 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 4199 .addReg(LHS2).addImm(0) 4200 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4201 } else { 4202 unsigned RHS1 = MI->getOperand(3).getReg(); 4203 unsigned RHS2 = MI->getOperand(4).getReg(); 4204 AddDefaultPred(BuildMI(BB, dl, 4205 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4206 .addReg(LHS1).addReg(RHS1)); 4207 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4208 .addReg(LHS2).addReg(RHS2) 4209 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 4210 } 4211 4212 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 
3 : 5).getMBB(); 4213 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 4214 if (MI->getOperand(0).getImm() == ARMCC::NE) 4215 std::swap(destMBB, exitMBB); 4216 4217 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4218 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 4219 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 4220 .addMBB(exitMBB); 4221 4222 MI->eraseFromParent(); // The pseudo instruction is gone now. 4223 return BB; 4224 } 4225 } 4226} 4227 4228//===----------------------------------------------------------------------===// 4229// ARM Optimization Hooks 4230//===----------------------------------------------------------------------===// 4231 4232static 4233SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 4234 TargetLowering::DAGCombinerInfo &DCI) { 4235 SelectionDAG &DAG = DCI.DAG; 4236 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4237 EVT VT = N->getValueType(0); 4238 unsigned Opc = N->getOpcode(); 4239 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 4240 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 4241 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); 4242 ISD::CondCode CC = ISD::SETCC_INVALID; 4243 4244 if (isSlctCC) { 4245 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 4246 } else { 4247 SDValue CCOp = Slct.getOperand(0); 4248 if (CCOp.getOpcode() == ISD::SETCC) 4249 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 4250 } 4251 4252 bool DoXform = false; 4253 bool InvCC = false; 4254 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 4255 "Bad input!"); 4256 4257 if (LHS.getOpcode() == ISD::Constant && 4258 cast<ConstantSDNode>(LHS)->isNullValue()) { 4259 DoXform = true; 4260 } else if (CC != ISD::SETCC_INVALID && 4261 RHS.getOpcode() == ISD::Constant && 4262 cast<ConstantSDNode>(RHS)->isNullValue()) { 4263 std::swap(LHS, RHS); 4264 SDValue Op0 = Slct.getOperand(0); 4265 EVT OpVT = isSlctCC ? Op0.getValueType() : 4266 Op0.getOperand(0).getValueType(); 4267 bool isInt = OpVT.isInteger(); 4268 CC = ISD::getSetCCInverse(CC, isInt); 4269 4270 if (!TLI.isCondCodeLegal(CC, OpVT)) 4271 return SDValue(); // Inverse operator isn't legal. 4272 4273 DoXform = true; 4274 InvCC = true; 4275 } 4276 4277 if (DoXform) { 4278 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 4279 if (isSlctCC) 4280 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 4281 Slct.getOperand(0), Slct.getOperand(1), CC); 4282 SDValue CCOp = Slct.getOperand(0); 4283 if (InvCC) 4284 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 4285 CCOp.getOperand(0), CCOp.getOperand(1), CC); 4286 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4287 CCOp, OtherOp, Result); 4288 } 4289 return SDValue(); 4290} 4291 4292/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 4293/// operands N0 and N1. This is a helper for PerformADDCombine that is 4294/// called with the default operands, and if that fails, with commuted 4295/// operands. 4296static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 4297 TargetLowering::DAGCombinerInfo &DCI) { 4298 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 4299 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 4300 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 4301 if (Result.getNode()) return Result; 4302 } 4303 return SDValue(); 4304} 4305 4306/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 
4307/// 4308static SDValue PerformADDCombine(SDNode *N, 4309 TargetLowering::DAGCombinerInfo &DCI) { 4310 SDValue N0 = N->getOperand(0); 4311 SDValue N1 = N->getOperand(1); 4312 4313 // First try with the default operand order. 4314 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); 4315 if (Result.getNode()) 4316 return Result; 4317 4318 // If that didn't work, try again with the operands commuted. 4319 return PerformADDCombineWithOperands(N, N1, N0, DCI); 4320} 4321 4322/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 4323/// 4324static SDValue PerformSUBCombine(SDNode *N, 4325 TargetLowering::DAGCombinerInfo &DCI) { 4326 SDValue N0 = N->getOperand(0); 4327 SDValue N1 = N->getOperand(1); 4328 4329 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 4330 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 4331 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 4332 if (Result.getNode()) return Result; 4333 } 4334 4335 return SDValue(); 4336} 4337 4338static SDValue PerformMULCombine(SDNode *N, 4339 TargetLowering::DAGCombinerInfo &DCI, 4340 const ARMSubtarget *Subtarget) { 4341 SelectionDAG &DAG = DCI.DAG; 4342 4343 if (Subtarget->isThumb1Only()) 4344 return SDValue(); 4345 4346 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 4347 return SDValue(); 4348 4349 EVT VT = N->getValueType(0); 4350 if (VT != MVT::i32) 4351 return SDValue(); 4352 4353 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 4354 if (!C) 4355 return SDValue(); 4356 4357 uint64_t MulAmt = C->getZExtValue(); 4358 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 4359 ShiftAmt = ShiftAmt & (32 - 1); 4360 SDValue V = N->getOperand(0); 4361 DebugLoc DL = N->getDebugLoc(); 4362 4363 SDValue Res; 4364 MulAmt >>= ShiftAmt; 4365 if (isPowerOf2_32(MulAmt - 1)) { 4366 // (mul x, 2^N + 1) => (add (shl x, N), x) 4367 Res = DAG.getNode(ISD::ADD, DL, VT, 4368 V, DAG.getNode(ISD::SHL, DL, VT, 4369 V, DAG.getConstant(Log2_32(MulAmt-1), 4370 MVT::i32))); 4371 } else if (isPowerOf2_32(MulAmt + 1)) { 4372 // (mul x, 2^N - 1) => (sub (shl x, N), x) 4373 Res = DAG.getNode(ISD::SUB, DL, VT, 4374 DAG.getNode(ISD::SHL, DL, VT, 4375 V, DAG.getConstant(Log2_32(MulAmt+1), 4376 MVT::i32)), 4377 V); 4378 } else 4379 return SDValue(); 4380 4381 if (ShiftAmt != 0) 4382 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 4383 DAG.getConstant(ShiftAmt, MVT::i32)); 4384 4385 // Do not add new nodes to DAG combiner worklist. 4386 DCI.CombineTo(N, Res, false); 4387 return SDValue(); 4388} 4389 4390/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 4391static SDValue PerformORCombine(SDNode *N, 4392 TargetLowering::DAGCombinerInfo &DCI, 4393 const ARMSubtarget *Subtarget) { 4394 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 4395 // reasonable. 
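// Worked example for case (1) below: (or (and A, 0xffffff00), 0x4c) has
// (0x4c & ~0xffffff00) == 0x4c, so it becomes (ARMbfi A, 0x4c, 0xffffff00),
// writing the constant into the low byte of A while preserving the rest.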
4396 4397 // BFI is only available on V6T2+ 4398 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 4399 return SDValue(); 4400 4401 SelectionDAG &DAG = DCI.DAG; 4402 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 4403 DebugLoc DL = N->getDebugLoc(); 4404 // 1) or (and A, mask), val => ARMbfi A, val, mask 4405 // iff (val & mask) == val 4406 // 4407 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4408 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 4409 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4410 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 4411 // && CountPopulation_32(mask) == CountPopulation_32(~mask2) 4412 // (i.e., copy a bitfield value into another bitfield of the same width) 4413 if (N0.getOpcode() != ISD::AND) 4414 return SDValue(); 4415 4416 EVT VT = N->getValueType(0); 4417 if (VT != MVT::i32) 4418 return SDValue(); 4419 4420 4421 // The value and the mask need to be constants so we can verify this is 4422 // actually a bitfield set. If the mask is 0xffff, we can do better 4423 // via a movt instruction, so don't use BFI in that case. 4424 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4425 if (!C) 4426 return SDValue(); 4427 unsigned Mask = C->getZExtValue(); 4428 if (Mask == 0xffff) 4429 return SDValue(); 4430 SDValue Res; 4431 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 4432 if ((C = dyn_cast<ConstantSDNode>(N1))) { 4433 unsigned Val = C->getZExtValue(); 4434 if (!ARM::isBitFieldInvertedMask(Mask) || (Val & ~Mask) != Val) 4435 return SDValue(); 4436 Val >>= CountTrailingZeros_32(~Mask); 4437 4438 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), 4439 DAG.getConstant(Val, MVT::i32), 4440 DAG.getConstant(Mask, MVT::i32)); 4441 4442 // Do not add new nodes to DAG combiner worklist. 4443 DCI.CombineTo(N, Res, false); 4444 } else if (N1.getOpcode() == ISD::AND) { 4445 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 4446 C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 4447 if (!C) 4448 return SDValue(); 4449 unsigned Mask2 = C->getZExtValue(); 4450 4451 if (ARM::isBitFieldInvertedMask(Mask) && 4452 ARM::isBitFieldInvertedMask(~Mask2) && 4453 (CountPopulation_32(Mask) == CountPopulation_32(~Mask2))) { 4454 // The pack halfword instruction works better for masks that fit it, 4455 // so use that when it's available. 4456 if (Subtarget->hasT2ExtractPack() && 4457 (Mask == 0xffff || Mask == 0xffff0000)) 4458 return SDValue(); 4459 // 2a 4460 unsigned lsb = CountTrailingZeros_32(Mask2); 4461 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 4462 DAG.getConstant(lsb, MVT::i32)); 4463 Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0), Res, 4464 DAG.getConstant(Mask, MVT::i32)); 4465 // Do not add new nodes to DAG combiner worklist. 4466 DCI.CombineTo(N, Res, false); 4467 } else if (ARM::isBitFieldInvertedMask(~Mask) && 4468 ARM::isBitFieldInvertedMask(Mask2) && 4469 (CountPopulation_32(~Mask) == CountPopulation_32(Mask2))) { 4470 // The pack halfword instruction works better for masks that fit it, 4471 // so use that when it's available. 
4472 if (Subtarget->hasT2ExtractPack() && 4473 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 4474 return SDValue(); 4475 // 2b 4476 unsigned lsb = CountTrailingZeros_32(Mask); 4477 Res = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4478 DAG.getConstant(lsb, MVT::i32)); 4479 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 4480 DAG.getConstant(Mask2, MVT::i32)); 4481 // Do not add new nodes to DAG combiner worklist. 4482 DCI.CombineTo(N, Res, false); 4483 } 4484 } 4485 4486 return SDValue(); 4487} 4488 4489/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 4490/// ARMISD::VMOVRRD. 4491static SDValue PerformVMOVRRDCombine(SDNode *N, 4492 TargetLowering::DAGCombinerInfo &DCI) { 4493 // vmovrrd(vmovdrr x, y) -> x,y 4494 SDValue InDouble = N->getOperand(0); 4495 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 4496 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 4497 return SDValue(); 4498} 4499 4500/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 4501/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 4502static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 4503 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 4504 SDValue Op0 = N->getOperand(0); 4505 SDValue Op1 = N->getOperand(1); 4506 if (Op0.getOpcode() == ISD::BIT_CONVERT) 4507 Op0 = Op0.getOperand(0); 4508 if (Op1.getOpcode() == ISD::BIT_CONVERT) 4509 Op1 = Op1.getOperand(0); 4510 if (Op0.getOpcode() == ARMISD::VMOVRRD && 4511 Op0.getNode() == Op1.getNode() && 4512 Op0.getResNo() == 0 && Op1.getResNo() == 1) 4513 return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), 4514 N->getValueType(0), Op0.getOperand(0)); 4515 return SDValue(); 4516} 4517 4518/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 4519/// ISD::BUILD_VECTOR. 4520static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG) { 4521 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 4522 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 4523 // into a pair of GPRs, which is fine when the value is used as a scalar, 4524 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 4525 if (N->getNumOperands() == 2) 4526 return PerformVMOVDRRCombine(N, DAG); 4527 4528 return SDValue(); 4529} 4530 4531/// PerformVDUPLANECombine - Target-specific dag combine xforms for 4532/// ARMISD::VDUPLANE. 4533static SDValue PerformVDUPLANECombine(SDNode *N, SelectionDAG &DAG) { 4534 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 4535 // redundant. 4536 SDValue Op = N->getOperand(0); 4537 EVT VT = N->getValueType(0); 4538 4539 // Ignore bit_converts. 4540 while (Op.getOpcode() == ISD::BIT_CONVERT) 4541 Op = Op.getOperand(0); 4542 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 4543 return SDValue(); 4544 4545 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 4546 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 4547 // The canonical VMOV for a zero vector uses a 32-bit element size. 
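  // E.g. a zero vector built as "vmov.i32 d0, #0" can feed a VDUPLANE with
  // 8-bit elements: every lane of a zero vector is identical, so the
  // decodeNEONModImm check below relaxes EltSize to 8 for that case
  // (illustrative).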
4548 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 4549 unsigned EltBits; 4550 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 4551 EltSize = 8; 4552 if (EltSize > VT.getVectorElementType().getSizeInBits()) 4553 return SDValue(); 4554 4555 return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, Op); 4556} 4557 4558/// getVShiftImm - Check if this is a valid build_vector for the immediate 4559/// operand of a vector shift operation, where all the elements of the 4560/// build_vector must have the same constant integer value. 4561static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 4562 // Ignore bit_converts. 4563 while (Op.getOpcode() == ISD::BIT_CONVERT) 4564 Op = Op.getOperand(0); 4565 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 4566 APInt SplatBits, SplatUndef; 4567 unsigned SplatBitSize; 4568 bool HasAnyUndefs; 4569 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 4570 HasAnyUndefs, ElementBits) || 4571 SplatBitSize > ElementBits) 4572 return false; 4573 Cnt = SplatBits.getSExtValue(); 4574 return true; 4575} 4576 4577/// isVShiftLImm - Check if this is a valid build_vector for the immediate 4578/// operand of a vector shift left operation. That value must be in the range: 4579/// 0 <= Value < ElementBits for a left shift; or 4580/// 0 <= Value <= ElementBits for a long left shift. 4581static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 4582 assert(VT.isVector() && "vector shift count is not a vector type"); 4583 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 4584 if (! getVShiftImm(Op, ElementBits, Cnt)) 4585 return false; 4586 return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits); 4587} 4588 4589/// isVShiftRImm - Check if this is a valid build_vector for the immediate 4590/// operand of a vector shift right operation. For a shift opcode, the value 4591/// is positive, but for an intrinsic the value count must be negative. The 4592/// absolute value must be in the range: 4593/// 1 <= |Value| <= ElementBits for a right shift; or 4594/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 4595static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 4596 int64_t &Cnt) { 4597 assert(VT.isVector() && "vector shift count is not a vector type"); 4598 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 4599 if (! getVShiftImm(Op, ElementBits, Cnt)) 4600 return false; 4601 if (isIntrinsic) 4602 Cnt = -Cnt; 4603 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 4604} 4605 4606/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 4607static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 4608 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4609 switch (IntNo) { 4610 default: 4611 // Don't do anything for most intrinsics. 4612 break; 4613 4614 // Vector shifts: check for immediate versions and lower them. 4615 // Note: This is done during DAG combining instead of DAG legalizing because 4616 // the build_vectors for 64-bit vector element shift counts are generally 4617 // not legal, and it is hard to see their values after they get legalized to 4618 // loads from a constant pool. 
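  // E.g. a vshifts intrinsic whose shift-count operand is the splat
  // (build_vector (i32 3), ...) is rewritten here into an ARMISD::VSHL node
  // with immediate 3, which can then be selected as "vshl.i32 q0, q0, #3"
  // (illustrative).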
4619 case Intrinsic::arm_neon_vshifts: 4620 case Intrinsic::arm_neon_vshiftu: 4621 case Intrinsic::arm_neon_vshiftls: 4622 case Intrinsic::arm_neon_vshiftlu: 4623 case Intrinsic::arm_neon_vshiftn: 4624 case Intrinsic::arm_neon_vrshifts: 4625 case Intrinsic::arm_neon_vrshiftu: 4626 case Intrinsic::arm_neon_vrshiftn: 4627 case Intrinsic::arm_neon_vqshifts: 4628 case Intrinsic::arm_neon_vqshiftu: 4629 case Intrinsic::arm_neon_vqshiftsu: 4630 case Intrinsic::arm_neon_vqshiftns: 4631 case Intrinsic::arm_neon_vqshiftnu: 4632 case Intrinsic::arm_neon_vqshiftnsu: 4633 case Intrinsic::arm_neon_vqrshiftns: 4634 case Intrinsic::arm_neon_vqrshiftnu: 4635 case Intrinsic::arm_neon_vqrshiftnsu: { 4636 EVT VT = N->getOperand(1).getValueType(); 4637 int64_t Cnt; 4638 unsigned VShiftOpc = 0; 4639 4640 switch (IntNo) { 4641 case Intrinsic::arm_neon_vshifts: 4642 case Intrinsic::arm_neon_vshiftu: 4643 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 4644 VShiftOpc = ARMISD::VSHL; 4645 break; 4646 } 4647 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 4648 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 4649 ARMISD::VSHRs : ARMISD::VSHRu); 4650 break; 4651 } 4652 return SDValue(); 4653 4654 case Intrinsic::arm_neon_vshiftls: 4655 case Intrinsic::arm_neon_vshiftlu: 4656 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 4657 break; 4658 llvm_unreachable("invalid shift count for vshll intrinsic"); 4659 4660 case Intrinsic::arm_neon_vrshifts: 4661 case Intrinsic::arm_neon_vrshiftu: 4662 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 4663 break; 4664 return SDValue(); 4665 4666 case Intrinsic::arm_neon_vqshifts: 4667 case Intrinsic::arm_neon_vqshiftu: 4668 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4669 break; 4670 return SDValue(); 4671 4672 case Intrinsic::arm_neon_vqshiftsu: 4673 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 4674 break; 4675 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 4676 4677 case Intrinsic::arm_neon_vshiftn: 4678 case Intrinsic::arm_neon_vrshiftn: 4679 case Intrinsic::arm_neon_vqshiftns: 4680 case Intrinsic::arm_neon_vqshiftnu: 4681 case Intrinsic::arm_neon_vqshiftnsu: 4682 case Intrinsic::arm_neon_vqrshiftns: 4683 case Intrinsic::arm_neon_vqrshiftnu: 4684 case Intrinsic::arm_neon_vqrshiftnsu: 4685 // Narrowing shifts require an immediate right shift. 4686 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 4687 break; 4688 llvm_unreachable("invalid shift count for narrowing vector shift " 4689 "intrinsic"); 4690 4691 default: 4692 llvm_unreachable("unhandled vector shift"); 4693 } 4694 4695 switch (IntNo) { 4696 case Intrinsic::arm_neon_vshifts: 4697 case Intrinsic::arm_neon_vshiftu: 4698 // Opcode already set above. 4699 break; 4700 case Intrinsic::arm_neon_vshiftls: 4701 case Intrinsic::arm_neon_vshiftlu: 4702 if (Cnt == VT.getVectorElementType().getSizeInBits()) 4703 VShiftOpc = ARMISD::VSHLLi; 4704 else 4705 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
4706 ARMISD::VSHLLs : ARMISD::VSHLLu); 4707 break; 4708 case Intrinsic::arm_neon_vshiftn: 4709 VShiftOpc = ARMISD::VSHRN; break; 4710 case Intrinsic::arm_neon_vrshifts: 4711 VShiftOpc = ARMISD::VRSHRs; break; 4712 case Intrinsic::arm_neon_vrshiftu: 4713 VShiftOpc = ARMISD::VRSHRu; break; 4714 case Intrinsic::arm_neon_vrshiftn: 4715 VShiftOpc = ARMISD::VRSHRN; break; 4716 case Intrinsic::arm_neon_vqshifts: 4717 VShiftOpc = ARMISD::VQSHLs; break; 4718 case Intrinsic::arm_neon_vqshiftu: 4719 VShiftOpc = ARMISD::VQSHLu; break; 4720 case Intrinsic::arm_neon_vqshiftsu: 4721 VShiftOpc = ARMISD::VQSHLsu; break; 4722 case Intrinsic::arm_neon_vqshiftns: 4723 VShiftOpc = ARMISD::VQSHRNs; break; 4724 case Intrinsic::arm_neon_vqshiftnu: 4725 VShiftOpc = ARMISD::VQSHRNu; break; 4726 case Intrinsic::arm_neon_vqshiftnsu: 4727 VShiftOpc = ARMISD::VQSHRNsu; break; 4728 case Intrinsic::arm_neon_vqrshiftns: 4729 VShiftOpc = ARMISD::VQRSHRNs; break; 4730 case Intrinsic::arm_neon_vqrshiftnu: 4731 VShiftOpc = ARMISD::VQRSHRNu; break; 4732 case Intrinsic::arm_neon_vqrshiftnsu: 4733 VShiftOpc = ARMISD::VQRSHRNsu; break; 4734 } 4735 4736 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4737 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 4738 } 4739 4740 case Intrinsic::arm_neon_vshiftins: { 4741 EVT VT = N->getOperand(1).getValueType(); 4742 int64_t Cnt; 4743 unsigned VShiftOpc = 0; 4744 4745 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 4746 VShiftOpc = ARMISD::VSLI; 4747 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 4748 VShiftOpc = ARMISD::VSRI; 4749 else { 4750 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 4751 } 4752 4753 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 4754 N->getOperand(1), N->getOperand(2), 4755 DAG.getConstant(Cnt, MVT::i32)); 4756 } 4757 4758 case Intrinsic::arm_neon_vqrshifts: 4759 case Intrinsic::arm_neon_vqrshiftu: 4760 // No immediate versions of these to check for. 4761 break; 4762 } 4763 4764 return SDValue(); 4765} 4766 4767/// PerformShiftCombine - Checks for immediate versions of vector shifts and 4768/// lowers them. As with the vector shift intrinsics, this is done during DAG 4769/// combining instead of DAG legalizing because the build_vectors for 64-bit 4770/// vector element shift counts are generally not legal, and it is hard to see 4771/// their values after they get legalized to loads from a constant pool. 4772static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 4773 const ARMSubtarget *ST) { 4774 EVT VT = N->getValueType(0); 4775 4776 // Nothing to be done for scalar shifts. 4777 if (! VT.isVector()) 4778 return SDValue(); 4779 4780 assert(ST->hasNEON() && "unexpected vector shift"); 4781 int64_t Cnt; 4782 4783 switch (N->getOpcode()) { 4784 default: llvm_unreachable("unexpected shift opcode"); 4785 4786 case ISD::SHL: 4787 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 4788 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 4789 DAG.getConstant(Cnt, MVT::i32)); 4790 break; 4791 4792 case ISD::SRA: 4793 case ISD::SRL: 4794 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 4795 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
4796 ARMISD::VSHRs : ARMISD::VSHRu); 4797 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 4798 DAG.getConstant(Cnt, MVT::i32)); 4799 } 4800 } 4801 return SDValue(); 4802} 4803 4804/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 4805/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 4806static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 4807 const ARMSubtarget *ST) { 4808 SDValue N0 = N->getOperand(0); 4809 4810 // Check for sign- and zero-extensions of vector extract operations of 8- 4811 // and 16-bit vector elements. NEON supports these directly. They are 4812 // handled during DAG combining because type legalization will promote them 4813 // to 32-bit types and it is messy to recognize the operations after that. 4814 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 4815 SDValue Vec = N0.getOperand(0); 4816 SDValue Lane = N0.getOperand(1); 4817 EVT VT = N->getValueType(0); 4818 EVT EltVT = N0.getValueType(); 4819 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 4820 4821 if (VT == MVT::i32 && 4822 (EltVT == MVT::i8 || EltVT == MVT::i16) && 4823 TLI.isTypeLegal(Vec.getValueType())) { 4824 4825 unsigned Opc = 0; 4826 switch (N->getOpcode()) { 4827 default: llvm_unreachable("unexpected opcode"); 4828 case ISD::SIGN_EXTEND: 4829 Opc = ARMISD::VGETLANEs; 4830 break; 4831 case ISD::ZERO_EXTEND: 4832 case ISD::ANY_EXTEND: 4833 Opc = ARMISD::VGETLANEu; 4834 break; 4835 } 4836 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 4837 } 4838 } 4839 4840 return SDValue(); 4841} 4842 4843/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 4844/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 4845static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 4846 const ARMSubtarget *ST) { 4847 // If the target supports NEON, try to use vmax/vmin instructions for f32 4848 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 4849 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 4850 // a NaN; only do the transformation when it matches that behavior. 4851 4852 // For now only do this when using NEON for FP operations; if using VFP, it 4853 // is not obvious that the benefit outweighs the cost of switching to the 4854 // NEON pipeline. 4855 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 4856 N->getValueType(0) != MVT::f32) 4857 return SDValue(); 4858 4859 SDValue CondLHS = N->getOperand(0); 4860 SDValue CondRHS = N->getOperand(1); 4861 SDValue LHS = N->getOperand(2); 4862 SDValue RHS = N->getOperand(3); 4863 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 4864 4865 unsigned Opcode = 0; 4866 bool IsReversed; 4867 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 4868 IsReversed = false; // x CC y ? x : y 4869 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 4870 IsReversed = true ; // x CC y ? y : x 4871 } else { 4872 return SDValue(); 4873 } 4874 4875 bool IsUnordered; 4876 switch (CC) { 4877 default: break; 4878 case ISD::SETOLT: 4879 case ISD::SETOLE: 4880 case ISD::SETLT: 4881 case ISD::SETLE: 4882 case ISD::SETULT: 4883 case ISD::SETULE: 4884 // If LHS is NaN, an ordered comparison will be false and the result will 4885 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 4886 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
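    // E.g. for "x < y ? x : y" (SETOLT) with x == NaN the select yields y,
    // while vmin(x, y) would yield NaN, so the combine below requires that
    // the relevant operand is known not to be NaN (illustrative).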
4887 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 4888 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4889 break; 4890 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 4891 // will return -0, so vmin can only be used for unsafe math or if one of 4892 // the operands is known to be nonzero. 4893 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 4894 !UnsafeFPMath && 4895 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4896 break; 4897 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 4898 break; 4899 4900 case ISD::SETOGT: 4901 case ISD::SETOGE: 4902 case ISD::SETGT: 4903 case ISD::SETGE: 4904 case ISD::SETUGT: 4905 case ISD::SETUGE: 4906 // If LHS is NaN, an ordered comparison will be false and the result will 4907 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 4908 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 4909 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 4910 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 4911 break; 4912 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 4913 // will return +0, so vmax can only be used for unsafe math or if one of 4914 // the operands is known to be nonzero. 4915 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 4916 !UnsafeFPMath && 4917 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 4918 break; 4919 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 4920 break; 4921 } 4922 4923 if (!Opcode) 4924 return SDValue(); 4925 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 4926} 4927 4928SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 4929 DAGCombinerInfo &DCI) const { 4930 switch (N->getOpcode()) { 4931 default: break; 4932 case ISD::ADD: return PerformADDCombine(N, DCI); 4933 case ISD::SUB: return PerformSUBCombine(N, DCI); 4934 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 4935 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 4936 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 4937 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 4938 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI.DAG); 4939 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI.DAG); 4940 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 4941 case ISD::SHL: 4942 case ISD::SRA: 4943 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 4944 case ISD::SIGN_EXTEND: 4945 case ISD::ZERO_EXTEND: 4946 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 4947 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 4948 } 4949 return SDValue(); 4950} 4951 4952bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 4953 if (!Subtarget->allowsUnalignedMem()) 4954 return false; 4955 4956 switch (VT.getSimpleVT().SimpleTy) { 4957 default: 4958 return false; 4959 case MVT::i8: 4960 case MVT::i16: 4961 case MVT::i32: 4962 return true; 4963 // FIXME: VLD1 etc with standard alignment is legal. 
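  // E.g. an i32 load with alignment 1 can stay a single LDR on subtargets
  // that permit unaligned accesses; when this hook returns false, such a
  // load is instead expanded into aligned pieces during legalization
  // (an illustrative sketch of how the result is used).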
4964 } 4965} 4966 4967static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 4968 if (V < 0) 4969 return false; 4970 4971 unsigned Scale = 1; 4972 switch (VT.getSimpleVT().SimpleTy) { 4973 default: return false; 4974 case MVT::i1: 4975 case MVT::i8: 4976 // Scale == 1; 4977 break; 4978 case MVT::i16: 4979 // Scale == 2; 4980 Scale = 2; 4981 break; 4982 case MVT::i32: 4983 // Scale == 4; 4984 Scale = 4; 4985 break; 4986 } 4987 4988 if ((V & (Scale - 1)) != 0) 4989 return false; 4990 V /= Scale; 4991 return V == (V & ((1LL << 5) - 1)); 4992} 4993 4994static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 4995 const ARMSubtarget *Subtarget) { 4996 bool isNeg = false; 4997 if (V < 0) { 4998 isNeg = true; 4999 V = - V; 5000 } 5001 5002 switch (VT.getSimpleVT().SimpleTy) { 5003 default: return false; 5004 case MVT::i1: 5005 case MVT::i8: 5006 case MVT::i16: 5007 case MVT::i32: 5008 // + imm12 or - imm8 5009 if (isNeg) 5010 return V == (V & ((1LL << 8) - 1)); 5011 return V == (V & ((1LL << 12) - 1)); 5012 case MVT::f32: 5013 case MVT::f64: 5014 // Same as ARM mode. FIXME: NEON? 5015 if (!Subtarget->hasVFP2()) 5016 return false; 5017 if ((V & 3) != 0) 5018 return false; 5019 V >>= 2; 5020 return V == (V & ((1LL << 8) - 1)); 5021 } 5022} 5023 5024/// isLegalAddressImmediate - Return true if the integer value can be used 5025/// as the offset of the target addressing mode for load / store of the 5026/// given type. 5027static bool isLegalAddressImmediate(int64_t V, EVT VT, 5028 const ARMSubtarget *Subtarget) { 5029 if (V == 0) 5030 return true; 5031 5032 if (!VT.isSimple()) 5033 return false; 5034 5035 if (Subtarget->isThumb1Only()) 5036 return isLegalT1AddressImmediate(V, VT); 5037 else if (Subtarget->isThumb2()) 5038 return isLegalT2AddressImmediate(V, VT, Subtarget); 5039 5040 // ARM mode. 5041 if (V < 0) 5042 V = - V; 5043 switch (VT.getSimpleVT().SimpleTy) { 5044 default: return false; 5045 case MVT::i1: 5046 case MVT::i8: 5047 case MVT::i32: 5048 // +- imm12 5049 return V == (V & ((1LL << 12) - 1)); 5050 case MVT::i16: 5051 // +- imm8 5052 return V == (V & ((1LL << 8) - 1)); 5053 case MVT::f32: 5054 case MVT::f64: 5055 if (!Subtarget->hasVFP2()) // FIXME: NEON? 5056 return false; 5057 if ((V & 3) != 0) 5058 return false; 5059 V >>= 2; 5060 return V == (V & ((1LL << 8) - 1)); 5061 } 5062} 5063 5064bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 5065 EVT VT) const { 5066 int Scale = AM.Scale; 5067 if (Scale < 0) 5068 return false; 5069 5070 switch (VT.getSimpleVT().SimpleTy) { 5071 default: return false; 5072 case MVT::i1: 5073 case MVT::i8: 5074 case MVT::i16: 5075 case MVT::i32: 5076 if (Scale == 1) 5077 return true; 5078 // r + r << imm 5079 Scale = Scale & ~1; 5080 return Scale == 2 || Scale == 4 || Scale == 8; 5081 case MVT::i64: 5082 // r + r 5083 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5084 return true; 5085 return false; 5086 case MVT::isVoid: 5087 // Note, we allow "void" uses (basically, uses that aren't loads or 5088 // stores), because arm allows folding a scale into many arithmetic 5089 // operations. This should be made more precise and revisited later. 5090 5091 // Allow r << imm, but the imm has to be a multiple of two. 5092 if (Scale & 1) return false; 5093 return isPowerOf2_32(Scale); 5094 } 5095} 5096 5097/// isLegalAddressingMode - Return true if the addressing mode represented 5098/// by AM is legal for this target, for a load/store of the specified type. 
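/// For example, in ARM mode an i32 load from (add r0, (shl r1, 2)), i.e.
/// "ldr r2, [r0, r1, lsl #2]", is a legal mode (Scale == 4), while
/// "r0 + r1*4 + 8" is rejected below because ARM has no R+R*scale+imm
/// addressing mode (illustrative).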
5099bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 5100 const Type *Ty) const { 5101 EVT VT = getValueType(Ty, true); 5102 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 5103 return false; 5104 5105 // Can never fold addr of global into load/store. 5106 if (AM.BaseGV) 5107 return false; 5108 5109 switch (AM.Scale) { 5110 case 0: // no scale reg, must be "r+i" or "r", or "i". 5111 break; 5112 case 1: 5113 if (Subtarget->isThumb1Only()) 5114 return false; 5115 // FALL THROUGH. 5116 default: 5117 // ARM doesn't support any R+R*scale+imm addr modes. 5118 if (AM.BaseOffs) 5119 return false; 5120 5121 if (!VT.isSimple()) 5122 return false; 5123 5124 if (Subtarget->isThumb2()) 5125 return isLegalT2ScaledAddressingMode(AM, VT); 5126 5127 int Scale = AM.Scale; 5128 switch (VT.getSimpleVT().SimpleTy) { 5129 default: return false; 5130 case MVT::i1: 5131 case MVT::i8: 5132 case MVT::i32: 5133 if (Scale < 0) Scale = -Scale; 5134 if (Scale == 1) 5135 return true; 5136 // r + r << imm 5137 return isPowerOf2_32(Scale & ~1); 5138 case MVT::i16: 5139 case MVT::i64: 5140 // r + r 5141 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 5142 return true; 5143 return false; 5144 5145 case MVT::isVoid: 5146 // Note, we allow "void" uses (basically, uses that aren't loads or 5147 // stores), because arm allows folding a scale into many arithmetic 5148 // operations. This should be made more precise and revisited later. 5149 5150 // Allow r << imm, but the imm has to be a multiple of two. 5151 if (Scale & 1) return false; 5152 return isPowerOf2_32(Scale); 5153 } 5154 break; 5155 } 5156 return true; 5157} 5158 5159/// isLegalICmpImmediate - Return true if the specified immediate is legal 5160/// icmp immediate, that is the target has icmp instructions which can compare 5161/// a register against the immediate without having to materialize the 5162/// immediate into a register. 
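/// For example, "cmp r0, #255" is legal for all subtargets, and
/// "cmp r0, #1020" (0xff rotated by an even amount) is legal in ARM and
/// Thumb2 mode, but Thumb1 CMP immediates are limited to 0-255
/// (illustrative).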
5163bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 5164 if (!Subtarget->isThumb()) 5165 return ARM_AM::getSOImmVal(Imm) != -1; 5166 if (Subtarget->isThumb2()) 5167 return ARM_AM::getT2SOImmVal(Imm) != -1; 5168 return Imm >= 0 && Imm <= 255; 5169} 5170 5171static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 5172 bool isSEXTLoad, SDValue &Base, 5173 SDValue &Offset, bool &isInc, 5174 SelectionDAG &DAG) { 5175 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5176 return false; 5177 5178 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 5179 // AddressingMode 3 5180 Base = Ptr->getOperand(0); 5181 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5182 int RHSC = (int)RHS->getZExtValue(); 5183 if (RHSC < 0 && RHSC > -256) { 5184 assert(Ptr->getOpcode() == ISD::ADD); 5185 isInc = false; 5186 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5187 return true; 5188 } 5189 } 5190 isInc = (Ptr->getOpcode() == ISD::ADD); 5191 Offset = Ptr->getOperand(1); 5192 return true; 5193 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 5194 // AddressingMode 2 5195 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5196 int RHSC = (int)RHS->getZExtValue(); 5197 if (RHSC < 0 && RHSC > -0x1000) { 5198 assert(Ptr->getOpcode() == ISD::ADD); 5199 isInc = false; 5200 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5201 Base = Ptr->getOperand(0); 5202 return true; 5203 } 5204 } 5205 5206 if (Ptr->getOpcode() == ISD::ADD) { 5207 isInc = true; 5208 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 5209 if (ShOpcVal != ARM_AM::no_shift) { 5210 Base = Ptr->getOperand(1); 5211 Offset = Ptr->getOperand(0); 5212 } else { 5213 Base = Ptr->getOperand(0); 5214 Offset = Ptr->getOperand(1); 5215 } 5216 return true; 5217 } 5218 5219 isInc = (Ptr->getOpcode() == ISD::ADD); 5220 Base = Ptr->getOperand(0); 5221 Offset = Ptr->getOperand(1); 5222 return true; 5223 } 5224 5225 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 5226 return false; 5227} 5228 5229static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 5230 bool isSEXTLoad, SDValue &Base, 5231 SDValue &Offset, bool &isInc, 5232 SelectionDAG &DAG) { 5233 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 5234 return false; 5235 5236 Base = Ptr->getOperand(0); 5237 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 5238 int RHSC = (int)RHS->getZExtValue(); 5239 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 5240 assert(Ptr->getOpcode() == ISD::ADD); 5241 isInc = false; 5242 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 5243 return true; 5244 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 5245 isInc = Ptr->getOpcode() == ISD::ADD; 5246 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 5247 return true; 5248 } 5249 } 5250 5251 return false; 5252} 5253 5254/// getPreIndexedAddressParts - returns true by value, base pointer and 5255/// offset pointer and addressing mode by reference if the node's address 5256/// can be legally represented as pre-indexed load / store address. 
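/// For example, the sequence "add r1, r1, #4; ldr r0, [r1]" can be folded
/// into the single pre-indexed load "ldr r0, [r1, #4]!", which also writes
/// the updated address back to r1 (illustrative).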
5257bool 5258ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 5259 SDValue &Offset, 5260 ISD::MemIndexedMode &AM, 5261 SelectionDAG &DAG) const { 5262 if (Subtarget->isThumb1Only()) 5263 return false; 5264 5265 EVT VT; 5266 SDValue Ptr; 5267 bool isSEXTLoad = false; 5268 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5269 Ptr = LD->getBasePtr(); 5270 VT = LD->getMemoryVT(); 5271 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5272 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5273 Ptr = ST->getBasePtr(); 5274 VT = ST->getMemoryVT(); 5275 } else 5276 return false; 5277 5278 bool isInc; 5279 bool isLegal = false; 5280 if (Subtarget->isThumb2()) 5281 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5282 Offset, isInc, DAG); 5283 else 5284 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 5285 Offset, isInc, DAG); 5286 if (!isLegal) 5287 return false; 5288 5289 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 5290 return true; 5291} 5292 5293/// getPostIndexedAddressParts - returns true by value, base pointer and 5294/// offset pointer and addressing mode by reference if this node can be 5295/// combined with a load / store to form a post-indexed load / store. 5296bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 5297 SDValue &Base, 5298 SDValue &Offset, 5299 ISD::MemIndexedMode &AM, 5300 SelectionDAG &DAG) const { 5301 if (Subtarget->isThumb1Only()) 5302 return false; 5303 5304 EVT VT; 5305 SDValue Ptr; 5306 bool isSEXTLoad = false; 5307 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 5308 VT = LD->getMemoryVT(); 5309 Ptr = LD->getBasePtr(); 5310 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 5311 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 5312 VT = ST->getMemoryVT(); 5313 Ptr = ST->getBasePtr(); 5314 } else 5315 return false; 5316 5317 bool isInc; 5318 bool isLegal = false; 5319 if (Subtarget->isThumb2()) 5320 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5321 isInc, DAG); 5322 else 5323 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 5324 isInc, DAG); 5325 if (!isLegal) 5326 return false; 5327 5328 if (Ptr != Base) { 5329 // Swap base ptr and offset to catch more post-index load / store when 5330 // it's legal. In Thumb2 mode, offset must be an immediate. 5331 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 5332 !Subtarget->isThumb2()) 5333 std::swap(Base, Offset); 5334 5335 // Post-indexed load / store update the base pointer. 5336 if (Ptr != Base) 5337 return false; 5338 } 5339 5340 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 5341 return true; 5342} 5343 5344void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 5345 const APInt &Mask, 5346 APInt &KnownZero, 5347 APInt &KnownOne, 5348 const SelectionDAG &DAG, 5349 unsigned Depth) const { 5350 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 5351 switch (Op.getOpcode()) { 5352 default: break; 5353 case ARMISD::CMOV: { 5354 // Bits are known zero/one if known on the LHS and RHS. 
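    // E.g. if the two CMOV operands are (and x, 0xff) and (and y, 0xf0),
    // bits [31:8] of the result are known zero whichever operand is
    // selected, because the known-zero sets are intersected below
    // (illustrative).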
5355 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 5356 if (KnownZero == 0 && KnownOne == 0) return; 5357 5358 APInt KnownZeroRHS, KnownOneRHS; 5359 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 5360 KnownZeroRHS, KnownOneRHS, Depth+1); 5361 KnownZero &= KnownZeroRHS; 5362 KnownOne &= KnownOneRHS; 5363 return; 5364 } 5365 } 5366} 5367 5368//===----------------------------------------------------------------------===// 5369// ARM Inline Assembly Support 5370//===----------------------------------------------------------------------===// 5371 5372/// getConstraintType - Given a constraint letter, return the type of 5373/// constraint it is for this target. 5374ARMTargetLowering::ConstraintType 5375ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 5376 if (Constraint.size() == 1) { 5377 switch (Constraint[0]) { 5378 default: break; 5379 case 'l': return C_RegisterClass; 5380 case 'w': return C_RegisterClass; 5381 } 5382 } 5383 return TargetLowering::getConstraintType(Constraint); 5384} 5385 5386std::pair<unsigned, const TargetRegisterClass*> 5387ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 5388 EVT VT) const { 5389 if (Constraint.size() == 1) { 5390 // GCC ARM Constraint Letters 5391 switch (Constraint[0]) { 5392 case 'l': 5393 if (Subtarget->isThumb()) 5394 return std::make_pair(0U, ARM::tGPRRegisterClass); 5395 else 5396 return std::make_pair(0U, ARM::GPRRegisterClass); 5397 case 'r': 5398 return std::make_pair(0U, ARM::GPRRegisterClass); 5399 case 'w': 5400 if (VT == MVT::f32) 5401 return std::make_pair(0U, ARM::SPRRegisterClass); 5402 if (VT.getSizeInBits() == 64) 5403 return std::make_pair(0U, ARM::DPRRegisterClass); 5404 if (VT.getSizeInBits() == 128) 5405 return std::make_pair(0U, ARM::QPRRegisterClass); 5406 break; 5407 } 5408 } 5409 if (StringRef("{cc}").equals_lower(Constraint)) 5410 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 5411 5412 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 5413} 5414 5415std::vector<unsigned> ARMTargetLowering:: 5416getRegClassForInlineAsmConstraint(const std::string &Constraint, 5417 EVT VT) const { 5418 if (Constraint.size() != 1) 5419 return std::vector<unsigned>(); 5420 5421 switch (Constraint[0]) { // GCC ARM Constraint Letters 5422 default: break; 5423 case 'l': 5424 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5425 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5426 0); 5427 case 'r': 5428 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 5429 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 5430 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 5431 ARM::R12, ARM::LR, 0); 5432 case 'w': 5433 if (VT == MVT::f32) 5434 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 5435 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 5436 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 5437 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 5438 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 5439 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 5440 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 5441 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 5442 if (VT.getSizeInBits() == 64) 5443 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 5444 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 5445 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 5446 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 5447 if (VT.getSizeInBits() == 128) 5448 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 5449 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 5450 break; 5451 } 5452 5453 return std::vector<unsigned>(); 5454} 
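// As an illustrative use of the constraints above, C source such as
//
//   int f(int x) {
//     int y;
//     asm ("add %0, %1, %1" : "=l"(y) : "l"(x));
//     return y;
//   }
//
// requests 'l' (low) registers, so on Thumb targets the operands are
// allocated from r0-r7 (tGPRRegisterClass / the 'l' list above).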
5455 5456/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 5457/// vector. If it is invalid, don't add anything to Ops. 5458void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 5459 char Constraint, 5460 std::vector<SDValue>&Ops, 5461 SelectionDAG &DAG) const { 5462 SDValue Result(0, 0); 5463 5464 switch (Constraint) { 5465 default: break; 5466 case 'I': case 'J': case 'K': case 'L': 5467 case 'M': case 'N': case 'O': 5468 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 5469 if (!C) 5470 return; 5471 5472 int64_t CVal64 = C->getSExtValue(); 5473 int CVal = (int) CVal64; 5474 // None of these constraints allow values larger than 32 bits. Check 5475 // that the value fits in an int. 5476 if (CVal != CVal64) 5477 return; 5478 5479 switch (Constraint) { 5480 case 'I': 5481 if (Subtarget->isThumb1Only()) { 5482 // This must be a constant between 0 and 255, for ADD 5483 // immediates. 5484 if (CVal >= 0 && CVal <= 255) 5485 break; 5486 } else if (Subtarget->isThumb2()) { 5487 // A constant that can be used as an immediate value in a 5488 // data-processing instruction. 5489 if (ARM_AM::getT2SOImmVal(CVal) != -1) 5490 break; 5491 } else { 5492 // A constant that can be used as an immediate value in a 5493 // data-processing instruction. 5494 if (ARM_AM::getSOImmVal(CVal) != -1) 5495 break; 5496 } 5497 return; 5498 5499 case 'J': 5500 if (Subtarget->isThumb()) { // FIXME thumb2 5501 // This must be a constant between -255 and -1, for negated ADD 5502 // immediates. This can be used in GCC with an "n" modifier that 5503 // prints the negated value, for use with SUB instructions. It is 5504 // not useful otherwise but is implemented for compatibility. 5505 if (CVal >= -255 && CVal <= -1) 5506 break; 5507 } else { 5508 // This must be a constant between -4095 and 4095. It is not clear 5509 // what this constraint is intended for. Implemented for 5510 // compatibility with GCC. 5511 if (CVal >= -4095 && CVal <= 4095) 5512 break; 5513 } 5514 return; 5515 5516 case 'K': 5517 if (Subtarget->isThumb1Only()) { 5518 // A 32-bit value where only one byte has a nonzero value. Exclude 5519 // zero to match GCC. This constraint is used by GCC internally for 5520 // constants that can be loaded with a move/shift combination. 5521 // It is not useful otherwise but is implemented for compatibility. 5522 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 5523 break; 5524 } else if (Subtarget->isThumb2()) { 5525 // A constant whose bitwise inverse can be used as an immediate 5526 // value in a data-processing instruction. This can be used in GCC 5527 // with a "B" modifier that prints the inverted value, for use with 5528 // BIC and MVN instructions. It is not useful otherwise but is 5529 // implemented for compatibility. 5530 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 5531 break; 5532 } else { 5533 // A constant whose bitwise inverse can be used as an immediate 5534 // value in a data-processing instruction. This can be used in GCC 5535 // with a "B" modifier that prints the inverted value, for use with 5536 // BIC and MVN instructions. It is not useful otherwise but is 5537 // implemented for compatibility. 5538 if (ARM_AM::getSOImmVal(~CVal) != -1) 5539 break; 5540 } 5541 return; 5542 5543 case 'L': 5544 if (Subtarget->isThumb1Only()) { 5545 // This must be a constant between -7 and 7, 5546 // for 3-operand ADD/SUB immediate instructions. 
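        // E.g. "adds r0, r1, #5" is a valid Thumb1 3-operand immediate
        // add, so 5 satisfies 'L' here (illustrative).
        // FIXME: the check below excludes +7, although the comment above
        // (and GCC's 'L') describe an inclusive range.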
5547 if (CVal >= -7 && CVal < 7) 5548 break; 5549 } else if (Subtarget->isThumb2()) { 5550 // A constant whose negation can be used as an immediate value in a 5551 // data-processing instruction. This can be used in GCC with an "n" 5552 // modifier that prints the negated value, for use with SUB 5553 // instructions. It is not useful otherwise but is implemented for 5554 // compatibility. 5555 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 5556 break; 5557 } else { 5558 // A constant whose negation can be used as an immediate value in a 5559 // data-processing instruction. This can be used in GCC with an "n" 5560 // modifier that prints the negated value, for use with SUB 5561 // instructions. It is not useful otherwise but is implemented for 5562 // compatibility. 5563 if (ARM_AM::getSOImmVal(-CVal) != -1) 5564 break; 5565 } 5566 return; 5567 5568 case 'M': 5569 if (Subtarget->isThumb()) { // FIXME thumb2 5570 // This must be a multiple of 4 between 0 and 1020, for 5571 // ADD sp + immediate. 5572 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 5573 break; 5574 } else { 5575 // A power of two or a constant between 0 and 32. This is used in 5576 // GCC for the shift amount on shifted register operands, but it is 5577 // useful in general for any shift amounts. 5578 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 5579 break; 5580 } 5581 return; 5582 5583 case 'N': 5584 if (Subtarget->isThumb()) { // FIXME thumb2 5585 // This must be a constant between 0 and 31, for shift amounts. 5586 if (CVal >= 0 && CVal <= 31) 5587 break; 5588 } 5589 return; 5590 5591 case 'O': 5592 if (Subtarget->isThumb()) { // FIXME thumb2 5593 // This must be a multiple of 4 between -508 and 508, for 5594 // ADD/SUB sp = sp + immediate. 5595 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 5596 break; 5597 } 5598 return; 5599 } 5600 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 5601 break; 5602 } 5603 5604 if (Result.getNode()) { 5605 Ops.push_back(Result); 5606 return; 5607 } 5608 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 5609} 5610 5611bool 5612ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 5613 // The ARM target isn't yet aware of offsets. 5614 return false; 5615} 5616 5617int ARM::getVFPf32Imm(const APFloat &FPImm) { 5618 APInt Imm = FPImm.bitcastToAPInt(); 5619 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 5620 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 5621 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 5622 5623 // We can handle 4 bits of mantissa. 5624 // mantissa = (16+UInt(e:f:g:h))/16. 5625 if (Mantissa & 0x7ffff) 5626 return -1; 5627 Mantissa >>= 19; 5628 if ((Mantissa & 0xf) != Mantissa) 5629 return -1; 5630 5631 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 5632 if (Exp < -3 || Exp > 4) 5633 return -1; 5634 Exp = ((Exp+3) & 0x7) ^ 4; 5635 5636 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 5637} 5638 5639int ARM::getVFPf64Imm(const APFloat &FPImm) { 5640 APInt Imm = FPImm.bitcastToAPInt(); 5641 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 5642 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 5643 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 5644 5645 // We can handle 4 bits of mantissa. 5646 // mantissa = (16+UInt(e:f:g:h))/16. 
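  // E.g. for 0.5 the sign is 0, Exp is -1, and the mantissa bits are all
  // zero, so this returns (((-1 + 3) & 0x7) ^ 4) << 4 == 0x60, the VFP
  // immediate encoding of 0.5 (illustrative).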
5647 if (Mantissa & 0xffffffffffffLL) 5648 return -1; 5649 Mantissa >>= 48; 5650 if ((Mantissa & 0xf) != Mantissa) 5651 return -1; 5652 5653 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 5654 if (Exp < -3 || Exp > 4) 5655 return -1; 5656 Exp = ((Exp+3) & 0x7) ^ 4; 5657 5658 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 5659} 5660 5661bool ARM::isBitFieldInvertedMask(unsigned v) { 5662 if (v == 0xffffffff) 5663 return 0; 5664 // there can be 1's on either or both "outsides", all the "inside" 5665 // bits must be 0's 5666 unsigned int lsb = 0, msb = 31; 5667 while (v & (1 << msb)) --msb; 5668 while (v & (1 << lsb)) ++lsb; 5669 for (unsigned int i = lsb; i <= msb; ++i) { 5670 if (v & (1 << i)) 5671 return 0; 5672 } 5673 return 1; 5674} 5675 5676/// isFPImmLegal - Returns true if the target can instruction select the 5677/// specified FP immediate natively. If false, the legalizer will 5678/// materialize the FP immediate as a load from a constant pool. 5679bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { 5680 if (!Subtarget->hasVFP3()) 5681 return false; 5682 if (VT == MVT::f32) 5683 return ARM::getVFPf32Imm(Imm) != -1; 5684 if (VT == MVT::f64) 5685 return ARM::getVFPf64Imm(Imm) != -1; 5686 return false; 5687} 5688 5689/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as 5690/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment 5691/// specified in the intrinsic calls. 5692bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, 5693 const CallInst &I, 5694 unsigned Intrinsic) const { 5695 switch (Intrinsic) { 5696 case Intrinsic::arm_neon_vld1: 5697 case Intrinsic::arm_neon_vld2: 5698 case Intrinsic::arm_neon_vld3: 5699 case Intrinsic::arm_neon_vld4: 5700 case Intrinsic::arm_neon_vld2lane: 5701 case Intrinsic::arm_neon_vld3lane: 5702 case Intrinsic::arm_neon_vld4lane: { 5703 Info.opc = ISD::INTRINSIC_W_CHAIN; 5704 // Conservatively set memVT to the entire set of vectors loaded. 5705 uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8; 5706 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 5707 Info.ptrVal = I.getArgOperand(0); 5708 Info.offset = 0; 5709 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 5710 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 5711 Info.vol = false; // volatile loads with NEON intrinsics not supported 5712 Info.readMem = true; 5713 Info.writeMem = false; 5714 return true; 5715 } 5716 case Intrinsic::arm_neon_vst1: 5717 case Intrinsic::arm_neon_vst2: 5718 case Intrinsic::arm_neon_vst3: 5719 case Intrinsic::arm_neon_vst4: 5720 case Intrinsic::arm_neon_vst2lane: 5721 case Intrinsic::arm_neon_vst3lane: 5722 case Intrinsic::arm_neon_vst4lane: { 5723 Info.opc = ISD::INTRINSIC_VOID; 5724 // Conservatively set memVT to the entire set of vectors stored. 
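    // E.g. for a vst3 of three v4i32 vectors, each vector argument adds
    // 16 / 8 = 2 to NumElts below, so memVT becomes v6i64 (384 bits),
    // covering the entire region stored (illustrative).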
5725 unsigned NumElts = 0; 5726 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 5727 const Type *ArgTy = I.getArgOperand(ArgI)->getType(); 5728 if (!ArgTy->isVectorTy()) 5729 break; 5730 NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; 5731 } 5732 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 5733 Info.ptrVal = I.getArgOperand(0); 5734 Info.offset = 0; 5735 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 5736 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 5737 Info.vol = false; // volatile stores with NEON intrinsics not supported 5738 Info.readMem = false; 5739 Info.writeMem = true; 5740 return true; 5741 } 5742 default: 5743 break; 5744 } 5745 5746 return false; 5747} 5748