ARMISelLowering.cpp revision 92e3916c3b750f7eb4f41e14e401434b713e558b
//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that ARM uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "arm-isel"
#include "ARM.h"
#include "ARMAddressingModes.h"
#include "ARMCallingConv.h"
#include "ARMConstantPoolValue.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMPerfectShuffle.h"
#include "ARMRegisterInfo.h"
#include "ARMSubtarget.h"
#include "ARMTargetMachine.h"
#include "ARMTargetObjectFile.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/GlobalValue.h"
#include "llvm/Instruction.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/Type.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/IntrinsicLowering.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;

STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
// This option should go away when tail calls fully work.
static cl::opt<bool>
EnableARMTailCalls("arm-tail-calls", cl::Hidden,
  cl::desc("Generate tail calls (TEMPORARY OPTION)."),
  cl::init(false));

cl::opt<bool>
EnableARMLongCalls("arm-long-calls", cl::Hidden,
  cl::desc("Generate calls via indirect call instructions"),
  cl::init(false));

static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
  cl::desc("Enable / disable ARM interworking (for debugging only)"),
  cl::init(true));

void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT,
                                       EVT PromotedBitwiseVT) {
  if (VT != PromotedLdStVT) {
    setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::LOAD, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());

    setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::STORE, VT.getSimpleVT(),
                       PromotedLdStVT.getSimpleVT());
  }

  EVT ElemTy = VT.getVectorElementType();
  if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
    setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom);
  if (ElemTy != MVT::i32) {
    setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand);
    setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand);
  }
  setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom);
  setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal);
  setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand);
  if (VT.isInteger()) {
    setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom);
    setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand);
    setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand);
    for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
      setTruncStoreAction(VT.getSimpleVT(),
                          (MVT::SimpleValueType)InnerVT, Expand);
  }
  setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand);

  // Promote all bit-wise operations.
  if (VT.isInteger() && VT != PromotedBitwiseVT) {
    setOperationAction(ISD::AND, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::AND, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::OR,  VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::OR,  VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
    setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote);
    AddPromotedToType (ISD::XOR, VT.getSimpleVT(),
                       PromotedBitwiseVT.getSimpleVT());
  }
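
  // A promoted bitwise operation is carried out by bitcasting the operands to
  // the promoted type, operating there, and bitcasting the result back; e.g.
  // a v8i8 AND is performed as a v2i32 AND.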

  // Neon does not support vector divide/remainder operations.
  setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand);
  setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand);
}

void ARMTargetLowering::addDRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::DPRRegisterClass);
  addTypeForNEON(VT, MVT::f64, MVT::v2i32);
}

void ARMTargetLowering::addQRTypeForNEON(EVT VT) {
  addRegisterClass(VT, ARM::QPRRegisterClass);
  addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
}

static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
  if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin())
    return new TargetLoweringObjectFileMachO();

  return new ARMElfTargetObjectFile();
}

ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, createTLOF(TM)) {
  Subtarget = &TM.getSubtarget<ARMSubtarget>();
  RegInfo = TM.getRegisterInfo();
  Itins = TM.getInstrItineraryData();

  if (Subtarget->isTargetDarwin()) {
    // Uses VFP for Thumb libfuncs if available.
    if (Subtarget->isThumb() && Subtarget->hasVFP2()) {
      // Single-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F32, "__addsf3vfp");
      setLibcallName(RTLIB::SUB_F32, "__subsf3vfp");
      setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp");
      setLibcallName(RTLIB::DIV_F32, "__divsf3vfp");

      // Double-precision floating-point arithmetic.
      setLibcallName(RTLIB::ADD_F64, "__adddf3vfp");
      setLibcallName(RTLIB::SUB_F64, "__subdf3vfp");
      setLibcallName(RTLIB::MUL_F64, "__muldf3vfp");
      setLibcallName(RTLIB::DIV_F64, "__divdf3vfp");

      // Single-precision comparisons.
      setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp");
      setLibcallName(RTLIB::UNE_F32, "__nesf2vfp");
      setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp");
      setLibcallName(RTLIB::OLE_F32, "__lesf2vfp");
      setLibcallName(RTLIB::OGE_F32, "__gesf2vfp");
      setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp");
      setLibcallName(RTLIB::UO_F32,  "__unordsf2vfp");
      setLibcallName(RTLIB::O_F32,   "__unordsf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F32,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F32,   ISD::SETEQ);

      // Double-precision comparisons.
      setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp");
      setLibcallName(RTLIB::UNE_F64, "__nedf2vfp");
      setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp");
      setLibcallName(RTLIB::OLE_F64, "__ledf2vfp");
      setLibcallName(RTLIB::OGE_F64, "__gedf2vfp");
      setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp");
      setLibcallName(RTLIB::UO_F64,  "__unorddf2vfp");
      setLibcallName(RTLIB::O_F64,   "__unorddf2vfp");

      setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
      setCmpLibcallCC(RTLIB::UO_F64,  ISD::SETNE);
      setCmpLibcallCC(RTLIB::O_F64,   ISD::SETEQ);
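
      // Each of these comparison routines returns an i32 that is nonzero when
      // the tested relation holds, so the call result is checked against zero
      // with the condition set above: e.g. OEQ_F64 calls __eqdf2vfp and tests
      // the result with SETNE, while O_F64 reuses __unorddf2vfp with SETEQ,
      // since a zero result there means the operands are ordered.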

      // Floating-point to integer conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp");
      setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp");
      setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp");

      // Conversions between floating types.
      setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp");
      setLibcallName(RTLIB::FPEXT_F32_F64,   "__extendsfdf2vfp");

      // Integer to floating-point conversions.
      // i64 conversions are done via library routines even when generating VFP
      // instructions, so use the same ones.
      // FIXME: There appears to be some naming inconsistency in ARM libgcc:
      // e.g., __floatunsidf vs. __floatunssidfvfp.
      setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp");
      setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp");
      setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp");
    }
  }

  // These libcalls are not available in 32-bit.
  setLibcallName(RTLIB::SHL_I128, 0);
  setLibcallName(RTLIB::SRL_I128, 0);
  setLibcallName(RTLIB::SRA_I128, 0);

  if (Subtarget->isAAPCS_ABI()) {
    // Double-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 2
    setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd");
    setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv");
    setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul");
    setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub");
    setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS);

    // Double-precision floating-point comparison helper functions
    // RTABI chapter 4.1.2, Table 3
    setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE);
    setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq");
    setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ);
    setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt");
    setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE);
    setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple");
    setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge");
    setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE);
    setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt");
    setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE);
    setLibcallName(RTLIB::UO_F64,  "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE);
    setLibcallName(RTLIB::O_F64,   "__aeabi_dcmpun");
    setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ);
    setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS);

    // Single-precision floating-point arithmetic helper functions
    // RTABI chapter 4.1.2, Table 4
    setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd");
    setLibcallName(RTLIB::DIV_F32, "__aeabi_fdiv");
"__aeabi_fdiv"); 286 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 287 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 288 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 289 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 290 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 291 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 292 293 // Single-precision floating-point comparison helper functions 294 // RTABI chapter 4.1.2, Table 5 295 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 296 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 297 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 298 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 299 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 300 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 301 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 302 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 303 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 304 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 305 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 306 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 307 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 308 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 309 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 310 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 311 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 312 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 313 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 314 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 315 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 316 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 317 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 318 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 319 320 // Floating-point to integer conversions. 321 // RTABI chapter 4.1.2, Table 6 322 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 323 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 324 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 325 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 326 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 327 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 328 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 329 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 330 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 331 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 332 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 333 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 334 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 335 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 338 339 // Conversions between floating types. 340 // RTABI chapter 4.1.2, Table 7 341 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 342 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 343 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 344 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 345 346 // Integer to floating-point conversions. 

    // Integer to floating-point conversions.
    // RTABI chapter 4.1.2, Table 8
    setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d");
    setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d");
    setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d");
    setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d");
    setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f");
    setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f");
    setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f");
    setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f");
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS);

    // Long long helper functions
    // RTABI chapter 4.2, Table 9
    setLibcallName(RTLIB::MUL_I64,  "__aeabi_lmul");
    setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod");
    setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod");
    setLibcallName(RTLIB::SHL_I64,  "__aeabi_llsl");
    setLibcallName(RTLIB::SRL_I64,  "__aeabi_llsr");
    setLibcallName(RTLIB::SRA_I64,  "__aeabi_lasr");
    setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS);

    // Integer division functions
    // RTABI chapter 4.3.1
    setLibcallName(RTLIB::SDIV_I8,  "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv");
    setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv");
    setLibcallName(RTLIB::UDIV_I8,  "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv");
    setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv");
    setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS);
    setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS);
  }

  if (Subtarget->isThumb1Only())
    addRegisterClass(MVT::i32, ARM::tGPRRegisterClass);
  else
    addRegisterClass(MVT::i32, ARM::GPRRegisterClass);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    addRegisterClass(MVT::f32, ARM::SPRRegisterClass);
    if (!Subtarget->isFPOnlySP())
      addRegisterClass(MVT::f64, ARM::DPRRegisterClass);

    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
  }

  if (Subtarget->hasNEON()) {
    addDRTypeForNEON(MVT::v2f32);
    addDRTypeForNEON(MVT::v8i8);
    addDRTypeForNEON(MVT::v4i16);
    addDRTypeForNEON(MVT::v2i32);
    addDRTypeForNEON(MVT::v1i64);
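
    // The quad-register types below occupy 128-bit Q registers, each of which
    // pairs two 64-bit D registers.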
    addQRTypeForNEON(MVT::v4f32);
    addQRTypeForNEON(MVT::v2f64);
    addQRTypeForNEON(MVT::v16i8);
    addQRTypeForNEON(MVT::v8i16);
    addQRTypeForNEON(MVT::v4i32);
    addQRTypeForNEON(MVT::v2i64);

    // v2f64 is legal so that QR subregs can be extracted as f64 elements, but
    // neither Neon nor VFP support any arithmetic operations on it.
    setOperationAction(ISD::FADD, MVT::v2f64, Expand);
    setOperationAction(ISD::FSUB, MVT::v2f64, Expand);
    setOperationAction(ISD::FMUL, MVT::v2f64, Expand);
    setOperationAction(ISD::FDIV, MVT::v2f64, Expand);
    setOperationAction(ISD::FREM, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEG, MVT::v2f64, Expand);
    setOperationAction(ISD::FABS, MVT::v2f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::v2f64, Expand);
    setOperationAction(ISD::FSIN, MVT::v2f64, Expand);
    setOperationAction(ISD::FCOS, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOWI, MVT::v2f64, Expand);
    setOperationAction(ISD::FPOW, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG2, MVT::v2f64, Expand);
    setOperationAction(ISD::FLOG10, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP, MVT::v2f64, Expand);
    setOperationAction(ISD::FEXP2, MVT::v2f64, Expand);
    setOperationAction(ISD::FCEIL, MVT::v2f64, Expand);
    setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand);
    setOperationAction(ISD::FRINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand);
    setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand);

    setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand);

    // Neon does not support some operations on v1i64 and v2i64 types.
    setOperationAction(ISD::MUL, MVT::v1i64, Expand);
    // Custom handling for some quad-vector types to detect VMULL.
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v2i64, Custom);
    // Custom handling for some vector types to avoid expensive expansions
    setOperationAction(ISD::SDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
    setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
    setOperationAction(ISD::VSETCC, MVT::v1i64, Expand);
    setOperationAction(ISD::VSETCC, MVT::v2i64, Expand);
    // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
    // a destination type that is wider than the source.
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);

    setTargetDAGCombine(ISD::INTRINSIC_VOID);
    setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN);
    setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
    setTargetDAGCombine(ISD::SHL);
    setTargetDAGCombine(ISD::SRL);
    setTargetDAGCombine(ISD::SRA);
    setTargetDAGCombine(ISD::SIGN_EXTEND);
    setTargetDAGCombine(ISD::ZERO_EXTEND);
    setTargetDAGCombine(ISD::ANY_EXTEND);
    setTargetDAGCombine(ISD::SELECT_CC);
    setTargetDAGCombine(ISD::BUILD_VECTOR);
    setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
    setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
    setTargetDAGCombine(ISD::STORE);
  }

  computeRegisterProperties();

  // ARM does not have f32 extending load.
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // ARM does not have i1 sign extending load.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);

  // ARM supports all 4 flavors of integer indexed load / store.
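  // (pre-increment, pre-decrement, post-increment, and post-decrement)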
  if (!Subtarget->isThumb1Only()) {
    for (unsigned im = (unsigned)ISD::PRE_INC;
         im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
      setIndexedLoadAction(im, MVT::i1, Legal);
      setIndexedLoadAction(im, MVT::i8, Legal);
      setIndexedLoadAction(im, MVT::i16, Legal);
      setIndexedLoadAction(im, MVT::i32, Legal);
      setIndexedStoreAction(im, MVT::i1, Legal);
      setIndexedStoreAction(im, MVT::i8, Legal);
      setIndexedStoreAction(im, MVT::i16, Legal);
      setIndexedStoreAction(im, MVT::i32, Legal);
    }
  }

  // i64 operation support.
  if (Subtarget->isThumb1Only()) {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    setOperationAction(ISD::MULHS, MVT::i32, Expand);
    setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
    setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  } else {
    setOperationAction(ISD::MUL, MVT::i64, Expand);
    setOperationAction(ISD::MULHU, MVT::i32, Expand);
    if (!Subtarget->hasV6Ops())
      setOperationAction(ISD::MULHS, MVT::i32, Expand);
  }
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL, MVT::i64, Custom);
  setOperationAction(ISD::SRA, MVT::i64, Custom);

  // ARM does not have ROTL.
  setOperationAction(ISD::ROTL, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Custom);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only())
    setOperationAction(ISD::CTLZ, MVT::i32, Expand);

  // Only ARMv6 has BSWAP.
  if (!Subtarget->hasV6Ops())
    setOperationAction(ISD::BSWAP, MVT::i32, Expand);

  // These are expanded into libcalls.
  if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) {
    // v7M has a hardware divider
    setOperationAction(ISD::SDIV, MVT::i32, Expand);
    setOperationAction(ISD::UDIV, MVT::i32, Expand);
  }
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // Use the default implementation.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::EHSELECTION, MVT::i32, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setExceptionPointerRegister(ARM::R0);
  setExceptionSelectorRegister(ARM::R1);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
  // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use
  // the default expansion.
  if (Subtarget->hasDataBarrier() ||
      (Subtarget->hasV6Ops() && !Subtarget->isThumb())) {
    // membarrier needs custom lowering; the rest are legal and handled
    // normally.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
  } else {
    // Set them all for expansion, which will force libcalls.
    setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand);
    setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand);
    // Since the libcalls include locking, fold in the fences
    setShouldFoldAtomicFences(true);
  }
  // 64-bit versions are always libcalls (for now)
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand);

  setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

  // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes.
  if (!Subtarget->hasV6Ops()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  }
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    // Turn f64 -> i64 into VMOVRRD, and i64 -> f64 into VMOVDRR,
    // iff target supports vfp2.
    setOperationAction(ISD::BITCAST, MVT::i64, Custom);
    setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget->isTargetDarwin()) {
    setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
    setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
    setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom);
  }

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::SETCC, MVT::f64, Expand);
  setOperationAction(ISD::SELECT, MVT::i32, Custom);
  setOperationAction(ISD::SELECT, MVT::f32, Custom);
  setOperationAction(ISD::SELECT, MVT::f64, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  setOperationAction(ISD::BRCOND, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, MVT::i32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f32, Custom);
  setOperationAction(ISD::BR_CC, MVT::f64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Custom);

  // We don't support sin/cos/fmod/copysign/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
  }
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);

  // Various VFP goodness
  if (!UseSoftFloat && !Subtarget->isThumb1Only()) {
    // int <-> fp are custom expanded into bit_convert + ARMISD ops.
    if (Subtarget->hasVFP2()) {
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
      setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    }
    // Special handling for half-precision FP.
    if (!Subtarget->hasFP16()) {
      setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand);
      setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand);
    }
  }

  // We have target-specific dag combine patterns for the following nodes:
  // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
  setTargetDAGCombine(ISD::ADD);
  setTargetDAGCombine(ISD::SUB);
  setTargetDAGCombine(ISD::MUL);

  if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON())
    setTargetDAGCombine(ISD::OR);
  if (Subtarget->hasNEON())
    setTargetDAGCombine(ISD::AND);

  setStackPointerRegisterToSaveRestore(ARM::SP);

  if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
    setSchedulingPreference(Sched::RegPressure);
  else
    setSchedulingPreference(Sched::Hybrid);

  //// temporary - rewrite interface to use type
  maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1;

  // On ARM arguments smaller than 4 bytes are extended, so all arguments
  // are at least 4 bytes aligned.
  setMinStackArgumentAlignment(4);

  benefitFromCodePlacementOpt = true;
}

// FIXME: It might make sense to define the representative register class as
// the nearest super-register that has a non-null superset. For example,
// DPR_VFP2 is a super-register of SPR, and DPR is a superset of DPR_VFP2.
// Consequently, SPR's representative would be DPR_VFP2. This should work well
// if register pressure tracking were modified such that a register use would
// increment the pressure of the register class's representative and all of
// its super classes' representatives transitively. We have not implemented
// this because of the difficulty prior to coalescing of modeling operand
// register classes due to the common occurrence of cross class copies and
// subregister insertions and extractions.
std::pair<const TargetRegisterClass*, uint8_t>
ARMTargetLowering::findRepresentativeClass(EVT VT) const {
  const TargetRegisterClass *RRC = 0;
  uint8_t Cost = 1;
  switch (VT.getSimpleVT().SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(VT);
  // Use DPR as representative register class for all floating point
  // and vector types. Since there are 32 SPR registers and 32 DPR registers,
  // the cost is 1 for both f32 and f64.
  case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
  case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
    RRC = ARM::DPRRegisterClass;
    // When NEON is used for SP, only half of the register file is available
    // because operations that define both SP and DP results will be constrained
    // to the VFP2 class (D0-D15). We currently model this constraint prior to
    // coalescing by double-counting the SP regs. See the FIXME above.
    if (Subtarget->useNEONForSinglePrecisionFP())
      Cost = 2;
    break;
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
    RRC = ARM::DPRRegisterClass;
    Cost = 2;
    break;
  case MVT::v4i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 4;
    break;
  case MVT::v8i64:
    RRC = ARM::DPRRegisterClass;
    Cost = 8;
    break;
  }
  return std::make_pair(RRC, Cost);
}
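
// The costs above count D registers: a 128-bit vector fills one Q register
// (two Ds, cost 2), v4i64 a QQ pair (cost 4), and v8i64 a QQQQ quad (cost 8).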
"ARMISD::RRX"; 796 797 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 798 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 799 800 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 801 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 802 case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; 803 804 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 805 806 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 807 808 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 809 810 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 811 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 812 813 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 814 815 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 816 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 817 case ARMISD::VCGE: return "ARMISD::VCGE"; 818 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 819 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 820 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 821 case ARMISD::VCGT: return "ARMISD::VCGT"; 822 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 823 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 824 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 825 case ARMISD::VTST: return "ARMISD::VTST"; 826 827 case ARMISD::VSHL: return "ARMISD::VSHL"; 828 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 829 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 830 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 831 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 832 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 833 case ARMISD::VSHRN: return "ARMISD::VSHRN"; 834 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 835 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 836 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 837 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 838 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 839 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 840 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 841 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 842 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 843 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 844 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 845 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 846 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 847 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 848 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 849 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 850 case ARMISD::VDUP: return "ARMISD::VDUP"; 851 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 852 case ARMISD::VEXT: return "ARMISD::VEXT"; 853 case ARMISD::VREV64: return "ARMISD::VREV64"; 854 case ARMISD::VREV32: return "ARMISD::VREV32"; 855 case ARMISD::VREV16: return "ARMISD::VREV16"; 856 case ARMISD::VZIP: return "ARMISD::VZIP"; 857 case ARMISD::VUZP: return "ARMISD::VUZP"; 858 case ARMISD::VTRN: return "ARMISD::VTRN"; 859 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 860 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 861 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 862 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 863 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 864 case ARMISD::FMAX: return "ARMISD::FMAX"; 865 case ARMISD::FMIN: return "ARMISD::FMIN"; 866 case ARMISD::BFI: return "ARMISD::BFI"; 867 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 868 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 869 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 870 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 871 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 872 case ARMISD::VLD1_UPD: return 
"ARMISD::VLD1_UPD"; 873 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 874 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 875 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 876 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 877 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 878 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 879 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 880 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 881 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 882 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 883 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 884 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 885 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 886 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 887 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 888 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 889 } 890} 891 892/// getRegClassFor - Return the register class that should be used for the 893/// specified value type. 894TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 895 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 896 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 897 // load / store 4 to 8 consecutive D registers. 898 if (Subtarget->hasNEON()) { 899 if (VT == MVT::v4i64) 900 return ARM::QQPRRegisterClass; 901 else if (VT == MVT::v8i64) 902 return ARM::QQQQPRRegisterClass; 903 } 904 return TargetLowering::getRegClassFor(VT); 905} 906 907// Create a fast isel object. 908FastISel * 909ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { 910 return ARM::createFastISel(funcInfo); 911} 912 913/// getFunctionAlignment - Return the Log2 alignment of this function. 914unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 915 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; 916} 917 918/// getMaximalGlobalOffset - Returns the maximal possible offset which can 919/// be used for loads / stores from the global. 920unsigned ARMTargetLowering::getMaximalGlobalOffset() const { 921 return (Subtarget->isThumb1Only() ? 127 : 4095); 922} 923 924Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 925 unsigned NumVals = N->getNumValues(); 926 if (!NumVals) 927 return Sched::RegPressure; 928 929 for (unsigned i = 0; i != NumVals; ++i) { 930 EVT VT = N->getValueType(i); 931 if (VT == MVT::Glue || VT == MVT::Other) 932 continue; 933 if (VT.isFloatingPoint() || VT.isVector()) 934 return Sched::Latency; 935 } 936 937 if (!N->isMachineOpcode()) 938 return Sched::RegPressure; 939 940 // Load are scheduled for latency even if there instruction itinerary 941 // is not available. 
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());

  if (TID.getNumDefs() == 0)
    return Sched::RegPressure;
  if (!Itins->isEmpty() &&
      Itins->getOperandCycle(TID.getSchedClass(), 0) > 2)
    return Sched::Latency;

  return Sched::RegPressure;
}

//===----------------------------------------------------------------------===//
// Lowering Code
//===----------------------------------------------------------------------===//

/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC.
static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) {
  switch (CC) {
  default: llvm_unreachable("Unknown condition code!");
  case ISD::SETNE:  return ARMCC::NE;
  case ISD::SETEQ:  return ARMCC::EQ;
  case ISD::SETGT:  return ARMCC::GT;
  case ISD::SETGE:  return ARMCC::GE;
  case ISD::SETLT:  return ARMCC::LT;
  case ISD::SETLE:  return ARMCC::LE;
  case ISD::SETUGT: return ARMCC::HI;
  case ISD::SETUGE: return ARMCC::HS;
  case ISD::SETULT: return ARMCC::LO;
  case ISD::SETULE: return ARMCC::LS;
  }
}

/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC.
static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
                        ARMCC::CondCodes &CondCode2) {
  CondCode2 = ARMCC::AL;
  switch (CC) {
  default: llvm_unreachable("Unknown FP condition!");
  case ISD::SETEQ:
  case ISD::SETOEQ: CondCode = ARMCC::EQ; break;
  case ISD::SETGT:
  case ISD::SETOGT: CondCode = ARMCC::GT; break;
  case ISD::SETGE:
  case ISD::SETOGE: CondCode = ARMCC::GE; break;
  case ISD::SETOLT: CondCode = ARMCC::MI; break;
  case ISD::SETOLE: CondCode = ARMCC::LS; break;
  case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break;
  case ISD::SETO:   CondCode = ARMCC::VC; break;
  case ISD::SETUO:  CondCode = ARMCC::VS; break;
  case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break;
  case ISD::SETUGT: CondCode = ARMCC::HI; break;
  case ISD::SETUGE: CondCode = ARMCC::PL; break;
  case ISD::SETLT:
  case ISD::SETULT: CondCode = ARMCC::LT; break;
  case ISD::SETLE:
  case ISD::SETULE: CondCode = ARMCC::LE; break;
  case ISD::SETNE:
  case ISD::SETUNE: CondCode = ARMCC::NE; break;
  }
}
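
// Some FP conditions need two ARM condition codes; e.g. SETONE (ordered and
// not equal) is split above into MI followed by GT, matching the OLT || OGT
// decomposition.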

//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//

#include "ARMGenCallingConv.inc"

/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention value.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
                                                 bool Return,
                                                 bool isVarArg) const {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported calling convention");
  case CallingConv::Fast:
    if (Subtarget->hasVFP2() && !isVarArg) {
      if (!Subtarget->isAAPCS_ABI())
        return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS);
      // For AAPCS ABI targets, just use VFP variant of the calling convention.
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    }
    // Fallthrough
  case CallingConv::C: {
    // Use target triple & subtarget features to do actual dispatch.
    if (!Subtarget->isAAPCS_ABI())
      return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
    else if (Subtarget->hasVFP2() &&
             FloatABIType == FloatABI::Hard && !isVarArg)
      return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  }
  case CallingConv::ARM_AAPCS_VFP:
    return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
  case CallingConv::ARM_AAPCS:
    return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS);
  case CallingConv::ARM_APCS:
    return (Return ? RetCC_ARM_APCS : CC_ARM_APCS);
  }
}

/// LowerCallResult - Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
SDValue
ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
                 RVLocs, *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins,
                           CCAssignFnForNode(CallConv, /* Return*/ true,
                                             isVarArg));

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign VA = RVLocs[i];

    SDValue Val;
    if (VA.needsCustom()) {
      // Handle f64 or half of a v2f64.
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);

      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
        Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(0, MVT::i32));

        VA = RVLocs[++i]; // skip ahead to next loc
        Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Lo.getValue(1);
        InFlag = Lo.getValue(2);
        VA = RVLocs[++i]; // skip ahead to next loc
        Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
        Chain = Hi.getValue(1);
        InFlag = Hi.getValue(2);
        Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
                          DAG.getConstant(1, MVT::i32));
      }
    } else {
      Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(),
                               InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::BCvt:
      Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
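
// In LowerCallResult above, an f64 (or each half of a v2f64) returned in
// registers arrives as two i32 copies that VMOVDRR reassembles into a double.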

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       /*isVolatile=*/false, /*AlwaysInline=*/false,
                       MachinePointerInfo(0), MachinePointerInfo(0));
}

/// LowerMemOpCallTo - Store the argument to the stack.
SDValue
ARMTargetLowering::LowerMemOpCallTo(SDValue Chain,
                                    SDValue StackPtr, SDValue Arg,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    ISD::ArgFlagsTy Flags) const {
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
  if (Flags.isByVal())
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);

  return DAG.getStore(Chain, dl, Arg, PtrOff,
                      MachinePointerInfo::getStack(LocMemOffset),
                      false, false, 0);
}

void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
                                         SDValue Chain, SDValue &Arg,
                                         RegsToPassVector &RegsToPass,
                                         CCValAssign &VA, CCValAssign &NextVA,
                                         SDValue &StackPtr,
                                         SmallVector<SDValue, 8> &MemOpChains,
                                         ISD::ArgFlagsTy Flags) const {

  SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
                              DAG.getVTList(MVT::i32, MVT::i32), Arg);
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));

  if (NextVA.isRegLoc())
    RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1)));
  else {
    assert(NextVA.isMemLoc());
    if (StackPtr.getNode() == 0)
      StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

    MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1),
                                           dl, DAG, NextVA,
                                           Flags));
  }
}

/// LowerCall - Lowering a call into a callseq_start <-
/// ARMISD::CALL <- callseq_end chain. Also add input and output parameter
/// nodes.
SDValue
ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
  bool IsSibCall = false;
  // Temporarily disable tail calls so things don't break.
  if (!EnableARMTailCalls)
    isTailCall = false;
  if (isTailCall) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                   isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(),
                   Outs, OutVals, Ins, DAG);
    // We don't support GuaranteedTailCallOpt for ARM, only automatically
    // detected sibcalls.
    if (isTailCall) {
      ++NumTailCalls;
      IsSibCall = true;
    }
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallOperands(Outs,
                             CCAssignFnForNode(CallConv, /* Return*/ false,
                                               isVarArg));

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getNextStackOffset();

  // For tail calls, memory operands are available in our caller's stack.
  if (IsSibCall)
    NumBytes = 0;

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  if (!IsSibCall)
    Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));

  SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());

  RegsToPassVector RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;

  // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
  for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
       i != e;
       ++i, ++realArgIdx) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[realArgIdx];
    ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags;
    bool isByVal = Flags.isByVal();

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg);
      break;
    }

    // f64 and v2f64 might be passed in i32 pairs and must be split into pieces
    if (VA.needsCustom()) {
      if (VA.getLocVT() == MVT::v2f64) {
        SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(0, MVT::i32));
        SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
                                  DAG.getConstant(1, MVT::i32));

        PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass,
                         VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);

        VA = ArgLocs[++i]; // skip ahead to next loc
        if (VA.isRegLoc()) {
          PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass,
                           VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
        } else {
          assert(VA.isMemLoc());

          MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
                                                 dl, DAG, VA, Flags));
        }
      } else {
        PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i],
                         StackPtr, MemOpChains, Flags);
      }
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else if (!IsSibCall || isByVal) {
      assert(VA.isMemLoc());

      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  // Tail call byval lowering might overwrite argument registers so in case of
  // tail call optimization the copies to registers are lowered later.
  if (!isTailCall)
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }

  // For tail calls lower the arguments to the 'real' stack slot.
  if (isTailCall) {
    // Force all the incoming stack arguments to be loaded from the stack
    // before any new outgoing arguments are stored to the stack, because the
    // outgoing stack slots may alias the incoming argument stack slots, and
    // the alias isn't otherwise explicit. This is slightly more conservative
    // than necessary, because it means that each store effectively depends
    // on every argument instead of just those arguments it would clobber.

    // Do not flag preceding copytoreg stuff together with the following stuff.
    InFlag = SDValue();
    for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
      Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                               RegsToPass[i].second, InFlag);
      InFlag = Chain.getValue(1);
    }
    InFlag = SDValue();
  }

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  bool isDirect = false;
  bool isARMFunc = false;
  bool isLocalARMFunc = false;
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  if (EnableARMLongCalls) {
    assert (getTargetMachine().getRelocationModel() == Reloc::Static
            && "long-calls with non-static relocation model!");
    // Handle a global address or an external symbol. If it's not one of
    // those, the target's already in a register, so we don't need to do
    // anything extra.
1344 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1345 const GlobalValue *GV = G->getGlobal(); 1346 // Create a constant pool entry for the callee address 1347 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1348 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1349 ARMPCLabelIndex, 1350 ARMCP::CPValue, 0); 1351 // Get the address of the callee into a register 1352 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1353 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1354 Callee = DAG.getLoad(getPointerTy(), dl, 1355 DAG.getEntryNode(), CPAddr, 1356 MachinePointerInfo::getConstantPool(), 1357 false, false, 0); 1358 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1359 const char *Sym = S->getSymbol(); 1360 1361 // Create a constant pool entry for the callee address 1362 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1363 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1364 Sym, ARMPCLabelIndex, 0); 1365 // Get the address of the callee into a register 1366 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1367 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1368 Callee = DAG.getLoad(getPointerTy(), dl, 1369 DAG.getEntryNode(), CPAddr, 1370 MachinePointerInfo::getConstantPool(), 1371 false, false, 0); 1372 } 1373 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1374 const GlobalValue *GV = G->getGlobal(); 1375 isDirect = true; 1376 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1377 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1378 getTargetMachine().getRelocationModel() != Reloc::Static; 1379 isARMFunc = !Subtarget->isThumb() || isStub; 1380 // ARM call to a local ARM function is predicable. 1381 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1382 // tBX takes a register source operand. 1383 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1384 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1385 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1386 ARMPCLabelIndex, 1387 ARMCP::CPValue, 4); 1388 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1389 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1390 Callee = DAG.getLoad(getPointerTy(), dl, 1391 DAG.getEntryNode(), CPAddr, 1392 MachinePointerInfo::getConstantPool(), 1393 false, false, 0); 1394 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1395 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1396 getPointerTy(), Callee, PICLabel); 1397 } else { 1398 // On ELF targets for PIC code, direct calls should go through the PLT 1399 unsigned OpFlags = 0; 1400 if (Subtarget->isTargetELF() && 1401 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1402 OpFlags = ARMII::MO_PLT; 1403 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1404 } 1405 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1406 isDirect = true; 1407 bool isStub = Subtarget->isTargetDarwin() && 1408 getTargetMachine().getRelocationModel() != Reloc::Static; 1409 isARMFunc = !Subtarget->isThumb() || isStub; 1410 // tBX takes a register source operand. 
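// Illustrative note: on ARMv4T Thumb there is no BLX, so an interworking
// call to ARM code must go through "bx rN" with the address in a register.
// The block below builds that address PC-relatively, schematically:
//   ldr rN, .LCPIn      @ callee - (.LPCn + 4)
// .LPCn:
//   add rN, pc          @ ARMISD::PIC_ADD resolves the label
//   bx  rN
// Register names and label spellings are assumptions for illustration.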
1411 const char *Sym = S->getSymbol(); 1412 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1413 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1414 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1415 Sym, ARMPCLabelIndex, 4); 1416 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1417 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1418 Callee = DAG.getLoad(getPointerTy(), dl, 1419 DAG.getEntryNode(), CPAddr, 1420 MachinePointerInfo::getConstantPool(), 1421 false, false, 0); 1422 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1423 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1424 getPointerTy(), Callee, PICLabel); 1425 } else { 1426 unsigned OpFlags = 0; 1427 // On ELF targets for PIC code, direct calls should go through the PLT. 1428 if (Subtarget->isTargetELF() && 1429 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1430 OpFlags = ARMII::MO_PLT; 1431 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1432 } 1433 } 1434 1435 // FIXME: handle tail calls differently. 1436 unsigned CallOpc; 1437 if (Subtarget->isThumb()) { 1438 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1439 CallOpc = ARMISD::CALL_NOLINK; 1440 else 1441 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1442 } else { 1443 CallOpc = (isDirect || Subtarget->hasV5TOps()) 1444 ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) 1445 : ARMISD::CALL_NOLINK; 1446 } 1447 1448 std::vector<SDValue> Ops; 1449 Ops.push_back(Chain); 1450 Ops.push_back(Callee); 1451 1452 // Add argument registers to the end of the list so that they are known live 1453 // into the call. 1454 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1455 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1456 RegsToPass[i].second.getValueType())); 1457 1458 if (InFlag.getNode()) 1459 Ops.push_back(InFlag); 1460 1461 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1462 if (isTailCall) 1463 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1464 1465 // Returns a chain and a flag for retval copy to use. 1466 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1467 InFlag = Chain.getValue(1); 1468 1469 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1470 DAG.getIntPtrConstant(0, true), InFlag); 1471 if (!Ins.empty()) 1472 InFlag = Chain.getValue(1); 1473 1474 // Handle result values, copying them out of physregs into vregs that we 1475 // return. 1476 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1477 dl, DAG, InVals); 1478 } 1479 1480 /// HandleByVal - Every parameter *after* a byval parameter is passed 1481 /// on the stack. Confiscate all the parameter registers to ensure 1482 /// this. 1483 void 1484 llvm::ARMTargetLowering::HandleByVal(CCState *State) const { 1485 static const unsigned RegList1[] = { 1486 ARM::R0, ARM::R1, ARM::R2, ARM::R3 1487 }; 1488 do {} while (State->AllocateReg(RegList1, 4)); 1489 } 1490 1491 /// MatchingStackOffset - Return true if the given stack call argument is 1492 /// already available in the same relative position in the caller's 1493 /// incoming argument stack.
1494 static 1495 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1496 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1497 const ARMInstrInfo *TII) { 1498 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1499 int FI = INT_MAX; 1500 if (Arg.getOpcode() == ISD::CopyFromReg) { 1501 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1502 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1503 return false; 1504 MachineInstr *Def = MRI->getVRegDef(VR); 1505 if (!Def) 1506 return false; 1507 if (!Flags.isByVal()) { 1508 if (!TII->isLoadFromStackSlot(Def, FI)) 1509 return false; 1510 } else { 1511 return false; 1512 } 1513 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1514 if (Flags.isByVal()) 1515 // ByVal argument is passed in as a pointer but it's now being 1516 // dereferenced, e.g. 1517 // define @foo(%struct.X* %A) { 1518 // tail call @bar(%struct.X* byval %A) 1519 // } 1520 return false; 1521 SDValue Ptr = Ld->getBasePtr(); 1522 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1523 if (!FINode) 1524 return false; 1525 FI = FINode->getIndex(); 1526 } else 1527 return false; 1528 1529 assert(FI != INT_MAX); 1530 if (!MFI->isFixedObjectIndex(FI)) 1531 return false; 1532 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1533 } 1534 1535 /// IsEligibleForTailCallOptimization - Check whether the call is eligible 1536 /// for tail call optimization. Targets which want to do tail call 1537 /// optimization should implement this function. 1538 bool 1539 ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1540 CallingConv::ID CalleeCC, 1541 bool isVarArg, 1542 bool isCalleeStructRet, 1543 bool isCallerStructRet, 1544 const SmallVectorImpl<ISD::OutputArg> &Outs, 1545 const SmallVectorImpl<SDValue> &OutVals, 1546 const SmallVectorImpl<ISD::InputArg> &Ins, 1547 SelectionDAG& DAG) const { 1548 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1549 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1550 bool CCMatch = CallerCC == CalleeCC; 1551 1552 // Look for obvious safe cases to perform tail call optimization that do not 1553 // require ABI changes. This is what gcc calls sibcall. 1554 1555 // Do not sibcall-optimize vararg calls unless the call site passes no 1556 // arguments. 1557 if (isVarArg && !Outs.empty()) 1558 return false; 1559 1560 // Also avoid sibcall optimization if either caller or callee uses struct 1561 // return semantics. 1562 if (isCalleeStructRet || isCallerStructRet) 1563 return false; 1564 1565 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1566 // emitEpilogue is not ready for them. 1567 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1568 // LR. This means if we need to reload LR, it takes an extra instruction, 1569 // which outweighs the value of the tail call; but here we don't know yet 1570 // whether LR is going to be used. Probably the right approach is to 1571 // generate the tail call here and turn it back into CALL/RET in 1572 // emitEpilogue if LR is used. 1573 1574 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1575 // but we need to make sure there are enough registers; the only valid 1576 // registers are the 4 used for parameters. We don't currently do this 1577 // case.
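// Illustrative example (assuming both functions use the default C calling
// convention) of a candidate sibcall that survives the checks below:
//   define i32 @caller(i32 %x) {
//     %r = tail call i32 @callee(i32 %x)
//     ret i32 %r
//   }
// The argument is already in r0 and no stack adjustment is needed. By
// contrast, a vararg call site that passes arguments, an sret call, or a
// Thumb1 caller is rejected.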
1578 if (Subtarget->isThumb1Only()) 1579 return false; 1580 1581 // If the calling conventions do not match, then we'd better make sure the 1582 // results are returned in the same way as what the caller expects. 1583 if (!CCMatch) { 1584 SmallVector<CCValAssign, 16> RVLocs1; 1585 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1586 RVLocs1, *DAG.getContext()); 1587 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1588 1589 SmallVector<CCValAssign, 16> RVLocs2; 1590 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1591 RVLocs2, *DAG.getContext()); 1592 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1593 1594 if (RVLocs1.size() != RVLocs2.size()) 1595 return false; 1596 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1597 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1598 return false; 1599 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1600 return false; 1601 if (RVLocs1[i].isRegLoc()) { 1602 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1603 return false; 1604 } else { 1605 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1606 return false; 1607 } 1608 } 1609 } 1610 1611 // If the callee takes no arguments then go on to check the results of the 1612 // call. 1613 if (!Outs.empty()) { 1614 // Check if stack adjustment is needed. For now, do not do this if any 1615 // argument is passed on the stack. 1616 SmallVector<CCValAssign, 16> ArgLocs; 1617 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1618 ArgLocs, *DAG.getContext()); 1619 CCInfo.AnalyzeCallOperands(Outs, 1620 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1621 if (CCInfo.getNextStackOffset()) { 1622 MachineFunction &MF = DAG.getMachineFunction(); 1623 1624 // Check if the arguments are already laid out in the right way as 1625 // the caller's fixed stack objects. 1626 MachineFrameInfo *MFI = MF.getFrameInfo(); 1627 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1628 const ARMInstrInfo *TII = 1629 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1630 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1631 i != e; 1632 ++i, ++realArgIdx) { 1633 CCValAssign &VA = ArgLocs[i]; 1634 EVT RegVT = VA.getLocVT(); 1635 SDValue Arg = OutVals[realArgIdx]; 1636 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1637 if (VA.getLocInfo() == CCValAssign::Indirect) 1638 return false; 1639 if (VA.needsCustom()) { 1640 // f64 and vector types are split into multiple registers or 1641 // register/stack-slot combinations. The types will not match 1642 // the registers; give up on memory f64 refs until we figure 1643 // out what to do about this. 1644 if (!VA.isRegLoc()) 1645 return false; 1646 if (!ArgLocs[++i].isRegLoc()) 1647 return false; 1648 if (RegVT == MVT::v2f64) { 1649 if (!ArgLocs[++i].isRegLoc()) 1650 return false; 1651 if (!ArgLocs[++i].isRegLoc()) 1652 return false; 1653 } 1654 } else if (!VA.isRegLoc()) { 1655 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1656 MFI, MRI, TII)) 1657 return false; 1658 } 1659 } 1660 } 1661 } 1662 1663 return true; 1664} 1665 1666SDValue 1667ARMTargetLowering::LowerReturn(SDValue Chain, 1668 CallingConv::ID CallConv, bool isVarArg, 1669 const SmallVectorImpl<ISD::OutputArg> &Outs, 1670 const SmallVectorImpl<SDValue> &OutVals, 1671 DebugLoc dl, SelectionDAG &DAG) const { 1672 1673 // CCValAssign - represent the assignment of the return value to a location. 1674 SmallVector<CCValAssign, 16> RVLocs; 1675 1676 // CCState - Info about the registers and stack slots. 
1677 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1678 *DAG.getContext()); 1679 1680 // Analyze outgoing return values. 1681 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1682 isVarArg)); 1683 1684 // If this is the first return lowered for this function, add 1685 // the regs to the liveout set for the function. 1686 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1687 for (unsigned i = 0; i != RVLocs.size(); ++i) 1688 if (RVLocs[i].isRegLoc()) 1689 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1690 } 1691 1692 SDValue Flag; 1693 1694 // Copy the result values into the output registers. 1695 for (unsigned i = 0, realRVLocIdx = 0; 1696 i != RVLocs.size(); 1697 ++i, ++realRVLocIdx) { 1698 CCValAssign &VA = RVLocs[i]; 1699 assert(VA.isRegLoc() && "Can only return in registers!"); 1700 1701 SDValue Arg = OutVals[realRVLocIdx]; 1702 1703 switch (VA.getLocInfo()) { 1704 default: llvm_unreachable("Unknown loc info!"); 1705 case CCValAssign::Full: break; 1706 case CCValAssign::BCvt: 1707 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1708 break; 1709 } 1710 1711 if (VA.needsCustom()) { 1712 if (VA.getLocVT() == MVT::v2f64) { 1713 // Extract the first half and return it in two registers. 1714 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1715 DAG.getConstant(0, MVT::i32)); 1716 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1717 DAG.getVTList(MVT::i32, MVT::i32), Half); 1718 1719 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1720 Flag = Chain.getValue(1); 1721 VA = RVLocs[++i]; // skip ahead to next loc 1722 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1723 HalfGPRs.getValue(1), Flag); 1724 Flag = Chain.getValue(1); 1725 VA = RVLocs[++i]; // skip ahead to next loc 1726 1727 // Extract the 2nd half and fall through to handle it as an f64 value. 1728 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1729 DAG.getConstant(1, MVT::i32)); 1730 } 1731 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1732 // available. 1733 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1734 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1735 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1736 Flag = Chain.getValue(1); 1737 VA = RVLocs[++i]; // skip ahead to next loc 1738 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1739 Flag); 1740 } else 1741 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1742 1743 // Guarantee that all emitted copies are glued together so that nothing 1744 // else can be scheduled in between them. 1745 Flag = Chain.getValue(1); 1746 } 1747 1748 SDValue result; 1749 if (Flag.getNode()) 1750 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1751 else // Return Void 1752 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1753 1754 return result; 1755 } 1756 1757 bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { 1758 if (N->getNumValues() != 1) 1759 return false; 1760 if (!N->hasNUsesOfValue(1, 0)) 1761 return false; 1762 1763 unsigned NumCopies = 0; 1764 SDNode* Copies[2]; 1765 SDNode *Use = *N->use_begin(); 1766 if (Use->getOpcode() == ISD::CopyToReg) { 1767 Copies[NumCopies++] = Use; 1768 } else if (Use->getOpcode() == ARMISD::VMOVRRD) { 1769 // f64 returned in a pair of GPRs.
1770 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); 1771 UI != UE; ++UI) { 1772 if (UI->getOpcode() != ISD::CopyToReg) 1773 return false; 1774 Copies[UI.getUse().getResNo()] = *UI; 1775 ++NumCopies; 1776 } 1777 } else if (Use->getOpcode() == ISD::BITCAST) { 1778 // f32 returned in a single GPR. 1779 if (!Use->hasNUsesOfValue(1, 0)) 1780 return false; 1781 Use = *Use->use_begin(); 1782 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) 1783 return false; 1784 Copies[NumCopies++] = Use; 1785 } else { 1786 return false; 1787 } 1788 1789 if (NumCopies != 1 && NumCopies != 2) 1790 return false; 1791 1792 bool HasRet = false; 1793 for (unsigned i = 0; i < NumCopies; ++i) { 1794 SDNode *Copy = Copies[i]; 1795 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1796 UI != UE; ++UI) { 1797 if (UI->getOpcode() == ISD::CopyToReg) { 1798 SDNode *Use = *UI; 1799 if (Use == Copies[0] || Use == Copies[1]) 1800 continue; 1801 return false; 1802 } 1803 if (UI->getOpcode() != ARMISD::RET_FLAG) 1804 return false; 1805 HasRet = true; 1806 } 1807 } 1808 1809 return HasRet; 1810} 1811 1812bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 1813 if (!EnableARMTailCalls) 1814 return false; 1815 1816 if (!CI->isTailCall()) 1817 return false; 1818 1819 return !Subtarget->isThumb1Only(); 1820} 1821 1822// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1823// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1824// one of the above mentioned nodes. It has to be wrapped because otherwise 1825// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1826// be used to form addressing mode. These wrapped nodes will be selected 1827// into MOVi. 1828static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1829 EVT PtrVT = Op.getValueType(); 1830 // FIXME there is no actual debug info here 1831 DebugLoc dl = Op.getDebugLoc(); 1832 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1833 SDValue Res; 1834 if (CP->isMachineConstantPoolEntry()) 1835 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1836 CP->getAlignment()); 1837 else 1838 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1839 CP->getAlignment()); 1840 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1841} 1842 1843unsigned ARMTargetLowering::getJumpTableEncoding() const { 1844 return MachineJumpTableInfo::EK_Inline; 1845} 1846 1847SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1848 SelectionDAG &DAG) const { 1849 MachineFunction &MF = DAG.getMachineFunction(); 1850 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1851 unsigned ARMPCLabelIndex = 0; 1852 DebugLoc DL = Op.getDebugLoc(); 1853 EVT PtrVT = getPointerTy(); 1854 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1855 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1856 SDValue CPAddr; 1857 if (RelocM == Reloc::Static) { 1858 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1859 } else { 1860 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 1861 ARMPCLabelIndex = AFI->createPICLabelUId(); 1862 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1863 ARMCP::CPBlockAddress, 1864 PCAdj); 1865 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1866 } 1867 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1868 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1869 MachinePointerInfo::getConstantPool(), 1870 false, false, 0); 1871 if (RelocM == Reloc::Static) 1872 return Result; 1873 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1874 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1875} 1876 1877// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1878SDValue 1879ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1880 SelectionDAG &DAG) const { 1881 DebugLoc dl = GA->getDebugLoc(); 1882 EVT PtrVT = getPointerTy(); 1883 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1884 MachineFunction &MF = DAG.getMachineFunction(); 1885 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1886 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1887 ARMConstantPoolValue *CPV = 1888 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1889 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 1890 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1891 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1892 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1893 MachinePointerInfo::getConstantPool(), 1894 false, false, 0); 1895 SDValue Chain = Argument.getValue(1); 1896 1897 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1898 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1899 1900 // call __tls_get_addr. 1901 ArgListTy Args; 1902 ArgListEntry Entry; 1903 Entry.Node = Argument; 1904 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1905 Args.push_back(Entry); 1906 // FIXME: is there useful debug info available here? 1907 std::pair<SDValue, SDValue> CallResult = 1908 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1909 false, false, false, false, 1910 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1911 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1912 return CallResult.first; 1913} 1914 1915// Lower ISD::GlobalTLSAddress using the "initial exec" or 1916// "local exec" model. 1917SDValue 1918ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1919 SelectionDAG &DAG) const { 1920 const GlobalValue *GV = GA->getGlobal(); 1921 DebugLoc dl = GA->getDebugLoc(); 1922 SDValue Offset; 1923 SDValue Chain = DAG.getEntryNode(); 1924 EVT PtrVT = getPointerTy(); 1925 // Get the Thread Pointer 1926 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1927 1928 if (GV->isDeclaration()) { 1929 MachineFunction &MF = DAG.getMachineFunction(); 1930 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1931 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1932 // Initial exec model. 1933 unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; 1934 ARMConstantPoolValue *CPV = 1935 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1936 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); 1937 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1938 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1939 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1940 MachinePointerInfo::getConstantPool(), 1941 false, false, 0); 1942 Chain = Offset.getValue(1); 1943 1944 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1945 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 1946 1947 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1948 MachinePointerInfo::getConstantPool(), 1949 false, false, 0); 1950 } else { 1951 // local exec model 1952 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); 1953 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1954 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1955 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1956 MachinePointerInfo::getConstantPool(), 1957 false, false, 0); 1958 } 1959 1960 // The address of the thread local variable is the add of the thread 1961 // pointer with the offset of the variable. 1962 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 1963} 1964 1965SDValue 1966ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 1967 // TODO: implement the "local dynamic" model 1968 assert(Subtarget->isTargetELF() && 1969 "TLS not implemented for non-ELF targets"); 1970 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1971 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 1972 // otherwise use the "Local Exec" TLS Model 1973 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 1974 return LowerToTLSGeneralDynamicModel(GA, DAG); 1975 else 1976 return LowerToTLSExecModels(GA, DAG); 1977} 1978 1979SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 1980 SelectionDAG &DAG) const { 1981 EVT PtrVT = getPointerTy(); 1982 DebugLoc dl = Op.getDebugLoc(); 1983 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 1984 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1985 if (RelocM == Reloc::PIC_) { 1986 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 1987 ARMConstantPoolValue *CPV = 1988 new ARMConstantPoolValue(GV, UseGOTOFF ? ARMCP::GOTOFF : ARMCP::GOT); 1989 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1990 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1991 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 1992 CPAddr, 1993 MachinePointerInfo::getConstantPool(), 1994 false, false, 0); 1995 SDValue Chain = Result.getValue(1); 1996 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 1997 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 1998 if (!UseGOTOFF) 1999 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2000 MachinePointerInfo::getGOT(), false, false, 0); 2001 return Result; 2002 } 2003 2004 // If we have T2 ops, we can materialize the address directly via movt/movw 2005 // pair. This is always cheaper. 2006 if (Subtarget->useMovt()) { 2007 ++NumMovwMovt; 2008 // FIXME: Once remat is capable of dealing with instructions with register 2009 // operands, expand this into two nodes. 
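// Illustrative result of the movw/movt path (assuming a target with the
// ARMv6T2 movw/movt instructions, which is what useMovt() gates on):
//   movw r0, :lower16:gv
//   movt r0, :upper16:gv
// The full 32-bit address is materialized in two instructions with no load,
// which is why this is preferred over the constant-pool path below. The
// register shown is schematic.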
2010 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2011 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2012 } else { 2013 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2014 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2015 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2016 MachinePointerInfo::getConstantPool(), 2017 false, false, 0); 2018 } 2019} 2020 2021SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2022 SelectionDAG &DAG) const { 2023 EVT PtrVT = getPointerTy(); 2024 DebugLoc dl = Op.getDebugLoc(); 2025 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2026 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2027 MachineFunction &MF = DAG.getMachineFunction(); 2028 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2029 2030 if (Subtarget->useMovt()) { 2031 ++NumMovwMovt; 2032 // FIXME: Once remat is capable of dealing with instructions with register 2033 // operands, expand this into two nodes. 2034 if (RelocM == Reloc::Static) 2035 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2036 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2037 2038 unsigned Wrapper = (RelocM == Reloc::PIC_) 2039 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2040 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2041 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2042 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2043 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2044 MachinePointerInfo::getGOT(), false, false, 0); 2045 return Result; 2046 } 2047 2048 unsigned ARMPCLabelIndex = 0; 2049 SDValue CPAddr; 2050 if (RelocM == Reloc::Static) { 2051 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2052 } else { 2053 ARMPCLabelIndex = AFI->createPICLabelUId(); 2054 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8); 2055 ARMConstantPoolValue *CPV = 2056 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 2057 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2058 } 2059 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2060 2061 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2062 MachinePointerInfo::getConstantPool(), 2063 false, false, 0); 2064 SDValue Chain = Result.getValue(1); 2065 2066 if (RelocM == Reloc::PIC_) { 2067 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2068 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2069 } 2070 2071 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2072 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2073 false, false, 0); 2074 2075 return Result; 2076} 2077 2078SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2079 SelectionDAG &DAG) const { 2080 assert(Subtarget->isTargetELF() && 2081 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2082 MachineFunction &MF = DAG.getMachineFunction(); 2083 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2084 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2085 EVT PtrVT = getPointerTy(); 2086 DebugLoc dl = Op.getDebugLoc(); 2087 unsigned PCAdj = Subtarget->isThumb() ? 
4 : 8; 2088 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 2089 "_GLOBAL_OFFSET_TABLE_", 2090 ARMPCLabelIndex, PCAdj); 2091 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2092 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2093 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2094 MachinePointerInfo::getConstantPool(), 2095 false, false, 0); 2096 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2097 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2098} 2099 2100SDValue 2101ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) 2102 const { 2103 DebugLoc dl = Op.getDebugLoc(); 2104 return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, 2105 Op.getOperand(0), Op.getOperand(1)); 2106} 2107 2108SDValue 2109ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2110 DebugLoc dl = Op.getDebugLoc(); 2111 SDValue Val = DAG.getConstant(0, MVT::i32); 2112 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 2113 Op.getOperand(1), Val); 2114} 2115 2116SDValue 2117ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2118 DebugLoc dl = Op.getDebugLoc(); 2119 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2120 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2121} 2122 2123SDValue 2124ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2125 const ARMSubtarget *Subtarget) const { 2126 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2127 DebugLoc dl = Op.getDebugLoc(); 2128 switch (IntNo) { 2129 default: return SDValue(); // Don't custom lower most intrinsics. 2130 case Intrinsic::arm_thread_pointer: { 2131 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2132 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2133 } 2134 case Intrinsic::eh_sjlj_lsda: { 2135 MachineFunction &MF = DAG.getMachineFunction(); 2136 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2137 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2138 EVT PtrVT = getPointerTy(); 2139 DebugLoc dl = Op.getDebugLoc(); 2140 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2141 SDValue CPAddr; 2142 unsigned PCAdj = (RelocM != Reloc::PIC_) 2143 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2144 ARMConstantPoolValue *CPV = 2145 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 2146 ARMCP::CPLSDA, PCAdj); 2147 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2148 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2149 SDValue Result = 2150 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2151 MachinePointerInfo::getConstantPool(), 2152 false, false, 0); 2153 2154 if (RelocM == Reloc::PIC_) { 2155 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2156 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2157 } 2158 return Result; 2159 } 2160 case Intrinsic::arm_neon_vmulls: 2161 case Intrinsic::arm_neon_vmullu: { 2162 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2163 ? 
ARMISD::VMULLs : ARMISD::VMULLu; 2164 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2165 Op.getOperand(1), Op.getOperand(2)); 2166 } 2167 } 2168 } 2169 2170 static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2171 const ARMSubtarget *Subtarget) { 2172 DebugLoc dl = Op.getDebugLoc(); 2173 if (!Subtarget->hasDataBarrier()) { 2174 // Some ARMv6 CPUs can support data barriers with an mcr instruction. 2175 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2176 // here. 2177 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2178 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2179 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2180 DAG.getConstant(0, MVT::i32)); 2181 } 2182 2183 SDValue Op5 = Op.getOperand(5); 2184 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2185 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2186 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2187 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2188 2189 ARM_MB::MemBOpt DMBOpt; 2190 if (isDeviceBarrier) 2191 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2192 else 2193 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2194 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2195 DAG.getConstant(DMBOpt, MVT::i32)); 2196 } 2197 2198 static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2199 const ARMSubtarget *Subtarget) { 2200 // Pre-v5TE ARM and Thumb1 do not have preload instructions. 2201 if (!(Subtarget->isThumb2() || 2202 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2203 // Just preserve the chain. 2204 return Op.getOperand(0); 2205 2206 DebugLoc dl = Op.getDebugLoc(); 2207 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2208 if (!isRead && 2209 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2210 // Only ARMv7 with the MP extension has PLDW; otherwise drop write prefetches. 2211 return Op.getOperand(0); 2212 2213 if (Subtarget->isThumb()) 2214 // Invert the bits. 2215 isRead = ~isRead & 1; 2216 unsigned isData = Subtarget->isThumb() ? 0 : 1; 2217 2218 // Currently there is no intrinsic that matches pli. 2219 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2220 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2221 DAG.getConstant(isData, MVT::i32)); 2222 } 2223 2224 static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2225 MachineFunction &MF = DAG.getMachineFunction(); 2226 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2227 2228 // vastart just stores the address of the VarArgsFrameIndex slot into the 2229 // memory location argument.
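// Illustrative sketch: for a function such as
//   void f(int n, ...) { va_list ap; va_start(ap, n); ... }
// the frontend emits llvm.va_start, and the lowering below turns it into a
// single pointer-sized store of the frame index that LowerFormalArguments
// recorded (where, in this example, r1-r3 would have been spilled to the
// register-save area) into the va_list object. The split point between
// registers and stack depends on how many GPRs the fixed arguments used.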
2230 DebugLoc dl = Op.getDebugLoc(); 2231 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2232 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2233 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2234 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2235 MachinePointerInfo(SV), false, false, 0); 2236} 2237 2238SDValue 2239ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2240 SDValue &Root, SelectionDAG &DAG, 2241 DebugLoc dl) const { 2242 MachineFunction &MF = DAG.getMachineFunction(); 2243 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2244 2245 TargetRegisterClass *RC; 2246 if (AFI->isThumb1OnlyFunction()) 2247 RC = ARM::tGPRRegisterClass; 2248 else 2249 RC = ARM::GPRRegisterClass; 2250 2251 // Transform the arguments stored in physical registers into virtual ones. 2252 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2253 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2254 2255 SDValue ArgValue2; 2256 if (NextVA.isMemLoc()) { 2257 MachineFrameInfo *MFI = MF.getFrameInfo(); 2258 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2259 2260 // Create load node to retrieve arguments from the stack. 2261 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2262 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2263 MachinePointerInfo::getFixedStack(FI), 2264 false, false, 0); 2265 } else { 2266 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2267 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2268 } 2269 2270 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2271} 2272 2273SDValue 2274ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2275 CallingConv::ID CallConv, bool isVarArg, 2276 const SmallVectorImpl<ISD::InputArg> 2277 &Ins, 2278 DebugLoc dl, SelectionDAG &DAG, 2279 SmallVectorImpl<SDValue> &InVals) 2280 const { 2281 2282 MachineFunction &MF = DAG.getMachineFunction(); 2283 MachineFrameInfo *MFI = MF.getFrameInfo(); 2284 2285 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2286 2287 // Assign locations to all of the incoming arguments. 2288 SmallVector<CCValAssign, 16> ArgLocs; 2289 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2290 *DAG.getContext()); 2291 CCInfo.AnalyzeFormalArguments(Ins, 2292 CCAssignFnForNode(CallConv, /* Return*/ false, 2293 isVarArg)); 2294 2295 SmallVector<SDValue, 16> ArgValues; 2296 int lastInsIndex = -1; 2297 2298 SDValue ArgValue; 2299 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2300 CCValAssign &VA = ArgLocs[i]; 2301 2302 // Arguments stored in registers. 2303 if (VA.isRegLoc()) { 2304 EVT RegVT = VA.getLocVT(); 2305 2306 if (VA.needsCustom()) { 2307 // f64 and vector types are split up into multiple registers or 2308 // combinations of registers and stack slots. 
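// For example (illustrative, assuming the APCS integer convention): a v2f64
// formal argument occupies four 32-bit locations, so CCInfo may assign it
//   { r0, r1, r2, r3 }  or, if earlier arguments consumed registers,
//   { r2, r3, sp+0, sp+4 },
// and the code below reassembles the value with two GetF64FormalArgument
// calls plus INSERT_VECTOR_ELT. The exact assignments shown are assumptions.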
2309 if (VA.getLocVT() == MVT::v2f64) { 2310 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2311 Chain, DAG, dl); 2312 VA = ArgLocs[++i]; // skip ahead to next loc 2313 SDValue ArgValue2; 2314 if (VA.isMemLoc()) { 2315 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2316 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2317 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2318 MachinePointerInfo::getFixedStack(FI), 2319 false, false, 0); 2320 } else { 2321 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2322 Chain, DAG, dl); 2323 } 2324 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2325 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2326 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2327 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2328 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2329 } else 2330 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2331 2332 } else { 2333 TargetRegisterClass *RC; 2334 2335 if (RegVT == MVT::f32) 2336 RC = ARM::SPRRegisterClass; 2337 else if (RegVT == MVT::f64) 2338 RC = ARM::DPRRegisterClass; 2339 else if (RegVT == MVT::v2f64) 2340 RC = ARM::QPRRegisterClass; 2341 else if (RegVT == MVT::i32) 2342 RC = (AFI->isThumb1OnlyFunction() ? 2343 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2344 else 2345 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2346 2347 // Transform the arguments in physical registers into virtual ones. 2348 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2349 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2350 } 2351 2352 // If this is an 8 or 16-bit value, it is really passed promoted 2353 // to 32 bits. Insert an assert[sz]ext to capture this, then 2354 // truncate to the right size. 2355 switch (VA.getLocInfo()) { 2356 default: llvm_unreachable("Unknown loc info!"); 2357 case CCValAssign::Full: break; 2358 case CCValAssign::BCvt: 2359 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2360 break; 2361 case CCValAssign::SExt: 2362 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2363 DAG.getValueType(VA.getValVT())); 2364 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2365 break; 2366 case CCValAssign::ZExt: 2367 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2368 DAG.getValueType(VA.getValVT())); 2369 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2370 break; 2371 } 2372 2373 InVals.push_back(ArgValue); 2374 2375 } else { // !VA.isRegLoc() 2376 2377 // sanity check 2378 assert(VA.isMemLoc()); 2379 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2380 2381 int index = ArgLocs[i].getValNo(); 2382 2383 // Some Ins[] entries become multiple ArgLoc[] entries. 2384 // Process them only once. 2385 if (index != lastInsIndex) 2386 { 2387 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2388 // FIXME: For now, all byval parameter objects are marked mutable. This 2389 // can be changed with more analysis. 2390 // In case of tail call optimization, mark all arguments mutable, since 2391 // they could be overwritten by the lowering of arguments for a tail call. 2392 if (Flags.isByVal()) { 2393 int FI = MFI->CreateFixedObject(Flags.getByValSize(), 2394 VA.getLocMemOffset(), false); 2395 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2396 } else { 2397 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2398 VA.getLocMemOffset(), true); 2399 2400 // Create load nodes to retrieve arguments from the stack.
2401 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2402 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2403 MachinePointerInfo::getFixedStack(FI), 2404 false, false, 0)); 2405 } 2406 lastInsIndex = index; 2407 } 2408 } 2409 } 2410 2411 // varargs 2412 if (isVarArg) { 2413 static const unsigned GPRArgRegs[] = { 2414 ARM::R0, ARM::R1, ARM::R2, ARM::R3 2415 }; 2416 2417 unsigned NumGPRs = CCInfo.getFirstUnallocated 2418 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2419 2420 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2421 unsigned VARegSize = (4 - NumGPRs) * 4; 2422 unsigned VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2423 unsigned ArgOffset = CCInfo.getNextStackOffset(); 2424 if (VARegSaveSize) { 2425 // If this function is vararg, store any remaining integer argument regs 2426 // to their spots on the stack so that they may be loaded by dereferencing 2427 // the result of va_next. 2428 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2429 AFI->setVarArgsFrameIndex( 2430 MFI->CreateFixedObject(VARegSaveSize, 2431 ArgOffset + VARegSaveSize - VARegSize, 2432 false)); 2433 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2434 getPointerTy()); 2435 2436 SmallVector<SDValue, 4> MemOps; 2437 for (; NumGPRs < 4; ++NumGPRs) { 2438 TargetRegisterClass *RC; 2439 if (AFI->isThumb1OnlyFunction()) 2440 RC = ARM::tGPRRegisterClass; 2441 else 2442 RC = ARM::GPRRegisterClass; 2443 2444 unsigned VReg = MF.addLiveIn(GPRArgRegs[NumGPRs], RC); 2445 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2446 SDValue Store = 2447 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2448 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2449 false, false, 0); 2450 MemOps.push_back(Store); 2451 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2452 DAG.getConstant(4, getPointerTy())); 2453 } 2454 if (!MemOps.empty()) 2455 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2456 &MemOps[0], MemOps.size()); 2457 } else 2458 // This will point to the next argument passed via stack. 2459 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2460 } 2461 2462 return Chain; 2463 } 2464 2465 /// isFloatingPointZero - Return true if this is +0.0. 2466 static bool isFloatingPointZero(SDValue Op) { 2467 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2468 return CFP->getValueAPF().isPosZero(); 2469 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2470 // Maybe this has already been legalized into the constant pool? 2471 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2472 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2473 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2474 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2475 return CFP->getValueAPF().isPosZero(); 2476 } 2477 } 2478 return false; 2479 } 2480 2481 /// Returns an appropriate ARM CMP (cmp) and the corresponding condition code 2482 /// for the given operands. 2483 SDValue 2484 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2485 SDValue &ARMcc, SelectionDAG &DAG, 2486 DebugLoc dl) const { 2487 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2488 unsigned C = RHSC->getZExtValue(); 2489 if (!isLegalICmpImmediate(C)) { 2490 // Constant does not fit, try adjusting it by one.
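// For instance (illustrative): "x < 0x10001" has an RHS that cannot be
// encoded as an ARM modified immediate, but the equivalent "x <= 0x10000"
// can, so SETLT is rewritten to SETLE with C-1. The boundary checks below
// (C != 0x80000000 for signed, C != 0 for unsigned lower bounds) prevent
// the adjustment from wrapping around.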
2491 switch (CC) { 2492 default: break; 2493 case ISD::SETLT: 2494 case ISD::SETGE: 2495 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2496 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2497 RHS = DAG.getConstant(C-1, MVT::i32); 2498 } 2499 break; 2500 case ISD::SETULT: 2501 case ISD::SETUGE: 2502 if (C != 0 && isLegalICmpImmediate(C-1)) { 2503 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2504 RHS = DAG.getConstant(C-1, MVT::i32); 2505 } 2506 break; 2507 case ISD::SETLE: 2508 case ISD::SETGT: 2509 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2510 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2511 RHS = DAG.getConstant(C+1, MVT::i32); 2512 } 2513 break; 2514 case ISD::SETULE: 2515 case ISD::SETUGT: 2516 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2517 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2518 RHS = DAG.getConstant(C+1, MVT::i32); 2519 } 2520 break; 2521 } 2522 } 2523 } 2524 2525 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2526 ARMISD::NodeType CompareType; 2527 switch (CondCode) { 2528 default: 2529 CompareType = ARMISD::CMP; 2530 break; 2531 case ARMCC::EQ: 2532 case ARMCC::NE: 2533 // Uses only the Z flag. 2534 CompareType = ARMISD::CMPZ; 2535 break; 2536 } 2537 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2538 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2539 } 2540 2541 /// Returns an appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 2542 SDValue 2543 ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2544 DebugLoc dl) const { 2545 SDValue Cmp; 2546 if (!isFloatingPointZero(RHS)) 2547 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2548 else 2549 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2550 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2551 } 2552 2553 /// duplicateCmp - Glue values can have only one use, so this function 2554 /// duplicates a comparison node.
2555SDValue 2556ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2557 unsigned Opc = Cmp.getOpcode(); 2558 DebugLoc DL = Cmp.getDebugLoc(); 2559 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2560 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2561 2562 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2563 Cmp = Cmp.getOperand(0); 2564 Opc = Cmp.getOpcode(); 2565 if (Opc == ARMISD::CMPFP) 2566 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2567 else { 2568 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2569 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2570 } 2571 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2572} 2573 2574SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2575 SDValue Cond = Op.getOperand(0); 2576 SDValue SelectTrue = Op.getOperand(1); 2577 SDValue SelectFalse = Op.getOperand(2); 2578 DebugLoc dl = Op.getDebugLoc(); 2579 2580 // Convert: 2581 // 2582 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2583 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2584 // 2585 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2586 const ConstantSDNode *CMOVTrue = 2587 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2588 const ConstantSDNode *CMOVFalse = 2589 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2590 2591 if (CMOVTrue && CMOVFalse) { 2592 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2593 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2594 2595 SDValue True; 2596 SDValue False; 2597 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2598 True = SelectTrue; 2599 False = SelectFalse; 2600 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2601 True = SelectFalse; 2602 False = SelectTrue; 2603 } 2604 2605 if (True.getNode() && False.getNode()) { 2606 EVT VT = Cond.getValueType(); 2607 SDValue ARMcc = Cond.getOperand(2); 2608 SDValue CCR = Cond.getOperand(3); 2609 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2610 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2611 } 2612 } 2613 } 2614 2615 return DAG.getSelectCC(dl, Cond, 2616 DAG.getConstant(0, Cond.getValueType()), 2617 SelectTrue, SelectFalse, ISD::SETNE); 2618} 2619 2620SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2621 EVT VT = Op.getValueType(); 2622 SDValue LHS = Op.getOperand(0); 2623 SDValue RHS = Op.getOperand(1); 2624 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2625 SDValue TrueVal = Op.getOperand(2); 2626 SDValue FalseVal = Op.getOperand(3); 2627 DebugLoc dl = Op.getDebugLoc(); 2628 2629 if (LHS.getValueType() == MVT::i32) { 2630 SDValue ARMcc; 2631 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2632 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2633 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2634 } 2635 2636 ARMCC::CondCodes CondCode, CondCode2; 2637 FPCCToARMCC(CC, CondCode, CondCode2); 2638 2639 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2640 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2641 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2642 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2643 ARMcc, CCR, Cmp); 2644 if (CondCode2 != ARMCC::AL) { 2645 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2646 // FIXME: Needs another CMP because flag can have but one use. 
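// Illustrative case: ISD::SETUEQ maps to the two ARM conditions EQ and VS,
// so the select is built from two chained conditional moves, each consuming
// its own compare because a Glue value has a single use:
//   moveq rD, rTrue      @ first CMOV on CondCode
//   movvs rD, rTrue      @ second CMOV on CondCode2
// The register names are schematic, not taken from the source.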
2647 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2648 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2649 Result, TrueVal, ARMcc2, CCR, Cmp2); 2650 } 2651 return Result; 2652 } 2653 2654 /// canChangeToInt - Given the fp compare operand, return true if it is 2655 /// suitable to morph into an integer compare sequence. 2656 static bool canChangeToInt(SDValue Op, bool &SeenZero, 2657 const ARMSubtarget *Subtarget) { 2658 SDNode *N = Op.getNode(); 2659 if (!N->hasOneUse()) 2660 // Otherwise it requires moving the value from fp to integer registers. 2661 return false; 2662 if (!N->getNumValues()) 2663 return false; 2664 EVT VT = Op.getValueType(); 2665 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2666 // The f32 case is generally profitable. The f64 case only makes sense when 2667 // vcmpe + vmrs are very slow, e.g. on Cortex-A8. 2668 return false; 2669 2670 if (isFloatingPointZero(Op)) { 2671 SeenZero = true; 2672 return true; 2673 } 2674 return ISD::isNormalLoad(N); 2675 } 2676 2677 static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2678 if (isFloatingPointZero(Op)) 2679 return DAG.getConstant(0, MVT::i32); 2680 2681 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2682 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2683 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2684 Ld->isVolatile(), Ld->isNonTemporal(), 2685 Ld->getAlignment()); 2686 2687 llvm_unreachable("Unknown VFP cmp argument!"); 2688 } 2689 2690 static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2691 SDValue &RetVal1, SDValue &RetVal2) { 2692 if (isFloatingPointZero(Op)) { 2693 RetVal1 = DAG.getConstant(0, MVT::i32); 2694 RetVal2 = DAG.getConstant(0, MVT::i32); 2695 return; 2696 } 2697 2698 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2699 SDValue Ptr = Ld->getBasePtr(); 2700 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2701 Ld->getChain(), Ptr, 2702 Ld->getPointerInfo(), 2703 Ld->isVolatile(), Ld->isNonTemporal(), 2704 Ld->getAlignment()); 2705 2706 EVT PtrType = Ptr.getValueType(); 2707 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2708 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2709 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2710 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2711 Ld->getChain(), NewPtr, 2712 Ld->getPointerInfo().getWithOffset(4), 2713 Ld->isVolatile(), Ld->isNonTemporal(), 2714 NewAlign); 2715 return; 2716 } 2717 2718 llvm_unreachable("Unknown VFP cmp argument!"); 2719 } 2720 2721 /// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize 2722 /// some f32 and even f64 comparisons to integer ones. 2723 SDValue 2724 ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2725 SDValue Chain = Op.getOperand(0); 2726 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2727 SDValue LHS = Op.getOperand(2); 2728 SDValue RHS = Op.getOperand(3); 2729 SDValue Dest = Op.getOperand(4); 2730 DebugLoc dl = Op.getDebugLoc(); 2731 2732 bool SeenZero = false; 2733 if (canChangeToInt(LHS, SeenZero, Subtarget) && 2734 canChangeToInt(RHS, SeenZero, Subtarget) && 2735 // If one of the operands is zero, it's safe to ignore the NaN case since 2736 // we only care about equality comparisons. 2737 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { 2738 // If unsafe fp math optimization is enabled and there are no other uses of 2739 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2740 // to an integer comparison.
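// This rewrite is only sound for equality tests under unsafe-fp-math
// because it compares bit patterns rather than values; e.g. -0.0f == +0.0f
// as floats, yet their i32 images 0x80000000 and 0x00000000 differ. The
// SeenZero path stays exact because isFloatingPointZero only accepts +0.0.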
2741 if (CC == ISD::SETOEQ) 2742 CC = ISD::SETEQ; 2743 else if (CC == ISD::SETUNE) 2744 CC = ISD::SETNE; 2745 2746 SDValue ARMcc; 2747 if (LHS.getValueType() == MVT::f32) { 2748 LHS = bitcastf32Toi32(LHS, DAG); 2749 RHS = bitcastf32Toi32(RHS, DAG); 2750 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2751 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2752 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2753 Chain, Dest, ARMcc, CCR, Cmp); 2754 } 2755 2756 SDValue LHS1, LHS2; 2757 SDValue RHS1, RHS2; 2758 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2759 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2760 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2761 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2762 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2763 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2764 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2765 } 2766 2767 return SDValue(); 2768} 2769 2770SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2771 SDValue Chain = Op.getOperand(0); 2772 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2773 SDValue LHS = Op.getOperand(2); 2774 SDValue RHS = Op.getOperand(3); 2775 SDValue Dest = Op.getOperand(4); 2776 DebugLoc dl = Op.getDebugLoc(); 2777 2778 if (LHS.getValueType() == MVT::i32) { 2779 SDValue ARMcc; 2780 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2781 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2782 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2783 Chain, Dest, ARMcc, CCR, Cmp); 2784 } 2785 2786 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2787 2788 if (UnsafeFPMath && 2789 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2790 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2791 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2792 if (Result.getNode()) 2793 return Result; 2794 } 2795 2796 ARMCC::CondCodes CondCode, CondCode2; 2797 FPCCToARMCC(CC, CondCode, CondCode2); 2798 2799 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2800 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2801 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2802 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2803 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2804 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2805 if (CondCode2 != ARMCC::AL) { 2806 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2807 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2808 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2809 } 2810 return Res; 2811} 2812 2813SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2814 SDValue Chain = Op.getOperand(0); 2815 SDValue Table = Op.getOperand(1); 2816 SDValue Index = Op.getOperand(2); 2817 DebugLoc dl = Op.getDebugLoc(); 2818 2819 EVT PTy = getPointerTy(); 2820 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2821 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2822 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2823 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2824 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2825 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2826 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2827 if (Subtarget->isThumb2()) { 2828 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2829 // which does another jump to the destination. 
This also makes it easier 2830 // to translate it to TBB / TBH later. 2831 // FIXME: This might not work if the function is extremely large. 2832 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2833 Addr, Op.getOperand(2), JTI, UId); 2834 } 2835 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2836 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2837 MachinePointerInfo::getJumpTable(), 2838 false, false, 0); 2839 Chain = Addr.getValue(1); 2840 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2841 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2842 } else { 2843 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2844 MachinePointerInfo::getJumpTable(), false, false, 0); 2845 Chain = Addr.getValue(1); 2846 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2847 } 2848} 2849 2850static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2851 DebugLoc dl = Op.getDebugLoc(); 2852 unsigned Opc; 2853 2854 switch (Op.getOpcode()) { 2855 default: 2856 assert(0 && "Invalid opcode!"); 2857 case ISD::FP_TO_SINT: 2858 Opc = ARMISD::FTOSI; 2859 break; 2860 case ISD::FP_TO_UINT: 2861 Opc = ARMISD::FTOUI; 2862 break; 2863 } 2864 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2865 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 2866} 2867 2868static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2869 EVT VT = Op.getValueType(); 2870 DebugLoc dl = Op.getDebugLoc(); 2871 2872 EVT OperandVT = Op.getOperand(0).getValueType(); 2873 assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); 2874 if (VT != MVT::v4f32) 2875 return DAG.UnrollVectorOp(Op.getNode()); 2876 2877 unsigned CastOpc; 2878 unsigned Opc; 2879 switch (Op.getOpcode()) { 2880 default: 2881 assert(0 && "Invalid opcode!"); 2882 case ISD::SINT_TO_FP: 2883 CastOpc = ISD::SIGN_EXTEND; 2884 Opc = ISD::SINT_TO_FP; 2885 break; 2886 case ISD::UINT_TO_FP: 2887 CastOpc = ISD::ZERO_EXTEND; 2888 Opc = ISD::UINT_TO_FP; 2889 break; 2890 } 2891 2892 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 2893 return DAG.getNode(Opc, dl, VT, Op); 2894} 2895 2896static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2897 EVT VT = Op.getValueType(); 2898 if (VT.isVector()) 2899 return LowerVectorINT_TO_FP(Op, DAG); 2900 2901 DebugLoc dl = Op.getDebugLoc(); 2902 unsigned Opc; 2903 2904 switch (Op.getOpcode()) { 2905 default: 2906 assert(0 && "Invalid opcode!"); 2907 case ISD::SINT_TO_FP: 2908 Opc = ARMISD::SITOF; 2909 break; 2910 case ISD::UINT_TO_FP: 2911 Opc = ARMISD::UITOF; 2912 break; 2913 } 2914 2915 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 2916 return DAG.getNode(Opc, dl, VT, Op); 2917} 2918 2919SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 2920 // Implement fcopysign with a fabs and a conditional fneg. 2921 SDValue Tmp0 = Op.getOperand(0); 2922 SDValue Tmp1 = Op.getOperand(1); 2923 DebugLoc dl = Op.getDebugLoc(); 2924 EVT VT = Op.getValueType(); 2925 EVT SrcVT = Tmp1.getValueType(); 2926 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 2927 Tmp0.getOpcode() == ARMISD::VMOVDRR; 2928 bool UseNEON = !InGPR && Subtarget->hasNEON(); 2929 2930 if (UseNEON) { 2931 // Use VBSL to copy the sign bit. 2932 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 2933 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 2934 DAG.getTargetConstant(EncodedVal, MVT::i32)); 2935 EVT OpVT = (VT == MVT::f32) ? 
MVT::v2i32 : MVT::v1i64; 2936 if (VT == MVT::f64) 2937 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 2938 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 2939 DAG.getConstant(32, MVT::i32)); 2940 else /*if (VT == MVT::f32)*/ 2941 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 2942 if (SrcVT == MVT::f32) { 2943 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 2944 if (VT == MVT::f64) 2945 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 2946 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 2947 DAG.getConstant(32, MVT::i32)); 2948 } 2949 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 2950 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 2951 2952 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 2953 MVT::i32); 2954 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 2955 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 2956 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 2957 2958 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 2959 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 2960 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 2961 if (VT == MVT::f32) { 2962 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 2963 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 2964 DAG.getConstant(0, MVT::i32)); 2965 } else { 2966 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 2967 } 2968 2969 return Res; 2970 } 2971 2972 // Bitcast operand 1 to i32. 2973 if (SrcVT == MVT::f64) 2974 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 2975 &Tmp1, 1).getValue(1); 2976 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 2977 2978 // Or in the signbit with integer operations. 2979 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 2980 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 2981 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 2982 if (VT == MVT::f32) { 2983 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 2984 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 2985 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 2986 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 2987 } 2988 2989 // f64: Or the high part with signbit and then combine two parts. 2990 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 2991 &Tmp0, 1); 2992 SDValue Lo = Tmp0.getValue(0); 2993 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 2994 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 2995 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 2996} 2997 2998SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 2999 MachineFunction &MF = DAG.getMachineFunction(); 3000 MachineFrameInfo *MFI = MF.getFrameInfo(); 3001 MFI->setReturnAddressIsTaken(true); 3002 3003 EVT VT = Op.getValueType(); 3004 DebugLoc dl = Op.getDebugLoc(); 3005 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3006 if (Depth) { 3007 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3008 SDValue Offset = DAG.getConstant(4, MVT::i32); 3009 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3010 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3011 MachinePointerInfo(), false, false, 0); 3012 } 3013 3014 // Return LR, which contains the return address. Mark it an implicit live-in. 
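  // Added commentary (sketch, not part of this revision): for depth 0 this
  // makes __builtin_return_address(0) little more than "mov r0, lr", while
  // depth > 0 walks the frame chain via LowerFRAMEADDR and loads the saved
  // return address at [fp, #4], as done above.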
3015 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3016 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3017} 3018 3019SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3020 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3021 MFI->setFrameAddressIsTaken(true); 3022 3023 EVT VT = Op.getValueType(); 3024 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3025 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3026 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3027 ? ARM::R7 : ARM::R11; 3028 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3029 while (Depth--) 3030 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3031 MachinePointerInfo(), 3032 false, false, 0); 3033 return FrameAddr; 3034} 3035 3036/// ExpandBITCAST - If the target supports VFP, this function is called to 3037/// expand a bit convert where either the source or destination type is i64 to 3038/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3039/// operand type is illegal (e.g., v2f32 for a target that doesn't support 3040/// vectors), since the legalizer won't know what to do with that. 3041static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3042 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3043 DebugLoc dl = N->getDebugLoc(); 3044 SDValue Op = N->getOperand(0); 3045 3046 // This function is only supposed to be called for i64 types, either as the 3047 // source or destination of the bit convert. 3048 EVT SrcVT = Op.getValueType(); 3049 EVT DstVT = N->getValueType(0); 3050 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3051 "ExpandBITCAST called for non-i64 type"); 3052 3053 // Turn i64->f64 into VMOVDRR. 3054 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3055 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3056 DAG.getConstant(0, MVT::i32)); 3057 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3058 DAG.getConstant(1, MVT::i32)); 3059 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3060 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3061 } 3062 3063 // Turn f64->i64 into VMOVRRD. 3064 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3065 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3066 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3067 // Merge the pieces into a single i64 value. 3068 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3069 } 3070 3071 return SDValue(); 3072} 3073 3074/// getZeroVector - Returns a vector of specified type with all zero elements. 3075/// Zero vectors are used to represent vector negation and in those cases 3076/// will be implemented with the NEON VNEG instruction. However, VNEG does 3077/// not support i64 elements, so sometimes the zero vectors will need to be 3078/// explicitly constructed. Regardless, use a canonical VMOV to create the 3079/// zero vector. 3080static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3081 assert(VT.isVector() && "Expected a vector type"); 3082 // The canonical modified immediate encoding of a zero vector is....0! 3083 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3084 EVT VmovVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32;
3085  SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal);
3086  return DAG.getNode(ISD::BITCAST, dl, VT, Vmov);
3087}
3088
3089/// LowerShiftRightParts - Lower SRA_PARTS and SRL_PARTS, which return two
3090/// i32 values and take a 2 x i32 value to shift plus a shift amount.
3091SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
3092                                                SelectionDAG &DAG) const {
3093  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3094  EVT VT = Op.getValueType();
3095  unsigned VTBits = VT.getSizeInBits();
3096  DebugLoc dl = Op.getDebugLoc();
3097  SDValue ShOpLo = Op.getOperand(0);
3098  SDValue ShOpHi = Op.getOperand(1);
3099  SDValue ShAmt = Op.getOperand(2);
3100  SDValue ARMcc;
3101  unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
3102
3103  assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
3104
3105  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3106                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3107  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt);
3108  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3109                                   DAG.getConstant(VTBits, MVT::i32));
3110  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
3111  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3112  SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
3113
3114  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3115  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3116                          ARMcc, DAG, dl);
3117  SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
3118  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
3119                           CCR, Cmp);
3120
3121  SDValue Ops[2] = { Lo, Hi };
3122  return DAG.getMergeValues(Ops, 2, dl);
3123}
3124
3125/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
3126/// i32 values and takes a 2 x i32 value to shift plus a shift amount.
3127SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
3128                                               SelectionDAG &DAG) const {
3129  assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3130  EVT VT = Op.getValueType();
3131  unsigned VTBits = VT.getSizeInBits();
3132  DebugLoc dl = Op.getDebugLoc();
3133  SDValue ShOpLo = Op.getOperand(0);
3134  SDValue ShOpHi = Op.getOperand(1);
3135  SDValue ShAmt = Op.getOperand(2);
3136  SDValue ARMcc;
3137
3138  assert(Op.getOpcode() == ISD::SHL_PARTS);
3139  SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3140                                 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3141  SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
3142  SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3143                                   DAG.getConstant(VTBits, MVT::i32));
3144  SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
3145  SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
3146
3147  SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3148  SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3149  SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3150                          ARMcc, DAG, dl);
3151  SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
3152  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
3153                           CCR, Cmp);
3154
3155  SDValue Ops[2] = { Lo, Hi };
3156  return DAG.getMergeValues(Ops, 2, dl);
3157}
3158
3159SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3160                                            SelectionDAG &DAG) const {
3161  // The rounding mode is in bits 23:22 of the FPSCR.
3162  // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3163  // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
3164  // written so that the shift and the mask get folded into a bitfield extract.
3165  DebugLoc dl = Op.getDebugLoc();
3166  SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
3167                              DAG.getConstant(Intrinsic::arm_get_fpscr,
3168                                              MVT::i32));
3169  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
3170                                  DAG.getConstant(1U << 22, MVT::i32));
3171  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3172                              DAG.getConstant(22, MVT::i32));
3173  return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3174                     DAG.getConstant(3, MVT::i32));
3175}
3176
3177static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3178                         const ARMSubtarget *ST) {
3179  EVT VT = N->getValueType(0);
3180  DebugLoc dl = N->getDebugLoc();
3181
3182  if (!ST->hasV6T2Ops())
3183    return SDValue();
3184
3185  SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3186  return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3187}
3188
3189static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3190                          const ARMSubtarget *ST) {
3191  EVT VT = N->getValueType(0);
3192  DebugLoc dl = N->getDebugLoc();
3193
3194  if (!VT.isVector())
3195    return SDValue();
3196
3197  // Lower vector shifts on NEON to use VSHL.
3198  assert(ST->hasNEON() && "unexpected vector shift");
3199
3200  // Left shifts translate directly to the vshiftu intrinsic.
3201  if (N->getOpcode() == ISD::SHL)
3202    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3203                       DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3204                       N->getOperand(0), N->getOperand(1));
3205
3206  assert((N->getOpcode() == ISD::SRA ||
3207          N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3208
3209  // NEON uses the same intrinsics for both left and right shifts. For
3210  // right shifts, the shift amounts are negative, so negate the vector of
3211  // shift amounts.
3212  EVT ShiftVT = N->getOperand(1).getValueType();
3213  SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT,
3214                                     getZeroVector(ShiftVT, DAG, dl),
3215                                     N->getOperand(1));
3216  Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ?
3217                             Intrinsic::arm_neon_vshifts :
3218                             Intrinsic::arm_neon_vshiftu);
3219  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3220                     DAG.getConstant(vshiftInt, MVT::i32),
3221                     N->getOperand(0), NegatedCount);
3222}
3223
3224static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG,
3225                                const ARMSubtarget *ST) {
3226  EVT VT = N->getValueType(0);
3227  DebugLoc dl = N->getDebugLoc();
3228
3229  // We can get here for a node like i32 = ISD::SHL i32, i64.
3230  if (VT != MVT::i64)
3231    return SDValue();
3232
3233  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
3234         "Unknown shift to lower!");
3235
3236  // We only lower SRA and SRL by 1 here; all others use generic lowering.
3237  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
3238      cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1)
3239    return SDValue();
3240
3241  // If we are in Thumb1 mode, we don't have RRX.
3242  if (ST->isThumb1Only()) return SDValue();
3243
3244  // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr.
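  // Added commentary (illustrative sketch, not part of this revision): for an
  // i64 value in the register pair r1:r0, "srl i64 %x, 1" is expected to become
  //   lsrs r1, r1, #1   @ SRL_FLAG: shift the high word, carry = old bit 0
  //   rrx  r0, r0       @ RRX: rotate the carry into the top of the low word
  // (an arithmetic shift uses asrs for the high word instead). Register
  // assignments are hypothetical.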
3245 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3246 DAG.getConstant(0, MVT::i32)); 3247 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3248 DAG.getConstant(1, MVT::i32)); 3249 3250 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3251 // captures the result into a carry flag. 3252 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3253 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3254 3255 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3256 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3257 3258 // Merge the pieces into a single i64 value. 3259 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3260} 3261 3262static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3263 SDValue TmpOp0, TmpOp1; 3264 bool Invert = false; 3265 bool Swap = false; 3266 unsigned Opc = 0; 3267 3268 SDValue Op0 = Op.getOperand(0); 3269 SDValue Op1 = Op.getOperand(1); 3270 SDValue CC = Op.getOperand(2); 3271 EVT VT = Op.getValueType(); 3272 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3273 DebugLoc dl = Op.getDebugLoc(); 3274 3275 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3276 switch (SetCCOpcode) { 3277 default: llvm_unreachable("Illegal FP comparison"); break; 3278 case ISD::SETUNE: 3279 case ISD::SETNE: Invert = true; // Fallthrough 3280 case ISD::SETOEQ: 3281 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3282 case ISD::SETOLT: 3283 case ISD::SETLT: Swap = true; // Fallthrough 3284 case ISD::SETOGT: 3285 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3286 case ISD::SETOLE: 3287 case ISD::SETLE: Swap = true; // Fallthrough 3288 case ISD::SETOGE: 3289 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3290 case ISD::SETUGE: Swap = true; // Fallthrough 3291 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3292 case ISD::SETUGT: Swap = true; // Fallthrough 3293 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3294 case ISD::SETUEQ: Invert = true; // Fallthrough 3295 case ISD::SETONE: 3296 // Expand this to (OLT | OGT). 3297 TmpOp0 = Op0; 3298 TmpOp1 = Op1; 3299 Opc = ISD::OR; 3300 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3301 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3302 break; 3303 case ISD::SETUO: Invert = true; // Fallthrough 3304 case ISD::SETO: 3305 // Expand this to (OLT | OGE). 3306 TmpOp0 = Op0; 3307 TmpOp1 = Op1; 3308 Opc = ISD::OR; 3309 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3310 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3311 break; 3312 } 3313 } else { 3314 // Integer comparisons. 3315 switch (SetCCOpcode) { 3316 default: llvm_unreachable("Illegal integer comparison"); break; 3317 case ISD::SETNE: Invert = true; 3318 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3319 case ISD::SETLT: Swap = true; 3320 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3321 case ISD::SETLE: Swap = true; 3322 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3323 case ISD::SETULT: Swap = true; 3324 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3325 case ISD::SETULE: Swap = true; 3326 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3327 } 3328 3329 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 
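    // Added commentary (not part of this revision): concretely,
    //   setcc ne (and %a, %mask), zero  -->  VTST %a, %mask
    //   setcc eq (and %a, %mask), zero  -->  NOT (VTST %a, %mask)
    // which is why Invert is flipped when the AND pattern is matched below.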
3330    if (Opc == ARMISD::VCEQ) {
3331
3332      SDValue AndOp;
3333      if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3334        AndOp = Op0;
3335      else if (ISD::isBuildVectorAllZeros(Op0.getNode()))
3336        AndOp = Op1;
3337
3338      // Ignore bitconvert.
3339      if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST)
3340        AndOp = AndOp.getOperand(0);
3341
3342      if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
3343        Opc = ARMISD::VTST;
3344        Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0));
3345        Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1));
3346        Invert = !Invert;
3347      }
3348    }
3349  }
3350
3351  if (Swap)
3352    std::swap(Op0, Op1);
3353
3354  // If one of the operands is a constant vector zero, attempt to fold the
3355  // comparison to a specialized compare-against-zero form.
3356  SDValue SingleOp;
3357  if (ISD::isBuildVectorAllZeros(Op1.getNode()))
3358    SingleOp = Op0;
3359  else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
3360    if (Opc == ARMISD::VCGE)
3361      Opc = ARMISD::VCLEZ;
3362    else if (Opc == ARMISD::VCGT)
3363      Opc = ARMISD::VCLTZ;
3364    SingleOp = Op1;
3365  }
3366
3367  SDValue Result;
3368  if (SingleOp.getNode()) {
3369    switch (Opc) {
3370    case ARMISD::VCEQ:
3371      Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break;
3372    case ARMISD::VCGE:
3373      Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break;
3374    case ARMISD::VCLEZ:
3375      Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break;
3376    case ARMISD::VCGT:
3377      Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break;
3378    case ARMISD::VCLTZ:
3379      Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break;
3380    default:
3381      Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3382    }
3383  } else {
3384    Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
3385  }
3386
3387  if (Invert)
3388    Result = DAG.getNOT(dl, Result, VT);
3389
3390  return Result;
3391}
3392
3393/// isNEONModifiedImm - Check if the specified splat value corresponds to a
3394/// valid vector constant for a NEON instruction with a "modified immediate"
3395/// operand (e.g., VMOV). If so, return the encoded value.
3396static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
3397                                 unsigned SplatBitSize, SelectionDAG &DAG,
3398                                 EVT &VT, bool is128Bits, NEONModImmType type) {
3399  unsigned OpCmode, Imm;
3400
3401  // SplatBitSize is set to the smallest size that splats the vector, so a
3402  // zero vector will always have SplatBitSize == 8. However, NEON modified
3403  // immediate instructions other than VMOV do not support the 8-bit encoding
3404  // of a zero vector, and the default encoding of zero is supposed to be the
3405  // 32-bit version.
3406  if (SplatBits == 0)
3407    SplatBitSize = 32;
3408
3409  switch (SplatBitSize) {
3410  case 8:
3411    if (type != VMOVModImm)
3412      return SDValue();
3413    // Any 1-byte value is OK. Op=0, Cmode=1110.
3414    assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big");
3415    OpCmode = 0xe;
3416    Imm = SplatBits;
3417    VT = is128Bits ? MVT::v16i8 : MVT::v8i8;
3418    break;
3419
3420  case 16:
3421    // NEON's 16-bit VMOV supports splat values where only one byte is nonzero.
3422    VT = is128Bits ? MVT::v8i16 : MVT::v4i16;
3423    if ((SplatBits & ~0xff) == 0) {
3424      // Value = 0x00nn: Op=x, Cmode=100x.
3425      OpCmode = 0x8;
3426      Imm = SplatBits;
3427      break;
3428    }
3429    if ((SplatBits & ~0xff00) == 0) {
3430      // Value = 0xnn00: Op=x, Cmode=101x.
3431 OpCmode = 0xa; 3432 Imm = SplatBits >> 8; 3433 break; 3434 } 3435 return SDValue(); 3436 3437 case 32: 3438 // NEON's 32-bit VMOV supports splat values where: 3439 // * only one byte is nonzero, or 3440 // * the least significant byte is 0xff and the second byte is nonzero, or 3441 // * the least significant 2 bytes are 0xff and the third is nonzero. 3442 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3443 if ((SplatBits & ~0xff) == 0) { 3444 // Value = 0x000000nn: Op=x, Cmode=000x. 3445 OpCmode = 0; 3446 Imm = SplatBits; 3447 break; 3448 } 3449 if ((SplatBits & ~0xff00) == 0) { 3450 // Value = 0x0000nn00: Op=x, Cmode=001x. 3451 OpCmode = 0x2; 3452 Imm = SplatBits >> 8; 3453 break; 3454 } 3455 if ((SplatBits & ~0xff0000) == 0) { 3456 // Value = 0x00nn0000: Op=x, Cmode=010x. 3457 OpCmode = 0x4; 3458 Imm = SplatBits >> 16; 3459 break; 3460 } 3461 if ((SplatBits & ~0xff000000) == 0) { 3462 // Value = 0xnn000000: Op=x, Cmode=011x. 3463 OpCmode = 0x6; 3464 Imm = SplatBits >> 24; 3465 break; 3466 } 3467 3468 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 3469 if (type == OtherModImm) return SDValue(); 3470 3471 if ((SplatBits & ~0xffff) == 0 && 3472 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3473 // Value = 0x0000nnff: Op=x, Cmode=1100. 3474 OpCmode = 0xc; 3475 Imm = SplatBits >> 8; 3476 SplatBits |= 0xff; 3477 break; 3478 } 3479 3480 if ((SplatBits & ~0xffffff) == 0 && 3481 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3482 // Value = 0x00nnffff: Op=x, Cmode=1101. 3483 OpCmode = 0xd; 3484 Imm = SplatBits >> 16; 3485 SplatBits |= 0xffff; 3486 break; 3487 } 3488 3489 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3490 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3491 // VMOV.I32. A (very) minor optimization would be to replicate the value 3492 // and fall through here to test for a valid 64-bit splat. But, then the 3493 // caller would also need to check and handle the change in size. 3494 return SDValue(); 3495 3496 case 64: { 3497 if (type != VMOVModImm) 3498 return SDValue(); 3499 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 3500 uint64_t BitMask = 0xff; 3501 uint64_t Val = 0; 3502 unsigned ImmMask = 1; 3503 Imm = 0; 3504 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3505 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3506 Val |= BitMask; 3507 Imm |= ImmMask; 3508 } else if ((SplatBits & BitMask) != 0) { 3509 return SDValue(); 3510 } 3511 BitMask <<= 8; 3512 ImmMask <<= 1; 3513 } 3514 // Op=1, Cmode=1110. 3515 OpCmode = 0x1e; 3516 SplatBits = Val; 3517 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3518 break; 3519 } 3520 3521 default: 3522 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3523 return SDValue(); 3524 } 3525 3526 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3527 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3528} 3529 3530static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3531 bool &ReverseVEXT, unsigned &Imm) { 3532 unsigned NumElts = VT.getVectorNumElements(); 3533 ReverseVEXT = false; 3534 3535 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3536 if (M[0] < 0) 3537 return false; 3538 3539 Imm = M[0]; 3540 3541 // If this is a VEXT shuffle, the immediate value is the index of the first 3542 // element. The other shuffle indices must be the successive elements after 3543 // the first one. 3544 unsigned ExpectedElt = Imm; 3545 for (unsigned i = 1; i < NumElts; ++i) { 3546 // Increment the expected index. 
If it wraps around, it may still be 3547 // a VEXT but the source vectors must be swapped. 3548 ExpectedElt += 1; 3549 if (ExpectedElt == NumElts * 2) { 3550 ExpectedElt = 0; 3551 ReverseVEXT = true; 3552 } 3553 3554 if (M[i] < 0) continue; // ignore UNDEF indices 3555 if (ExpectedElt != static_cast<unsigned>(M[i])) 3556 return false; 3557 } 3558 3559 // Adjust the index value if the source operands will be swapped. 3560 if (ReverseVEXT) 3561 Imm -= NumElts; 3562 3563 return true; 3564} 3565 3566/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3567/// instruction with the specified blocksize. (The order of the elements 3568/// within each block of the vector is reversed.) 3569static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3570 unsigned BlockSize) { 3571 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3572 "Only possible block sizes for VREV are: 16, 32, 64"); 3573 3574 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3575 if (EltSz == 64) 3576 return false; 3577 3578 unsigned NumElts = VT.getVectorNumElements(); 3579 unsigned BlockElts = M[0] + 1; 3580 // If the first shuffle index is UNDEF, be optimistic. 3581 if (M[0] < 0) 3582 BlockElts = BlockSize / EltSz; 3583 3584 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3585 return false; 3586 3587 for (unsigned i = 0; i < NumElts; ++i) { 3588 if (M[i] < 0) continue; // ignore UNDEF indices 3589 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3590 return false; 3591 } 3592 3593 return true; 3594} 3595 3596static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { 3597 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3598 // range, then 0 is placed into the resulting vector. So pretty much any mask 3599 // of 8 elements can work here. 3600 return VT == MVT::v8i8 && M.size() == 8; 3601} 3602 3603static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3604 unsigned &WhichResult) { 3605 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3606 if (EltSz == 64) 3607 return false; 3608 3609 unsigned NumElts = VT.getVectorNumElements(); 3610 WhichResult = (M[0] == 0 ? 0 : 1); 3611 for (unsigned i = 0; i < NumElts; i += 2) { 3612 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3613 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3614 return false; 3615 } 3616 return true; 3617} 3618 3619/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3620/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3621/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3622static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3623 unsigned &WhichResult) { 3624 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3625 if (EltSz == 64) 3626 return false; 3627 3628 unsigned NumElts = VT.getVectorNumElements(); 3629 WhichResult = (M[0] == 0 ? 0 : 1); 3630 for (unsigned i = 0; i < NumElts; i += 2) { 3631 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3632 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3633 return false; 3634 } 3635 return true; 3636} 3637 3638static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3639 unsigned &WhichResult) { 3640 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3641 if (EltSz == 64) 3642 return false; 3643 3644 unsigned NumElts = VT.getVectorNumElements(); 3645 WhichResult = (M[0] == 0 ? 
0 : 1); 3646 for (unsigned i = 0; i != NumElts; ++i) { 3647 if (M[i] < 0) continue; // ignore UNDEF indices 3648 if ((unsigned) M[i] != 2 * i + WhichResult) 3649 return false; 3650 } 3651 3652 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3653 if (VT.is64BitVector() && EltSz == 32) 3654 return false; 3655 3656 return true; 3657} 3658 3659/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3660/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3661/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3662static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3663 unsigned &WhichResult) { 3664 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3665 if (EltSz == 64) 3666 return false; 3667 3668 unsigned Half = VT.getVectorNumElements() / 2; 3669 WhichResult = (M[0] == 0 ? 0 : 1); 3670 for (unsigned j = 0; j != 2; ++j) { 3671 unsigned Idx = WhichResult; 3672 for (unsigned i = 0; i != Half; ++i) { 3673 int MIdx = M[i + j * Half]; 3674 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3675 return false; 3676 Idx += 2; 3677 } 3678 } 3679 3680 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3681 if (VT.is64BitVector() && EltSz == 32) 3682 return false; 3683 3684 return true; 3685} 3686 3687static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3688 unsigned &WhichResult) { 3689 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3690 if (EltSz == 64) 3691 return false; 3692 3693 unsigned NumElts = VT.getVectorNumElements(); 3694 WhichResult = (M[0] == 0 ? 0 : 1); 3695 unsigned Idx = WhichResult * NumElts / 2; 3696 for (unsigned i = 0; i != NumElts; i += 2) { 3697 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3698 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3699 return false; 3700 Idx += 1; 3701 } 3702 3703 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3704 if (VT.is64BitVector() && EltSz == 32) 3705 return false; 3706 3707 return true; 3708} 3709 3710/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3711/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3712/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3713static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3714 unsigned &WhichResult) { 3715 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3716 if (EltSz == 64) 3717 return false; 3718 3719 unsigned NumElts = VT.getVectorNumElements(); 3720 WhichResult = (M[0] == 0 ? 0 : 1); 3721 unsigned Idx = WhichResult * NumElts / 2; 3722 for (unsigned i = 0; i != NumElts; i += 2) { 3723 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3724 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 3725 return false; 3726 Idx += 1; 3727 } 3728 3729 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3730 if (VT.is64BitVector() && EltSz == 32) 3731 return false; 3732 3733 return true; 3734} 3735 3736// If N is an integer constant that can be moved into a register in one 3737// instruction, return an SDValue of such a constant (will become a MOV 3738// instruction). Otherwise return null. 
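// Added commentary (examples, not part of this revision): on Thumb1 either
// Val or ~Val must fit in 8 bits, so 200 and 0xFFFFFF05 (~250) both qualify;
// in ARM mode any modified immediate works, e.g. 0x3FC00000 (0xFF ror 10),
// whereas an arbitrary constant such as 0x12345678 does not.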
3739static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3740 const ARMSubtarget *ST, DebugLoc dl) { 3741 uint64_t Val; 3742 if (!isa<ConstantSDNode>(N)) 3743 return SDValue(); 3744 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3745 3746 if (ST->isThumb1Only()) { 3747 if (Val <= 255 || ~Val <= 255) 3748 return DAG.getConstant(Val, MVT::i32); 3749 } else { 3750 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3751 return DAG.getConstant(Val, MVT::i32); 3752 } 3753 return SDValue(); 3754} 3755 3756// If this is a case we can't handle, return null and let the default 3757// expansion code take care of it. 3758SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3759 const ARMSubtarget *ST) const { 3760 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3761 DebugLoc dl = Op.getDebugLoc(); 3762 EVT VT = Op.getValueType(); 3763 3764 APInt SplatBits, SplatUndef; 3765 unsigned SplatBitSize; 3766 bool HasAnyUndefs; 3767 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3768 if (SplatBitSize <= 64) { 3769 // Check if an immediate VMOV works. 3770 EVT VmovVT; 3771 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3772 SplatUndef.getZExtValue(), SplatBitSize, 3773 DAG, VmovVT, VT.is128BitVector(), 3774 VMOVModImm); 3775 if (Val.getNode()) { 3776 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3777 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3778 } 3779 3780 // Try an immediate VMVN. 3781 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3782 ((1LL << SplatBitSize) - 1)); 3783 Val = isNEONModifiedImm(NegatedImm, 3784 SplatUndef.getZExtValue(), SplatBitSize, 3785 DAG, VmovVT, VT.is128BitVector(), 3786 VMVNModImm); 3787 if (Val.getNode()) { 3788 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3789 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3790 } 3791 } 3792 } 3793 3794 // Scan through the operands to see if only one value is used. 3795 unsigned NumElts = VT.getVectorNumElements(); 3796 bool isOnlyLowElement = true; 3797 bool usesOnlyOneValue = true; 3798 bool isConstant = true; 3799 SDValue Value; 3800 for (unsigned i = 0; i < NumElts; ++i) { 3801 SDValue V = Op.getOperand(i); 3802 if (V.getOpcode() == ISD::UNDEF) 3803 continue; 3804 if (i > 0) 3805 isOnlyLowElement = false; 3806 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3807 isConstant = false; 3808 3809 if (!Value.getNode()) 3810 Value = V; 3811 else if (V != Value) 3812 usesOnlyOneValue = false; 3813 } 3814 3815 if (!Value.getNode()) 3816 return DAG.getUNDEF(VT); 3817 3818 if (isOnlyLowElement) 3819 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3820 3821 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3822 3823 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3824 // i32 and try again. 
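  // Added commentary (worked example, not part of this revision): a v4f32
  // splat of 1.0f has no NEON modified-immediate encoding, but after the
  // bitcast retry it is a v4i32 splat of 0x3F800000, which is a valid ARM-mode
  // immediate (0xFE ror 10), so it lowers to a single mov feeding a vdup.32.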
3825 if (usesOnlyOneValue && EltSize <= 32) { 3826 if (!isConstant) 3827 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3828 if (VT.getVectorElementType().isFloatingPoint()) { 3829 SmallVector<SDValue, 8> Ops; 3830 for (unsigned i = 0; i < NumElts; ++i) 3831 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 3832 Op.getOperand(i))); 3833 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 3834 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 3835 Val = LowerBUILD_VECTOR(Val, DAG, ST); 3836 if (Val.getNode()) 3837 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 3838 } 3839 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3840 if (Val.getNode()) 3841 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3842 } 3843 3844 // If all elements are constants and the case above didn't get hit, fall back 3845 // to the default expansion, which will generate a load from the constant 3846 // pool. 3847 if (isConstant) 3848 return SDValue(); 3849 3850 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 3851 if (NumElts >= 4) { 3852 SDValue shuffle = ReconstructShuffle(Op, DAG); 3853 if (shuffle != SDValue()) 3854 return shuffle; 3855 } 3856 3857 // Vectors with 32- or 64-bit elements can be built by directly assigning 3858 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3859 // will be legalized. 3860 if (EltSize >= 32) { 3861 // Do the expansion with floating-point types, since that is what the VFP 3862 // registers are defined to use, and since i64 is not legal. 3863 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3864 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3865 SmallVector<SDValue, 8> Ops; 3866 for (unsigned i = 0; i < NumElts; ++i) 3867 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 3868 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3869 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 3870 } 3871 3872 return SDValue(); 3873} 3874 3875// Gather data to see if the operation can be modelled as a 3876// shuffle in combination with VEXTs. 3877SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, 3878 SelectionDAG &DAG) const { 3879 DebugLoc dl = Op.getDebugLoc(); 3880 EVT VT = Op.getValueType(); 3881 unsigned NumElts = VT.getVectorNumElements(); 3882 3883 SmallVector<SDValue, 2> SourceVecs; 3884 SmallVector<unsigned, 2> MinElts; 3885 SmallVector<unsigned, 2> MaxElts; 3886 3887 for (unsigned i = 0; i < NumElts; ++i) { 3888 SDValue V = Op.getOperand(i); 3889 if (V.getOpcode() == ISD::UNDEF) 3890 continue; 3891 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) { 3892 // A shuffle can only come from building a vector from various 3893 // elements of other vectors. 3894 return SDValue(); 3895 } 3896 3897 // Record this extraction against the appropriate vector if possible... 3898 SDValue SourceVec = V.getOperand(0); 3899 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue(); 3900 bool FoundSource = false; 3901 for (unsigned j = 0; j < SourceVecs.size(); ++j) { 3902 if (SourceVecs[j] == SourceVec) { 3903 if (MinElts[j] > EltNo) 3904 MinElts[j] = EltNo; 3905 if (MaxElts[j] < EltNo) 3906 MaxElts[j] = EltNo; 3907 FoundSource = true; 3908 break; 3909 } 3910 } 3911 3912 // Or record a new source if not... 
3913    if (!FoundSource) {
3914      SourceVecs.push_back(SourceVec);
3915      MinElts.push_back(EltNo);
3916      MaxElts.push_back(EltNo);
3917    }
3918  }
3919
3920  // Currently only do something sane when at most two source vectors
3921  // are involved.
3922  if (SourceVecs.size() > 2)
3923    return SDValue();
3924
3925  SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
3926  int VEXTOffsets[2] = { 0, 0 };
3927
3928  // This loop extracts the usage patterns of the source vectors
3929  // and prepares appropriate SDValues for a shuffle if possible.
3930  for (unsigned i = 0; i < SourceVecs.size(); ++i) {
3931    if (SourceVecs[i].getValueType() == VT) {
3932      // No VEXT necessary.
3933      ShuffleSrcs[i] = SourceVecs[i];
3934      VEXTOffsets[i] = 0;
3935      continue;
3936    } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
3937      // It probably isn't worth padding out a smaller vector just to
3938      // break it down again in a shuffle.
3939      return SDValue();
3940    }
3941
3942    // Since only 64-bit and 128-bit vectors are legal on ARM and
3943    // we've eliminated the other cases...
3944    assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts &&
3945           "unexpected vector sizes in ReconstructShuffle");
3946
3947    if (MaxElts[i] - MinElts[i] >= NumElts) {
3948      // Span too large for a VEXT to cope with.
3949      return SDValue();
3950    }
3951
3952    if (MinElts[i] >= NumElts) {
3953      // The extraction can just take the second half.
3954      VEXTOffsets[i] = NumElts;
3955      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
3956                                   SourceVecs[i],
3957                                   DAG.getIntPtrConstant(NumElts));
3958    } else if (MaxElts[i] < NumElts) {
3959      // The extraction can just take the first half.
3960      VEXTOffsets[i] = 0;
3961      ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
3962                                   SourceVecs[i],
3963                                   DAG.getIntPtrConstant(0));
3964    } else {
3965      // An actual VEXT is needed.
3966      VEXTOffsets[i] = MinElts[i];
3967      SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
3968                                     SourceVecs[i],
3969                                     DAG.getIntPtrConstant(0));
3970      SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
3971                                     SourceVecs[i],
3972                                     DAG.getIntPtrConstant(NumElts));
3973      ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2,
3974                                   DAG.getConstant(VEXTOffsets[i], MVT::i32));
3975    }
3976  }
3977
3978  SmallVector<int, 8> Mask;
3979
3980  for (unsigned i = 0; i < NumElts; ++i) {
3981    SDValue Entry = Op.getOperand(i);
3982    if (Entry.getOpcode() == ISD::UNDEF) {
3983      Mask.push_back(-1);
3984      continue;
3985    }
3986
3987    SDValue ExtractVec = Entry.getOperand(0);
3988    int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i)
3989                                          .getOperand(1))->getSExtValue();
3990    if (ExtractVec == SourceVecs[0]) {
3991      Mask.push_back(ExtractElt - VEXTOffsets[0]);
3992    } else {
3993      Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]);
3994    }
3995  }
3996
3997  // Final check before we try to produce nonsense...
3998  if (isShuffleMaskLegal(Mask, VT))
3999    return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1],
4000                                &Mask[0]);
4001
4002  return SDValue();
4003}
4004
4005/// isShuffleMaskLegal - Targets can use this to indicate that they only
4006/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
4007/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
4008/// are assumed to be legal.
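// Added commentary (examples, not part of this revision): for v4i16 the masks
// <0,0,0,0> (splat/VDUP), <1,2,3,4> (VEXT #1), <3,2,1,0> (VREV64.16),
// <0,4,2,6> (VTRN) and <0,2,4,6> (VUZP) are all reported legal below, and any
// 4-element mask whose perfect-shuffle table entry costs at most 4 operations
// is accepted as well.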
4009bool 4010ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4011 EVT VT) const { 4012 if (VT.getVectorNumElements() == 4 && 4013 (VT.is128BitVector() || VT.is64BitVector())) { 4014 unsigned PFIndexes[4]; 4015 for (unsigned i = 0; i != 4; ++i) { 4016 if (M[i] < 0) 4017 PFIndexes[i] = 8; 4018 else 4019 PFIndexes[i] = M[i]; 4020 } 4021 4022 // Compute the index in the perfect shuffle table. 4023 unsigned PFTableIndex = 4024 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4025 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4026 unsigned Cost = (PFEntry >> 30); 4027 4028 if (Cost <= 4) 4029 return true; 4030 } 4031 4032 bool ReverseVEXT; 4033 unsigned Imm, WhichResult; 4034 4035 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4036 return (EltSize >= 32 || 4037 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4038 isVREVMask(M, VT, 64) || 4039 isVREVMask(M, VT, 32) || 4040 isVREVMask(M, VT, 16) || 4041 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4042 isVTBLMask(M, VT) || 4043 isVTRNMask(M, VT, WhichResult) || 4044 isVUZPMask(M, VT, WhichResult) || 4045 isVZIPMask(M, VT, WhichResult) || 4046 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4047 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4048 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4049} 4050 4051/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4052/// the specified operations to build the shuffle. 4053static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4054 SDValue RHS, SelectionDAG &DAG, 4055 DebugLoc dl) { 4056 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4057 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4058 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4059 4060 enum { 4061 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4062 OP_VREV, 4063 OP_VDUP0, 4064 OP_VDUP1, 4065 OP_VDUP2, 4066 OP_VDUP3, 4067 OP_VEXT1, 4068 OP_VEXT2, 4069 OP_VEXT3, 4070 OP_VUZPL, // VUZP, left result 4071 OP_VUZPR, // VUZP, right result 4072 OP_VZIPL, // VZIP, left result 4073 OP_VZIPR, // VZIP, right result 4074 OP_VTRNL, // VTRN, left result 4075 OP_VTRNR // VTRN, right result 4076 }; 4077 4078 if (OpNum == OP_COPY) { 4079 if (LHSID == (1*9+2)*9+3) return LHS; 4080 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4081 return RHS; 4082 } 4083 4084 SDValue OpLHS, OpRHS; 4085 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4086 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4087 EVT VT = OpLHS.getValueType(); 4088 4089 switch (OpNum) { 4090 default: llvm_unreachable("Unknown shuffle opcode!"); 4091 case OP_VREV: 4092 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 4093 case OP_VDUP0: 4094 case OP_VDUP1: 4095 case OP_VDUP2: 4096 case OP_VDUP3: 4097 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4098 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 4099 case OP_VEXT1: 4100 case OP_VEXT2: 4101 case OP_VEXT3: 4102 return DAG.getNode(ARMISD::VEXT, dl, VT, 4103 OpLHS, OpRHS, 4104 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 4105 case OP_VUZPL: 4106 case OP_VUZPR: 4107 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4108 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 4109 case OP_VZIPL: 4110 case OP_VZIPR: 4111 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4112 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 4113 case OP_VTRNL: 4114 case OP_VTRNR: 4115 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4116 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 4117 } 
4118}
4119
4120static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op,
4121                                       SmallVectorImpl<int> &ShuffleMask,
4122                                       SelectionDAG &DAG) {
4123  // Check to see if we can use the VTBL instruction.
4124  SDValue V1 = Op.getOperand(0);
4125  SDValue V2 = Op.getOperand(1);
4126  DebugLoc DL = Op.getDebugLoc();
4127
4128  SmallVector<SDValue, 8> VTBLMask;
4129  for (SmallVectorImpl<int>::iterator
4130         I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4131    VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4132
4133  if (V2.getNode()->getOpcode() == ISD::UNDEF)
4134    return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4135                       DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4136                                   &VTBLMask[0], 8));
4137
4138  return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4139                     DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4140                                 &VTBLMask[0], 8));
4141}
4142
4143static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4144  SDValue V1 = Op.getOperand(0);
4145  SDValue V2 = Op.getOperand(1);
4146  DebugLoc dl = Op.getDebugLoc();
4147  EVT VT = Op.getValueType();
4148  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4149  SmallVector<int, 8> ShuffleMask;
4150
4151  // Convert shuffles that are directly supported on NEON to target-specific
4152  // DAG nodes, instead of keeping them as shuffles and matching them again
4153  // during code selection. This is more efficient and avoids the possibility
4154  // of inconsistencies between legalization and selection.
4155  // FIXME: floating-point vectors should be canonicalized to integer vectors
4156  // of the same size so that they get CSEd properly.
4157  SVN->getMask(ShuffleMask);
4158
4159  unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4160  if (EltSize <= 32) {
4161    if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4162      int Lane = SVN->getSplatIndex();
4163      // If this is an undef splat, generate it via "just" vdup, if possible.
4164      if (Lane == -1) Lane = 0;
4165
4166      if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4167        return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4168      }
4169      return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
4170                         DAG.getConstant(Lane, MVT::i32));
4171    }
4172
4173    bool ReverseVEXT;
4174    unsigned Imm;
4175    if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
4176      if (ReverseVEXT)
4177        std::swap(V1, V2);
4178      return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
4179                         DAG.getConstant(Imm, MVT::i32));
4180    }
4181
4182    if (isVREVMask(ShuffleMask, VT, 64))
4183      return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
4184    if (isVREVMask(ShuffleMask, VT, 32))
4185      return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
4186    if (isVREVMask(ShuffleMask, VT, 16))
4187      return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
4188
4189    // Check for Neon shuffles that modify both input vectors in place.
4190    // If both results are used, i.e., if there are two shuffles with the same
4191    // source operands and with masks corresponding to both results of one of
4192    // these operations, DAG memoization will ensure that a single node is
4193    // used for both shuffles.
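    // Added commentary (example, not part of this revision): for v4i32
    // operands, the masks <0,4,2,6> and <1,5,3,7> are the two results of one
    // VTRN.32; lowering both through the same ARMISD::VTRN node lets a single
    // "vtrn.32 q0, q1" serve the pair.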
4194 unsigned WhichResult; 4195 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4196 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4197 V1, V2).getValue(WhichResult); 4198 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4199 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4200 V1, V2).getValue(WhichResult); 4201 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4202 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4203 V1, V2).getValue(WhichResult); 4204 4205 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4206 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4207 V1, V1).getValue(WhichResult); 4208 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4209 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4210 V1, V1).getValue(WhichResult); 4211 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4212 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4213 V1, V1).getValue(WhichResult); 4214 } 4215 4216 // If the shuffle is not directly supported and it has 4 elements, use 4217 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4218 unsigned NumElts = VT.getVectorNumElements(); 4219 if (NumElts == 4) { 4220 unsigned PFIndexes[4]; 4221 for (unsigned i = 0; i != 4; ++i) { 4222 if (ShuffleMask[i] < 0) 4223 PFIndexes[i] = 8; 4224 else 4225 PFIndexes[i] = ShuffleMask[i]; 4226 } 4227 4228 // Compute the index in the perfect shuffle table. 4229 unsigned PFTableIndex = 4230 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4231 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4232 unsigned Cost = (PFEntry >> 30); 4233 4234 if (Cost <= 4) 4235 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4236 } 4237 4238 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 4239 if (EltSize >= 32) { 4240 // Do the expansion with floating-point types, since that is what the VFP 4241 // registers are defined to use, and since i64 is not legal. 4242 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4243 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4244 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4245 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4246 SmallVector<SDValue, 8> Ops; 4247 for (unsigned i = 0; i < NumElts; ++i) { 4248 if (ShuffleMask[i] < 0) 4249 Ops.push_back(DAG.getUNDEF(EltVT)); 4250 else 4251 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4252 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4253 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4254 MVT::i32))); 4255 } 4256 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4257 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4258 } 4259 4260 if (VT == MVT::v8i8) { 4261 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4262 if (NewOp.getNode()) 4263 return NewOp; 4264 } 4265 4266 return SDValue(); 4267} 4268 4269static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4270 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
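  // Added commentary (sketch, not part of this revision): an i32-typed extract
  // of a narrow lane, e.g. "extractelement <8 x i16> %v, i32 3", is rewritten
  // to VGETLANEu so the zero-extension from the lane is explicit (a single
  // "vmov.u16 r0, d0[3]", registers hypothetical); lanes of 32 bits or more
  // pass through unchanged.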
4271 SDValue Lane = Op.getOperand(1); 4272 if (!isa<ConstantSDNode>(Lane)) 4273 return SDValue(); 4274 4275 SDValue Vec = Op.getOperand(0); 4276 if (Op.getValueType() == MVT::i32 && 4277 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4278 DebugLoc dl = Op.getDebugLoc(); 4279 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4280 } 4281 4282 return Op; 4283} 4284 4285static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4286 // The only time a CONCAT_VECTORS operation can have legal types is when 4287 // two 64-bit vectors are concatenated to a 128-bit vector. 4288 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4289 "unexpected CONCAT_VECTORS"); 4290 DebugLoc dl = Op.getDebugLoc(); 4291 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4292 SDValue Op0 = Op.getOperand(0); 4293 SDValue Op1 = Op.getOperand(1); 4294 if (Op0.getOpcode() != ISD::UNDEF) 4295 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4296 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4297 DAG.getIntPtrConstant(0)); 4298 if (Op1.getOpcode() != ISD::UNDEF) 4299 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4300 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4301 DAG.getIntPtrConstant(1)); 4302 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4303} 4304 4305/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4306/// element has been zero/sign-extended, depending on the isSigned parameter, 4307/// from an integer type half its size. 4308static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4309 bool isSigned) { 4310 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4311 EVT VT = N->getValueType(0); 4312 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4313 SDNode *BVN = N->getOperand(0).getNode(); 4314 if (BVN->getValueType(0) != MVT::v4i32 || 4315 BVN->getOpcode() != ISD::BUILD_VECTOR) 4316 return false; 4317 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4318 unsigned HiElt = 1 - LoElt; 4319 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4320 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4321 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4322 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4323 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4324 return false; 4325 if (isSigned) { 4326 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4327 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4328 return true; 4329 } else { 4330 if (Hi0->isNullValue() && Hi1->isNullValue()) 4331 return true; 4332 } 4333 return false; 4334 } 4335 4336 if (N->getOpcode() != ISD::BUILD_VECTOR) 4337 return false; 4338 4339 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4340 SDNode *Elt = N->getOperand(i).getNode(); 4341 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4342 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4343 unsigned HalfSize = EltSize / 2; 4344 if (isSigned) { 4345 int64_t SExtVal = C->getSExtValue(); 4346 if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) 4347 return false; 4348 } else { 4349 if ((C->getZExtValue() >> HalfSize) != 0) 4350 return false; 4351 } 4352 continue; 4353 } 4354 return false; 4355 } 4356 4357 return true; 4358} 4359 4360/// isSignExtended - Check if a node is a vector value that is sign-extended 4361/// or a constant BUILD_VECTOR with sign-extended elements. 
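// Added commentary (examples, not part of this revision): both
//   (v4i32 (sign_extend (v4i16 %a)))
// and a constant vector such as <-1, 42, 7, -8>, whose elements all fit in
// i16, count as sign-extended for the VMULL formation in LowerMUL below.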
4362static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4363 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4364 return true; 4365 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4366 return true; 4367 return false; 4368} 4369 4370/// isZeroExtended - Check if a node is a vector value that is zero-extended 4371/// or a constant BUILD_VECTOR with zero-extended elements. 4372static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4373 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4374 return true; 4375 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4376 return true; 4377 return false; 4378} 4379 4380/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4381/// load, or BUILD_VECTOR with extended elements, return the unextended value. 4382static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4383 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4384 return N->getOperand(0); 4385 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4386 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4387 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4388 LD->isNonTemporal(), LD->getAlignment()); 4389 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4390 // have been legalized as a BITCAST from v4i32. 4391 if (N->getOpcode() == ISD::BITCAST) { 4392 SDNode *BVN = N->getOperand(0).getNode(); 4393 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4394 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4395 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4396 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4397 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4398 } 4399 // Construct a new BUILD_VECTOR with elements truncated to half the size. 4400 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4401 EVT VT = N->getValueType(0); 4402 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4403 unsigned NumElts = VT.getVectorNumElements(); 4404 MVT TruncVT = MVT::getIntegerVT(EltSize); 4405 SmallVector<SDValue, 8> Ops; 4406 for (unsigned i = 0; i != NumElts; ++i) { 4407 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4408 const APInt &CInt = C->getAPIntValue(); 4409 Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); 4410 } 4411 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4412 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4413} 4414 4415static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4416 unsigned Opcode = N->getOpcode(); 4417 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4418 SDNode *N0 = N->getOperand(0).getNode(); 4419 SDNode *N1 = N->getOperand(1).getNode(); 4420 return N0->hasOneUse() && N1->hasOneUse() && 4421 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4422 } 4423 return false; 4424} 4425 4426static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4427 unsigned Opcode = N->getOpcode(); 4428 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4429 SDNode *N0 = N->getOperand(0).getNode(); 4430 SDNode *N1 = N->getOperand(1).getNode(); 4431 return N0->hasOneUse() && N1->hasOneUse() && 4432 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4433 } 4434 return false; 4435} 4436 4437static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4438 // Multiplications are only custom-lowered for 128-bit vectors so that 4439 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
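  // Added commentary (sketch, not part of this revision): e.g.
  //   mul (v8i16 (sext v8i8 %a)), (v8i16 (sext v8i8 %b))
  // becomes a single "vmull.s8 q0, d0, d1" (registers hypothetical), with the
  // extensions folded away by SkipExtension below; zext pairs map to vmull.u8.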
4440 EVT VT = Op.getValueType(); 4441 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4442 SDNode *N0 = Op.getOperand(0).getNode(); 4443 SDNode *N1 = Op.getOperand(1).getNode(); 4444 unsigned NewOpc = 0; 4445 bool isMLA = false; 4446 bool isN0SExt = isSignExtended(N0, DAG); 4447 bool isN1SExt = isSignExtended(N1, DAG); 4448 if (isN0SExt && isN1SExt) 4449 NewOpc = ARMISD::VMULLs; 4450 else { 4451 bool isN0ZExt = isZeroExtended(N0, DAG); 4452 bool isN1ZExt = isZeroExtended(N1, DAG); 4453 if (isN0ZExt && isN1ZExt) 4454 NewOpc = ARMISD::VMULLu; 4455 else if (isN1SExt || isN1ZExt) { 4456 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4457 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4458 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4459 NewOpc = ARMISD::VMULLs; 4460 isMLA = true; 4461 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4462 NewOpc = ARMISD::VMULLu; 4463 isMLA = true; 4464 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4465 std::swap(N0, N1); 4466 NewOpc = ARMISD::VMULLu; 4467 isMLA = true; 4468 } 4469 } 4470 4471 if (!NewOpc) { 4472 if (VT == MVT::v2i64) 4473 // Fall through to expand this. It is not legal. 4474 return SDValue(); 4475 else 4476 // Other vector multiplications are legal. 4477 return Op; 4478 } 4479 } 4480 4481 // Legalize to a VMULL instruction. 4482 DebugLoc DL = Op.getDebugLoc(); 4483 SDValue Op0; 4484 SDValue Op1 = SkipExtension(N1, DAG); 4485 if (!isMLA) { 4486 Op0 = SkipExtension(N0, DAG); 4487 assert(Op0.getValueType().is64BitVector() && 4488 Op1.getValueType().is64BitVector() && 4489 "unexpected types for extended operands to VMULL"); 4490 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4491 } 4492 4493 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4494 // isel lowering to take advantage of no-stall back to back vmul + vmla. 4495 // vmull q0, d4, d6 4496 // vmlal q0, d5, d6 4497 // is faster than 4498 // vaddl q0, d4, d5 4499 // vmovl q1, d6 4500 // vmul q0, q0, q1 4501 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4502 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4503 EVT Op1VT = Op1.getValueType(); 4504 return DAG.getNode(N0->getOpcode(), DL, VT, 4505 DAG.getNode(NewOpc, DL, VT, 4506 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4507 DAG.getNode(NewOpc, DL, VT, 4508 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4509} 4510 4511static SDValue 4512LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4513 // Convert to float 4514 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4515 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4516 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4517 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4518 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4519 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4520 // Get reciprocal estimate. 4521 // float4 recip = vrecpeq_f32(yf); 4522 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4523 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4524 // Because char has a smaller range than uchar, we can actually get away 4525 // without any newton steps. This requires that we use a weird bias 4526 // of 0xb000, however (again, this has been exhaustively tested). 
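  // (The trick: adding a constant to the raw float bits scales the value by
  // a roughly fixed relative amount, which compensates for the estimate
  // reading slightly low; the exact bias is empirical, per the note above.)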
4527 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4528 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4529 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4530 Y = DAG.getConstant(0xb000, MVT::i32); 4531 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4532 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4533 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4534 // Convert back to short. 4535 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4536 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4537 return X; 4538} 4539 4540static SDValue 4541LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4542 SDValue N2; 4543 // Convert to float. 4544 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4545 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4546 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4547 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4548 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4549 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4550 4551 // Use reciprocal estimate and one refinement step. 4552 // float4 recip = vrecpeq_f32(yf); 4553 // recip *= vrecpsq_f32(yf, recip); 4554 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4555 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4556 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4557 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4558 N1, N2); 4559 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4560 // Because short has a smaller range than ushort, we can actually get away 4561 // with only a single newton step. This requires that we use a weird bias 4562 // of 89, however (again, this has been exhaustively tested). 4563 // float4 result = as_float4(as_int4(xf*recip) + 89); 4564 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4565 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4566 N1 = DAG.getConstant(89, MVT::i32); 4567 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4568 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4569 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4570 // Convert back to integer and return. 
4571  // return vmovn_s32(vcvt_s32_f32(result));
4572  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
4573  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
4574  return N0;
4575}
4576
4577static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) {
4578  EVT VT = Op.getValueType();
4579  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
4580         "unexpected type for custom-lowering ISD::SDIV");
4581
4582  DebugLoc dl = Op.getDebugLoc();
4583  SDValue N0 = Op.getOperand(0);
4584  SDValue N1 = Op.getOperand(1);
4585  SDValue N2, N3;
4586
4587  if (VT == MVT::v8i8) {
4588    N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0);
4589    N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1);
4590
4591    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4592                     DAG.getIntPtrConstant(4));
4593    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4594                     DAG.getIntPtrConstant(4));
4595    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4596                     DAG.getIntPtrConstant(0));
4597    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4598                     DAG.getIntPtrConstant(0));
4599
4600    N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16
4601    N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16
4602
4603    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
4604    N0 = LowerCONCAT_VECTORS(N0, DAG);
4605
4606    N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0);
4607    return N0;
4608  }
4609  return LowerSDIV_v4i16(N0, N1, dl, DAG);
4610}
4611
4612static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) {
4613  EVT VT = Op.getValueType();
4614  assert((VT == MVT::v4i16 || VT == MVT::v8i8) &&
4615         "unexpected type for custom-lowering ISD::UDIV");
4616
4617  DebugLoc dl = Op.getDebugLoc();
4618  SDValue N0 = Op.getOperand(0);
4619  SDValue N1 = Op.getOperand(1);
4620  SDValue N2, N3;
4621
4622  if (VT == MVT::v8i8) {
4623    N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0);
4624    N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1);
4625
4626    N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4627                     DAG.getIntPtrConstant(4));
4628    N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4629                     DAG.getIntPtrConstant(4));
4630    N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0,
4631                     DAG.getIntPtrConstant(0));
4632    N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1,
4633                     DAG.getIntPtrConstant(0));
4634
4635    N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16; the signed helper is
4636    N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // safe: inputs are zero-extended
4637
4638    N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2);
4639    N0 = LowerCONCAT_VECTORS(N0, DAG);
4640
4641    N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8,
4642                     DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32),
4643                     N0);
4644    return N0;
4645  }
4646
4647  // v4i16 udiv ... Convert to float.
4648  // float4 yf = vcvt_f32_s32(vmovl_u16(y));
4649  // float4 xf = vcvt_f32_s32(vmovl_u16(x));
4650  N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0);
4651  N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1);
4652  N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0);
4653  N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1);
4654
4655  // Use reciprocal estimate and two refinement steps.
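  // (Each vrecps step is a Newton-Raphson refinement that roughly doubles
  // the precision of the estimate; 16-bit unsigned quotients span twice the
  // range of the signed case above, hence two steps here instead of one.)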
4656  // float4 recip = vrecpeq_f32(yf);
4657  // recip *= vrecpsq_f32(yf, recip);
4658  // recip *= vrecpsq_f32(yf, recip);
4659  N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4660                   DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1);
4661  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4662                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4663                   N1, N2);
4664  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4665  N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32,
4666                   DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32),
4667                   N1, N2);
4668  N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2);
4669  // Simply multiplying by the reciprocal estimate can leave us a few ulps
4670  // too low, so we add 2 ulps (exhaustive testing shows that this is enough,
4671  // and that it will never cause us to return an answer too large).
4672  // float4 result = as_float4(as_int4(xf*recip) + 2);
4673  N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2);
4674  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0);
4675  N1 = DAG.getConstant(2, MVT::i32);
4676  N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1);
4677  N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1);
4678  N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0);
4679  // Convert back to integer and return.
4680  // return vmovn_u32(vcvt_s32_f32(result));
4681  N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0);
4682  N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0);
4683  return N0;
4684}
4685
4686SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
4687  switch (Op.getOpcode()) {
4688  default: llvm_unreachable("Don't know how to custom lower this!");
4689  case ISD::ConstantPool:  return LowerConstantPool(Op, DAG);
4690  case ISD::BlockAddress:  return LowerBlockAddress(Op, DAG);
4691  case ISD::GlobalAddress:
4692    return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 4693 LowerGlobalAddressELF(Op, DAG); 4694 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4695 case ISD::SELECT: return LowerSELECT(Op, DAG); 4696 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 4697 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 4698 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 4699 case ISD::VASTART: return LowerVASTART(Op, DAG); 4700 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 4701 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 4702 case ISD::SINT_TO_FP: 4703 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 4704 case ISD::FP_TO_SINT: 4705 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 4706 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4707 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4708 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4709 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 4710 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 4711 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 4712 case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); 4713 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 4714 Subtarget); 4715 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 4716 case ISD::SHL: 4717 case ISD::SRL: 4718 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 4719 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 4720 case ISD::SRL_PARTS: 4721 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 4722 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 4723 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 4724 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 4725 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4726 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4727 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 4728 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 4729 case ISD::MUL: return LowerMUL(Op, DAG); 4730 case ISD::SDIV: return LowerSDIV(Op, DAG); 4731 case ISD::UDIV: return LowerUDIV(Op, DAG); 4732 } 4733 return SDValue(); 4734} 4735 4736/// ReplaceNodeResults - Replace the results of node with an illegal result 4737/// type with new values built out of custom code. 
4738void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 4739 SmallVectorImpl<SDValue>&Results, 4740 SelectionDAG &DAG) const { 4741 SDValue Res; 4742 switch (N->getOpcode()) { 4743 default: 4744 llvm_unreachable("Don't know how to custom expand this!"); 4745 break; 4746 case ISD::BITCAST: 4747 Res = ExpandBITCAST(N, DAG); 4748 break; 4749 case ISD::SRL: 4750 case ISD::SRA: 4751 Res = Expand64BitShift(N, DAG, Subtarget); 4752 break; 4753 } 4754 if (Res.getNode()) 4755 Results.push_back(Res); 4756} 4757 4758//===----------------------------------------------------------------------===// 4759// ARM Scheduler Hooks 4760//===----------------------------------------------------------------------===// 4761 4762MachineBasicBlock * 4763ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 4764 MachineBasicBlock *BB, 4765 unsigned Size) const { 4766 unsigned dest = MI->getOperand(0).getReg(); 4767 unsigned ptr = MI->getOperand(1).getReg(); 4768 unsigned oldval = MI->getOperand(2).getReg(); 4769 unsigned newval = MI->getOperand(3).getReg(); 4770 unsigned scratch = BB->getParent()->getRegInfo() 4771 .createVirtualRegister(ARM::GPRRegisterClass); 4772 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4773 DebugLoc dl = MI->getDebugLoc(); 4774 bool isThumb2 = Subtarget->isThumb2(); 4775 4776 unsigned ldrOpc, strOpc; 4777 switch (Size) { 4778 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 4779 case 1: 4780 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 4781 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 4782 break; 4783 case 2: 4784 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 4785 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 4786 break; 4787 case 4: 4788 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 4789 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 4790 break; 4791 } 4792 4793 MachineFunction *MF = BB->getParent(); 4794 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4795 MachineFunction::iterator It = BB; 4796 ++It; // insert the new blocks after the current block 4797 4798 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 4799 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 4800 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4801 MF->insert(It, loop1MBB); 4802 MF->insert(It, loop2MBB); 4803 MF->insert(It, exitMBB); 4804 4805 // Transfer the remainder of BB and its successor edges to exitMBB. 4806 exitMBB->splice(exitMBB->begin(), BB, 4807 llvm::next(MachineBasicBlock::iterator(MI)), 4808 BB->end()); 4809 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4810 4811 // thisMBB: 4812 // ... 4813 // fallthrough --> loop1MBB 4814 BB->addSuccessor(loop1MBB); 4815 4816 // loop1MBB: 4817 // ldrex dest, [ptr] 4818 // cmp dest, oldval 4819 // bne exitMBB 4820 BB = loop1MBB; 4821 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 4822 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4823 .addReg(dest).addReg(oldval)); 4824 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4825 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 4826 BB->addSuccessor(loop2MBB); 4827 BB->addSuccessor(exitMBB); 4828 4829 // loop2MBB: 4830 // strex scratch, newval, [ptr] 4831 // cmp scratch, #0 4832 // bne loop1MBB 4833 BB = loop2MBB; 4834 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 4835 .addReg(ptr)); 4836 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPri : ARM::CMPri))
4837                 .addReg(scratch).addImm(0));
4838  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4839    .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4840  BB->addSuccessor(loop1MBB);
4841  BB->addSuccessor(exitMBB);
4842
4843  //  exitMBB:
4844  //   ...
4845  BB = exitMBB;
4846
4847  MI->eraseFromParent();   // The instruction is gone now.
4848
4849  return BB;
4850}
4851
4852MachineBasicBlock *
4853ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
4854                                    unsigned Size, unsigned BinOpcode) const {
4855  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
4856  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
4857
4858  const BasicBlock *LLVM_BB = BB->getBasicBlock();
4859  MachineFunction *MF = BB->getParent();
4860  MachineFunction::iterator It = BB;
4861  ++It;
4862
4863  unsigned dest = MI->getOperand(0).getReg();
4864  unsigned ptr = MI->getOperand(1).getReg();
4865  unsigned incr = MI->getOperand(2).getReg();
4866  DebugLoc dl = MI->getDebugLoc();
4867
4868  bool isThumb2 = Subtarget->isThumb2();
4869  unsigned ldrOpc, strOpc;
4870  switch (Size) {
4871  default: llvm_unreachable("unsupported size for AtomicBinary!");
4872  case 1:
4873    ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
4874    strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
4875    break;
4876  case 2:
4877    ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
4878    strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
4879    break;
4880  case 4:
4881    ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
4882    strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
4883    break;
4884  }
4885
4886  MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4887  MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
4888  MF->insert(It, loopMBB);
4889  MF->insert(It, exitMBB);
4890
4891  // Transfer the remainder of BB and its successor edges to exitMBB.
4892  exitMBB->splice(exitMBB->begin(), BB,
4893                  llvm::next(MachineBasicBlock::iterator(MI)),
4894                  BB->end());
4895  exitMBB->transferSuccessorsAndUpdatePHIs(BB);
4896
4897  MachineRegisterInfo &RegInfo = MF->getRegInfo();
4898  unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4899  unsigned scratch2 = (!BinOpcode) ? incr :
4900    RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
4901
4902  //  thisMBB:
4903  //   ...
4904  //   fallthrough --> loopMBB
4905  BB->addSuccessor(loopMBB);
4906
4907  //  loopMBB:
4908  //   ldrex dest, ptr
4909  //   <binop> scratch2, dest, incr
4910  //   strex scratch, scratch2, ptr
4911  //   cmp scratch, #0
4912  //   bne- loopMBB
4913  //   fallthrough --> exitMBB
4914  BB = loopMBB;
4915  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
4916  if (BinOpcode) {
4917    // operand order needs to go the other way for NAND
4918    if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
4919      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4920                     addReg(incr).addReg(dest)).addReg(0);
4921    else
4922      AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
4923                     addReg(dest).addReg(incr)).addReg(0);
4924  }
4925
4926  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
4927                 .addReg(ptr));
4928  AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
4929                 .addReg(scratch).addImm(0));
4930  BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
4931    .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
4932
4933  BB->addSuccessor(loopMBB);
4934  BB->addSuccessor(exitMBB);
4935
4936  //  exitMBB:
4937  //   ...
4938 BB = exitMBB; 4939 4940 MI->eraseFromParent(); // The instruction is gone now. 4941 4942 return BB; 4943} 4944 4945static 4946MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 4947 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 4948 E = MBB->succ_end(); I != E; ++I) 4949 if (*I != Succ) 4950 return *I; 4951 llvm_unreachable("Expecting a BB with two successors!"); 4952} 4953 4954MachineBasicBlock * 4955ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 4956 MachineBasicBlock *BB) const { 4957 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4958 DebugLoc dl = MI->getDebugLoc(); 4959 bool isThumb2 = Subtarget->isThumb2(); 4960 switch (MI->getOpcode()) { 4961 default: 4962 MI->dump(); 4963 llvm_unreachable("Unexpected instr type to insert"); 4964 4965 case ARM::ATOMIC_LOAD_ADD_I8: 4966 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4967 case ARM::ATOMIC_LOAD_ADD_I16: 4968 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4969 case ARM::ATOMIC_LOAD_ADD_I32: 4970 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 4971 4972 case ARM::ATOMIC_LOAD_AND_I8: 4973 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4974 case ARM::ATOMIC_LOAD_AND_I16: 4975 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4976 case ARM::ATOMIC_LOAD_AND_I32: 4977 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 4978 4979 case ARM::ATOMIC_LOAD_OR_I8: 4980 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4981 case ARM::ATOMIC_LOAD_OR_I16: 4982 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4983 case ARM::ATOMIC_LOAD_OR_I32: 4984 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 4985 4986 case ARM::ATOMIC_LOAD_XOR_I8: 4987 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4988 case ARM::ATOMIC_LOAD_XOR_I16: 4989 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4990 case ARM::ATOMIC_LOAD_XOR_I32: 4991 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 4992 4993 case ARM::ATOMIC_LOAD_NAND_I8: 4994 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4995 case ARM::ATOMIC_LOAD_NAND_I16: 4996 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4997 case ARM::ATOMIC_LOAD_NAND_I32: 4998 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 4999 5000 case ARM::ATOMIC_LOAD_SUB_I8: 5001 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5002 case ARM::ATOMIC_LOAD_SUB_I16: 5003 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5004 case ARM::ATOMIC_LOAD_SUB_I32: 5005 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5006 5007 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 5008 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 5009 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 5010 5011 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 5012 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 5013 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 5014 5015 case ARM::tMOVCCr_pseudo: { 5016 // To "insert" a SELECT_CC instruction, we actually have to insert the 5017 // diamond control-flow pattern. 
The incoming instruction knows the 5018 // destination vreg to set, the condition code register to branch on, the 5019 // true/false values to select between, and a branch opcode to use. 5020 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5021 MachineFunction::iterator It = BB; 5022 ++It; 5023 5024 // thisMBB: 5025 // ... 5026 // TrueVal = ... 5027 // cmpTY ccX, r1, r2 5028 // bCC copy1MBB 5029 // fallthrough --> copy0MBB 5030 MachineBasicBlock *thisMBB = BB; 5031 MachineFunction *F = BB->getParent(); 5032 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 5033 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 5034 F->insert(It, copy0MBB); 5035 F->insert(It, sinkMBB); 5036 5037 // Transfer the remainder of BB and its successor edges to sinkMBB. 5038 sinkMBB->splice(sinkMBB->begin(), BB, 5039 llvm::next(MachineBasicBlock::iterator(MI)), 5040 BB->end()); 5041 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 5042 5043 BB->addSuccessor(copy0MBB); 5044 BB->addSuccessor(sinkMBB); 5045 5046 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 5047 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 5048 5049 // copy0MBB: 5050 // %FalseValue = ... 5051 // # fallthrough to sinkMBB 5052 BB = copy0MBB; 5053 5054 // Update machine-CFG edges 5055 BB->addSuccessor(sinkMBB); 5056 5057 // sinkMBB: 5058 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5059 // ... 5060 BB = sinkMBB; 5061 BuildMI(*BB, BB->begin(), dl, 5062 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 5063 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5064 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5065 5066 MI->eraseFromParent(); // The pseudo instruction is gone now. 5067 return BB; 5068 } 5069 5070 case ARM::BCCi64: 5071 case ARM::BCCZi64: { 5072 // If there is an unconditional branch to the other successor, remove it. 5073 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 5074 5075 // Compare both parts that make up the double comparison separately for 5076 // equality. 5077 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 5078 5079 unsigned LHS1 = MI->getOperand(1).getReg(); 5080 unsigned LHS2 = MI->getOperand(2).getReg(); 5081 if (RHSisZero) { 5082 AddDefaultPred(BuildMI(BB, dl, 5083 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5084 .addReg(LHS1).addImm(0)); 5085 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5086 .addReg(LHS2).addImm(0) 5087 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5088 } else { 5089 unsigned RHS1 = MI->getOperand(3).getReg(); 5090 unsigned RHS2 = MI->getOperand(4).getReg(); 5091 AddDefaultPred(BuildMI(BB, dl, 5092 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5093 .addReg(LHS1).addReg(RHS1)); 5094 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5095 .addReg(LHS2).addReg(RHS2) 5096 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5097 } 5098 5099 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 5100 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 5101 if (MI->getOperand(0).getImm() == ARMCC::NE) 5102 std::swap(destMBB, exitMBB); 5103 5104 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5105 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 5106 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 5107 .addMBB(exitMBB); 5108 5109 MI->eraseFromParent(); // The pseudo instruction is gone now. 
5110    return BB;
5111  }
5112  }
5113}
5114
5115//===----------------------------------------------------------------------===//
5116//                           ARM Optimization Hooks
5117//===----------------------------------------------------------------------===//
5118
5119static
5120SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
5121                            TargetLowering::DAGCombinerInfo &DCI) {
5122  SelectionDAG &DAG = DCI.DAG;
5123  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
5124  EVT VT = N->getValueType(0);
5125  unsigned Opc = N->getOpcode();
5126  bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC;
5127  SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1);
5128  SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2);
5129  ISD::CondCode CC = ISD::SETCC_INVALID;
5130
5131  if (isSlctCC) {
5132    CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get();
5133  } else {
5134    SDValue CCOp = Slct.getOperand(0);
5135    if (CCOp.getOpcode() == ISD::SETCC)
5136      CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get();
5137  }
5138
5139  bool DoXform = false;
5140  bool InvCC = false;
5141  assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) &&
5142          "Bad input!");
5143
5144  if (LHS.getOpcode() == ISD::Constant &&
5145      cast<ConstantSDNode>(LHS)->isNullValue()) {
5146    DoXform = true;
5147  } else if (CC != ISD::SETCC_INVALID &&
5148             RHS.getOpcode() == ISD::Constant &&
5149             cast<ConstantSDNode>(RHS)->isNullValue()) {
5150    std::swap(LHS, RHS);
5151    SDValue Op0 = Slct.getOperand(0);
5152    EVT OpVT = isSlctCC ? Op0.getValueType() :
5153                          Op0.getOperand(0).getValueType();
5154    bool isInt = OpVT.isInteger();
5155    CC = ISD::getSetCCInverse(CC, isInt);
5156
5157    if (!TLI.isCondCodeLegal(CC, OpVT))
5158      return SDValue();         // Inverse operator isn't legal.
5159
5160    DoXform = true;
5161    InvCC = true;
5162  }
5163
5164  if (DoXform) {
5165    SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS);
5166    if (isSlctCC)
5167      return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result,
5168                             Slct.getOperand(0), Slct.getOperand(1), CC);
5169    SDValue CCOp = Slct.getOperand(0);
5170    if (InvCC)
5171      CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(),
5172                          CCOp.getOperand(0), CCOp.getOperand(1), CC);
5173    return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
5174                       CCOp, OtherOp, Result);
5175  }
5176  return SDValue();
5177}
5178
5179/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with
5180/// operands N0 and N1.  This is a helper for PerformADDCombine that is
5181/// called with the default operands, and if that fails, with commuted
5182/// operands.
5183static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1,
5184                                          TargetLowering::DAGCombinerInfo &DCI) {
5185  // fold (add (select cc, 0, c), x) -> (select cc, x, (add x, c))
5186  if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) {
5187    SDValue Result = combineSelectAndUse(N, N0, N1, DCI);
5188    if (Result.getNode()) return Result;
5189  }
5190  return SDValue();
5191}
5192
5193/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD.
5194///
5195static SDValue PerformADDCombine(SDNode *N,
5196                                 TargetLowering::DAGCombinerInfo &DCI) {
5197  SDValue N0 = N->getOperand(0);
5198  SDValue N1 = N->getOperand(1);
5199
5200  // First try with the default operand order.
5201  SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI);
5202  if (Result.getNode())
5203    return Result;
5204
5205  // If that didn't work, try again with the operands commuted.
5206  return PerformADDCombineWithOperands(N, N1, N0, DCI);
5207}
5208
5209/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
5210///
5211static SDValue PerformSUBCombine(SDNode *N,
5212                                 TargetLowering::DAGCombinerInfo &DCI) {
5213  SDValue N0 = N->getOperand(0);
5214  SDValue N1 = N->getOperand(1);
5215
5216  // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub x, c))
5217  if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) {
5218    SDValue Result = combineSelectAndUse(N, N1, N0, DCI);
5219    if (Result.getNode()) return Result;
5220  }
5221
5222  return SDValue();
5223}
5224
5225static SDValue PerformMULCombine(SDNode *N,
5226                                 TargetLowering::DAGCombinerInfo &DCI,
5227                                 const ARMSubtarget *Subtarget) {
5228  SelectionDAG &DAG = DCI.DAG;
5229
5230  if (Subtarget->isThumb1Only())
5231    return SDValue();
5232
5233  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
5234    return SDValue();
5235
5236  EVT VT = N->getValueType(0);
5237  if (VT != MVT::i32)
5238    return SDValue();
5239
5240  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
5241  if (!C)
5242    return SDValue();
5243
5244  uint64_t MulAmt = C->getZExtValue();
5245  unsigned ShiftAmt = CountTrailingZeros_64(MulAmt);
5246  ShiftAmt = ShiftAmt & (32 - 1);
5247  SDValue V = N->getOperand(0);
5248  DebugLoc DL = N->getDebugLoc();
5249
5250  SDValue Res;
5251  MulAmt >>= ShiftAmt;
5252  if (isPowerOf2_32(MulAmt - 1)) {
5253    // (mul x, 2^N + 1) => (add (shl x, N), x)
5254    Res = DAG.getNode(ISD::ADD, DL, VT,
5255                      V, DAG.getNode(ISD::SHL, DL, VT,
5256                                     V, DAG.getConstant(Log2_32(MulAmt-1),
5257                                                        MVT::i32)));
5258  } else if (isPowerOf2_32(MulAmt + 1)) {
5259    // (mul x, 2^N - 1) => (sub (shl x, N), x)
5260    Res = DAG.getNode(ISD::SUB, DL, VT,
5261                      DAG.getNode(ISD::SHL, DL, VT,
5262                                  V, DAG.getConstant(Log2_32(MulAmt+1),
5263                                                     MVT::i32)),
5264                      V);
5265  } else
5266    return SDValue();
5267
5268  if (ShiftAmt != 0)
5269    Res = DAG.getNode(ISD::SHL, DL, VT, Res,
5270                      DAG.getConstant(ShiftAmt, MVT::i32));
5271
5272  // Do not add new nodes to DAG combiner worklist.
5273 DCI.CombineTo(N, Res, false); 5274 return SDValue(); 5275} 5276 5277static SDValue PerformANDCombine(SDNode *N, 5278 TargetLowering::DAGCombinerInfo &DCI) { 5279 5280 // Attempt to use immediate-form VBIC 5281 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 5282 DebugLoc dl = N->getDebugLoc(); 5283 EVT VT = N->getValueType(0); 5284 SelectionDAG &DAG = DCI.DAG; 5285 5286 APInt SplatBits, SplatUndef; 5287 unsigned SplatBitSize; 5288 bool HasAnyUndefs; 5289 if (BVN && 5290 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 5291 if (SplatBitSize <= 64) { 5292 EVT VbicVT; 5293 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 5294 SplatUndef.getZExtValue(), SplatBitSize, 5295 DAG, VbicVT, VT.is128BitVector(), 5296 OtherModImm); 5297 if (Val.getNode()) { 5298 SDValue Input = 5299 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 5300 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 5301 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 5302 } 5303 } 5304 } 5305 5306 return SDValue(); 5307} 5308 5309/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 5310static SDValue PerformORCombine(SDNode *N, 5311 TargetLowering::DAGCombinerInfo &DCI, 5312 const ARMSubtarget *Subtarget) { 5313 // Attempt to use immediate-form VORR 5314 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 5315 DebugLoc dl = N->getDebugLoc(); 5316 EVT VT = N->getValueType(0); 5317 SelectionDAG &DAG = DCI.DAG; 5318 5319 APInt SplatBits, SplatUndef; 5320 unsigned SplatBitSize; 5321 bool HasAnyUndefs; 5322 if (BVN && Subtarget->hasNEON() && 5323 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 5324 if (SplatBitSize <= 64) { 5325 EVT VorrVT; 5326 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 5327 SplatUndef.getZExtValue(), SplatBitSize, 5328 DAG, VorrVT, VT.is128BitVector(), 5329 OtherModImm); 5330 if (Val.getNode()) { 5331 SDValue Input = 5332 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 5333 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 5334 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 5335 } 5336 } 5337 } 5338 5339 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 5340 // reasonable. 5341 5342 // BFI is only available on V6T2+ 5343 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 5344 return SDValue(); 5345 5346 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 5347 DebugLoc DL = N->getDebugLoc(); 5348 // 1) or (and A, mask), val => ARMbfi A, val, mask 5349 // iff (val & mask) == val 5350 // 5351 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 5352 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 5353 // && mask == ~mask2 5354 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 5355 // && ~mask == mask2 5356 // (i.e., copy a bitfield value into another bitfield of the same width) 5357 if (N0.getOpcode() != ISD::AND) 5358 return SDValue(); 5359 5360 if (VT != MVT::i32) 5361 return SDValue(); 5362 5363 SDValue N00 = N0.getOperand(0); 5364 5365 // The value and the mask need to be constants so we can verify this is 5366 // actually a bitfield set. If the mask is 0xffff, we can do better 5367 // via a movt instruction, so don't use BFI in that case. 
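  // For example, for case (1) below, (or (and A, 0xffffff00), 0x42) can be
  // rewritten to (ARMbfi A, 0x42, 0xffffff00): ~mask selects the contiguous
  // field in bits 7:0, and the value fits entirely within that field.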
5368 SDValue MaskOp = N0.getOperand(1); 5369 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 5370 if (!MaskC) 5371 return SDValue(); 5372 unsigned Mask = MaskC->getZExtValue(); 5373 if (Mask == 0xffff) 5374 return SDValue(); 5375 SDValue Res; 5376 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 5377 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 5378 if (N1C) { 5379 unsigned Val = N1C->getZExtValue(); 5380 if ((Val & ~Mask) != Val) 5381 return SDValue(); 5382 5383 if (ARM::isBitFieldInvertedMask(Mask)) { 5384 Val >>= CountTrailingZeros_32(~Mask); 5385 5386 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 5387 DAG.getConstant(Val, MVT::i32), 5388 DAG.getConstant(Mask, MVT::i32)); 5389 5390 // Do not add new nodes to DAG combiner worklist. 5391 DCI.CombineTo(N, Res, false); 5392 return SDValue(); 5393 } 5394 } else if (N1.getOpcode() == ISD::AND) { 5395 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 5396 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 5397 if (!N11C) 5398 return SDValue(); 5399 unsigned Mask2 = N11C->getZExtValue(); 5400 5401 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 5402 // as is to match. 5403 if (ARM::isBitFieldInvertedMask(Mask) && 5404 (Mask == ~Mask2)) { 5405 // The pack halfword instruction works better for masks that fit it, 5406 // so use that when it's available. 5407 if (Subtarget->hasT2ExtractPack() && 5408 (Mask == 0xffff || Mask == 0xffff0000)) 5409 return SDValue(); 5410 // 2a 5411 unsigned amt = CountTrailingZeros_32(Mask2); 5412 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 5413 DAG.getConstant(amt, MVT::i32)); 5414 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 5415 DAG.getConstant(Mask, MVT::i32)); 5416 // Do not add new nodes to DAG combiner worklist. 5417 DCI.CombineTo(N, Res, false); 5418 return SDValue(); 5419 } else if (ARM::isBitFieldInvertedMask(~Mask) && 5420 (~Mask == Mask2)) { 5421 // The pack halfword instruction works better for masks that fit it, 5422 // so use that when it's available. 5423 if (Subtarget->hasT2ExtractPack() && 5424 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 5425 return SDValue(); 5426 // 2b 5427 unsigned lsb = CountTrailingZeros_32(Mask); 5428 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 5429 DAG.getConstant(lsb, MVT::i32)); 5430 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 5431 DAG.getConstant(Mask2, MVT::i32)); 5432 // Do not add new nodes to DAG combiner worklist. 5433 DCI.CombineTo(N, Res, false); 5434 return SDValue(); 5435 } 5436 } 5437 5438 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 5439 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 5440 ARM::isBitFieldInvertedMask(~Mask)) { 5441 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 5442 // where lsb(mask) == #shamt and masked bits of B are known zero. 5443 SDValue ShAmt = N00.getOperand(1); 5444 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 5445 unsigned LSB = CountTrailingZeros_32(Mask); 5446 if (ShAmtC != LSB) 5447 return SDValue(); 5448 5449 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 5450 DAG.getConstant(~Mask, MVT::i32)); 5451 5452 // Do not add new nodes to DAG combiner worklist. 5453 DCI.CombineTo(N, Res, false); 5454 } 5455 5456 return SDValue(); 5457} 5458 5459/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff 5460/// C1 & C2 == C1. 
5461static SDValue PerformBFICombine(SDNode *N, 5462 TargetLowering::DAGCombinerInfo &DCI) { 5463 SDValue N1 = N->getOperand(1); 5464 if (N1.getOpcode() == ISD::AND) { 5465 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 5466 if (!N11C) 5467 return SDValue(); 5468 unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 5469 unsigned Mask2 = N11C->getZExtValue(); 5470 if ((Mask & Mask2) == Mask2) 5471 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 5472 N->getOperand(0), N1.getOperand(0), 5473 N->getOperand(2)); 5474 } 5475 return SDValue(); 5476} 5477 5478/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 5479/// ARMISD::VMOVRRD. 5480static SDValue PerformVMOVRRDCombine(SDNode *N, 5481 TargetLowering::DAGCombinerInfo &DCI) { 5482 // vmovrrd(vmovdrr x, y) -> x,y 5483 SDValue InDouble = N->getOperand(0); 5484 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 5485 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 5486 return SDValue(); 5487} 5488 5489/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 5490/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 5491static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 5492 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 5493 SDValue Op0 = N->getOperand(0); 5494 SDValue Op1 = N->getOperand(1); 5495 if (Op0.getOpcode() == ISD::BITCAST) 5496 Op0 = Op0.getOperand(0); 5497 if (Op1.getOpcode() == ISD::BITCAST) 5498 Op1 = Op1.getOperand(0); 5499 if (Op0.getOpcode() == ARMISD::VMOVRRD && 5500 Op0.getNode() == Op1.getNode() && 5501 Op0.getResNo() == 0 && Op1.getResNo() == 1) 5502 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 5503 N->getValueType(0), Op0.getOperand(0)); 5504 return SDValue(); 5505} 5506 5507/// PerformSTORECombine - Target-specific dag combine xforms for 5508/// ISD::STORE. 5509static SDValue PerformSTORECombine(SDNode *N, 5510 TargetLowering::DAGCombinerInfo &DCI) { 5511 // Bitcast an i64 store extracted from a vector to f64. 5512 // Otherwise, the i64 value will be legalized to a pair of i32 values. 5513 StoreSDNode *St = cast<StoreSDNode>(N); 5514 SDValue StVal = St->getValue(); 5515 if (!ISD::isNormalStore(St) || St->isVolatile() || 5516 StVal.getValueType() != MVT::i64 || 5517 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 5518 return SDValue(); 5519 5520 SelectionDAG &DAG = DCI.DAG; 5521 DebugLoc dl = StVal.getDebugLoc(); 5522 SDValue IntVec = StVal.getOperand(0); 5523 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 5524 IntVec.getValueType().getVectorNumElements()); 5525 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 5526 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 5527 Vec, StVal.getOperand(1)); 5528 dl = N->getDebugLoc(); 5529 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 5530 // Make the DAGCombiner fold the bitcasts. 5531 DCI.AddToWorklist(Vec.getNode()); 5532 DCI.AddToWorklist(ExtElt.getNode()); 5533 DCI.AddToWorklist(V.getNode()); 5534 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 5535 St->getPointerInfo(), St->isVolatile(), 5536 St->isNonTemporal(), St->getAlignment(), 5537 St->getTBAAInfo()); 5538} 5539 5540/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 5541/// are normal, non-volatile loads. If so, it is profitable to bitcast an 5542/// i64 vector to have f64 elements, since the value can then be loaded 5543/// directly into a VFP register. 
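/// For example, a v2i64 built from two loaded i64 values can then be
/// assembled with two f64 (vldr) loads and a bitcast instead of four i32
/// loads into GPR pairs.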
5544static bool hasNormalLoadOperand(SDNode *N) { 5545 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 5546 for (unsigned i = 0; i < NumElts; ++i) { 5547 SDNode *Elt = N->getOperand(i).getNode(); 5548 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 5549 return true; 5550 } 5551 return false; 5552} 5553 5554/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 5555/// ISD::BUILD_VECTOR. 5556static SDValue PerformBUILD_VECTORCombine(SDNode *N, 5557 TargetLowering::DAGCombinerInfo &DCI){ 5558 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 5559 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 5560 // into a pair of GPRs, which is fine when the value is used as a scalar, 5561 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 5562 SelectionDAG &DAG = DCI.DAG; 5563 if (N->getNumOperands() == 2) { 5564 SDValue RV = PerformVMOVDRRCombine(N, DAG); 5565 if (RV.getNode()) 5566 return RV; 5567 } 5568 5569 // Load i64 elements as f64 values so that type legalization does not split 5570 // them up into i32 values. 5571 EVT VT = N->getValueType(0); 5572 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 5573 return SDValue(); 5574 DebugLoc dl = N->getDebugLoc(); 5575 SmallVector<SDValue, 8> Ops; 5576 unsigned NumElts = VT.getVectorNumElements(); 5577 for (unsigned i = 0; i < NumElts; ++i) { 5578 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 5579 Ops.push_back(V); 5580 // Make the DAGCombiner fold the bitcast. 5581 DCI.AddToWorklist(V.getNode()); 5582 } 5583 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 5584 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 5585 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 5586} 5587 5588/// PerformInsertEltCombine - Target-specific dag combine xforms for 5589/// ISD::INSERT_VECTOR_ELT. 5590static SDValue PerformInsertEltCombine(SDNode *N, 5591 TargetLowering::DAGCombinerInfo &DCI) { 5592 // Bitcast an i64 load inserted into a vector to f64. 5593 // Otherwise, the i64 value will be legalized to a pair of i32 values. 5594 EVT VT = N->getValueType(0); 5595 SDNode *Elt = N->getOperand(1).getNode(); 5596 if (VT.getVectorElementType() != MVT::i64 || 5597 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 5598 return SDValue(); 5599 5600 SelectionDAG &DAG = DCI.DAG; 5601 DebugLoc dl = N->getDebugLoc(); 5602 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 5603 VT.getVectorNumElements()); 5604 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 5605 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 5606 // Make the DAGCombiner fold the bitcasts. 5607 DCI.AddToWorklist(Vec.getNode()); 5608 DCI.AddToWorklist(V.getNode()); 5609 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 5610 Vec, V, N->getOperand(2)); 5611 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 5612} 5613 5614/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 5615/// ISD::VECTOR_SHUFFLE. 5616static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 5617 // The LLVM shufflevector instruction does not require the shuffle mask 5618 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 5619 // have that requirement. 
When translating to ISD::VECTOR_SHUFFLE, if the 5620 // operands do not match the mask length, they are extended by concatenating 5621 // them with undef vectors. That is probably the right thing for other 5622 // targets, but for NEON it is better to concatenate two double-register 5623 // size vector operands into a single quad-register size vector. Do that 5624 // transformation here: 5625 // shuffle(concat(v1, undef), concat(v2, undef)) -> 5626 // shuffle(concat(v1, v2), undef) 5627 SDValue Op0 = N->getOperand(0); 5628 SDValue Op1 = N->getOperand(1); 5629 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 5630 Op1.getOpcode() != ISD::CONCAT_VECTORS || 5631 Op0.getNumOperands() != 2 || 5632 Op1.getNumOperands() != 2) 5633 return SDValue(); 5634 SDValue Concat0Op1 = Op0.getOperand(1); 5635 SDValue Concat1Op1 = Op1.getOperand(1); 5636 if (Concat0Op1.getOpcode() != ISD::UNDEF || 5637 Concat1Op1.getOpcode() != ISD::UNDEF) 5638 return SDValue(); 5639 // Skip the transformation if any of the types are illegal. 5640 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5641 EVT VT = N->getValueType(0); 5642 if (!TLI.isTypeLegal(VT) || 5643 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 5644 !TLI.isTypeLegal(Concat1Op1.getValueType())) 5645 return SDValue(); 5646 5647 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 5648 Op0.getOperand(0), Op1.getOperand(0)); 5649 // Translate the shuffle mask. 5650 SmallVector<int, 16> NewMask; 5651 unsigned NumElts = VT.getVectorNumElements(); 5652 unsigned HalfElts = NumElts/2; 5653 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 5654 for (unsigned n = 0; n < NumElts; ++n) { 5655 int MaskElt = SVN->getMaskElt(n); 5656 int NewElt = -1; 5657 if (MaskElt < (int)HalfElts) 5658 NewElt = MaskElt; 5659 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 5660 NewElt = HalfElts + MaskElt - NumElts; 5661 NewMask.push_back(NewElt); 5662 } 5663 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 5664 DAG.getUNDEF(VT), NewMask.data()); 5665} 5666 5667/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 5668/// NEON load/store intrinsics to merge base address updates. 5669static SDValue CombineBaseUpdate(SDNode *N, 5670 TargetLowering::DAGCombinerInfo &DCI) { 5671 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 5672 return SDValue(); 5673 5674 SelectionDAG &DAG = DCI.DAG; 5675 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 5676 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 5677 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 5678 SDValue Addr = N->getOperand(AddrOpIdx); 5679 5680 // Search for a use of the address operand that is an increment. 5681 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 5682 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 5683 SDNode *User = *UI; 5684 if (User->getOpcode() != ISD::ADD || 5685 UI.getUse().getResNo() != Addr.getResNo()) 5686 continue; 5687 5688 // Check that the add is independent of the load/store. Otherwise, folding 5689 // it would create a cycle. 5690 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 5691 continue; 5692 5693 // Find the new opcode for the updating load/store. 
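    // (An updating NEON access, e.g. "vld1.32 {d0}, [r0]!", writes the
    // post-incremented address back to the base register, so the separate
    // ADD node can be folded away.)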
5694 bool isLoad = true; 5695 bool isLaneOp = false; 5696 unsigned NewOpc = 0; 5697 unsigned NumVecs = 0; 5698 if (isIntrinsic) { 5699 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 5700 switch (IntNo) { 5701 default: assert(0 && "unexpected intrinsic for Neon base update"); 5702 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 5703 NumVecs = 1; break; 5704 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 5705 NumVecs = 2; break; 5706 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 5707 NumVecs = 3; break; 5708 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 5709 NumVecs = 4; break; 5710 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 5711 NumVecs = 2; isLaneOp = true; break; 5712 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 5713 NumVecs = 3; isLaneOp = true; break; 5714 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 5715 NumVecs = 4; isLaneOp = true; break; 5716 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 5717 NumVecs = 1; isLoad = false; break; 5718 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 5719 NumVecs = 2; isLoad = false; break; 5720 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 5721 NumVecs = 3; isLoad = false; break; 5722 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 5723 NumVecs = 4; isLoad = false; break; 5724 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 5725 NumVecs = 2; isLoad = false; isLaneOp = true; break; 5726 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 5727 NumVecs = 3; isLoad = false; isLaneOp = true; break; 5728 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 5729 NumVecs = 4; isLoad = false; isLaneOp = true; break; 5730 } 5731 } else { 5732 isLaneOp = true; 5733 switch (N->getOpcode()) { 5734 default: assert(0 && "unexpected opcode for Neon base update"); 5735 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 5736 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 5737 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 5738 } 5739 } 5740 5741 // Find the size of memory referenced by the load/store. 5742 EVT VecTy; 5743 if (isLoad) 5744 VecTy = N->getValueType(0); 5745 else 5746 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 5747 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 5748 if (isLaneOp) 5749 NumBytes /= VecTy.getVectorNumElements(); 5750 5751 // If the increment is a constant, it must match the memory ref size. 5752 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 5753 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 5754 uint64_t IncVal = CInc->getZExtValue(); 5755 if (IncVal != NumBytes) 5756 continue; 5757 } else if (NumBytes >= 3 * 16) { 5758 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 5759 // separate instructions that make it harder to use a non-constant update. 5760 continue; 5761 } 5762 5763 // Create the new updating load/store node. 5764 EVT Tys[6]; 5765 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 5766 unsigned n; 5767 for (n = 0; n < NumResultVecs; ++n) 5768 Tys[n] = VecTy; 5769 Tys[n++] = MVT::i32; 5770 Tys[n] = MVT::Other; 5771 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 5772 SmallVector<SDValue, 8> Ops; 5773 Ops.push_back(N->getOperand(0)); // incoming chain 5774 Ops.push_back(N->getOperand(AddrOpIdx)); 5775 Ops.push_back(Inc); 5776 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 5777 Ops.push_back(N->getOperand(i)); 5778 } 5779 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 5780 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 5781 Ops.data(), Ops.size(), 5782 MemInt->getMemoryVT(), 5783 MemInt->getMemOperand()); 5784 5785 // Update the uses. 5786 std::vector<SDValue> NewResults; 5787 for (unsigned i = 0; i < NumResultVecs; ++i) { 5788 NewResults.push_back(SDValue(UpdN.getNode(), i)); 5789 } 5790 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 5791 DCI.CombineTo(N, NewResults); 5792 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 5793 5794 break; 5795 } 5796 return SDValue(); 5797} 5798 5799/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 5800/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 5801/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 5802/// return true. 5803static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 5804 SelectionDAG &DAG = DCI.DAG; 5805 EVT VT = N->getValueType(0); 5806 // vldN-dup instructions only support 64-bit vectors for N > 1. 5807 if (!VT.is64BitVector()) 5808 return false; 5809 5810 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 5811 SDNode *VLD = N->getOperand(0).getNode(); 5812 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 5813 return false; 5814 unsigned NumVecs = 0; 5815 unsigned NewOpc = 0; 5816 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 5817 if (IntNo == Intrinsic::arm_neon_vld2lane) { 5818 NumVecs = 2; 5819 NewOpc = ARMISD::VLD2DUP; 5820 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 5821 NumVecs = 3; 5822 NewOpc = ARMISD::VLD3DUP; 5823 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 5824 NumVecs = 4; 5825 NewOpc = ARMISD::VLD4DUP; 5826 } else { 5827 return false; 5828 } 5829 5830 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 5831 // numbers match the load. 5832 unsigned VLDLaneNo = 5833 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 5834 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 5835 UI != UE; ++UI) { 5836 // Ignore uses of the chain result. 5837 if (UI.getUse().getResNo() == NumVecs) 5838 continue; 5839 SDNode *User = *UI; 5840 if (User->getOpcode() != ARMISD::VDUPLANE || 5841 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 5842 return false; 5843 } 5844 5845 // Create the vldN-dup node. 5846 EVT Tys[5]; 5847 unsigned n; 5848 for (n = 0; n < NumVecs; ++n) 5849 Tys[n] = VT; 5850 Tys[n] = MVT::Other; 5851 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 5852 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 5853 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 5854 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 5855 Ops, 2, VLDMemInt->getMemoryVT(), 5856 VLDMemInt->getMemOperand()); 5857 5858 // Update the uses. 
5859 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 5860 UI != UE; ++UI) { 5861 unsigned ResNo = UI.getUse().getResNo(); 5862 // Ignore uses of the chain result. 5863 if (ResNo == NumVecs) 5864 continue; 5865 SDNode *User = *UI; 5866 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 5867 } 5868 5869 // Now the vldN-lane intrinsic is dead except for its chain result. 5870 // Update uses of the chain. 5871 std::vector<SDValue> VLDDupResults; 5872 for (unsigned n = 0; n < NumVecs; ++n) 5873 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 5874 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 5875 DCI.CombineTo(VLD, VLDDupResults); 5876 5877 return true; 5878} 5879 5880/// PerformVDUPLANECombine - Target-specific dag combine xforms for 5881/// ARMISD::VDUPLANE. 5882static SDValue PerformVDUPLANECombine(SDNode *N, 5883 TargetLowering::DAGCombinerInfo &DCI) { 5884 SDValue Op = N->getOperand(0); 5885 5886 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 5887 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 5888 if (CombineVLDDUP(N, DCI)) 5889 return SDValue(N, 0); 5890 5891 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 5892 // redundant. Ignore bit_converts for now; element sizes are checked below. 5893 while (Op.getOpcode() == ISD::BITCAST) 5894 Op = Op.getOperand(0); 5895 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 5896 return SDValue(); 5897 5898 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 5899 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 5900 // The canonical VMOV for a zero vector uses a 32-bit element size. 5901 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 5902 unsigned EltBits; 5903 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 5904 EltSize = 8; 5905 EVT VT = N->getValueType(0); 5906 if (EltSize > VT.getVectorElementType().getSizeInBits()) 5907 return SDValue(); 5908 5909 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 5910} 5911 5912/// getVShiftImm - Check if this is a valid build_vector for the immediate 5913/// operand of a vector shift operation, where all the elements of the 5914/// build_vector must have the same constant integer value. 5915static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 5916 // Ignore bit_converts. 5917 while (Op.getOpcode() == ISD::BITCAST) 5918 Op = Op.getOperand(0); 5919 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5920 APInt SplatBits, SplatUndef; 5921 unsigned SplatBitSize; 5922 bool HasAnyUndefs; 5923 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 5924 HasAnyUndefs, ElementBits) || 5925 SplatBitSize > ElementBits) 5926 return false; 5927 Cnt = SplatBits.getSExtValue(); 5928 return true; 5929} 5930 5931/// isVShiftLImm - Check if this is a valid build_vector for the immediate 5932/// operand of a vector shift left operation. That value must be in the range: 5933/// 0 <= Value < ElementBits for a left shift; or 5934/// 0 <= Value <= ElementBits for a long left shift. 5935static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 5936 assert(VT.isVector() && "vector shift count is not a vector type"); 5937 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 5938 if (! getVShiftImm(Op, ElementBits, Cnt)) 5939 return false; 5940 return (Cnt >= 0 && (isLong ? 
                                  Cnt-1 : Cnt) < ElementBits);
}

/// isVShiftRImm - Check if this is a valid build_vector for the immediate
/// operand of a vector shift right operation. For a shift opcode, the value
/// is positive, but for an intrinsic the value is negative. The absolute
/// value must be in the range:
/// 1 <= |Value| <= ElementBits for a right shift; or
/// 1 <= |Value| <= ElementBits/2 for a narrow right shift.
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
                         int64_t &Cnt) {
  assert(VT.isVector() && "vector shift count is not a vector type");
  unsigned ElementBits = VT.getVectorElementType().getSizeInBits();
  if (!getVShiftImm(Op, ElementBits, Cnt))
    return false;
  if (isIntrinsic)
    Cnt = -Cnt;
  return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits));
}

/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics.
static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
  switch (IntNo) {
  default:
    // Don't do anything for most intrinsics.
    break;

  // Vector shifts: check for immediate versions and lower them.
  // Note: This is done during DAG combining instead of DAG legalizing because
  // the build_vectors for 64-bit vector element shift counts are generally
  // not legal, and it is hard to see their values after they get legalized to
  // loads from a constant pool.
  case Intrinsic::arm_neon_vshifts:
  case Intrinsic::arm_neon_vshiftu:
  case Intrinsic::arm_neon_vshiftls:
  case Intrinsic::arm_neon_vshiftlu:
  case Intrinsic::arm_neon_vshiftn:
  case Intrinsic::arm_neon_vrshifts:
  case Intrinsic::arm_neon_vrshiftu:
  case Intrinsic::arm_neon_vrshiftn:
  case Intrinsic::arm_neon_vqshifts:
  case Intrinsic::arm_neon_vqshiftu:
  case Intrinsic::arm_neon_vqshiftsu:
  case Intrinsic::arm_neon_vqshiftns:
  case Intrinsic::arm_neon_vqshiftnu:
  case Intrinsic::arm_neon_vqshiftnsu:
  case Intrinsic::arm_neon_vqrshiftns:
  case Intrinsic::arm_neon_vqrshiftnu:
  case Intrinsic::arm_neon_vqrshiftnsu: {
    EVT VT = N->getOperand(1).getValueType();
    int64_t Cnt;
    unsigned VShiftOpc = 0;

    switch (IntNo) {
    case Intrinsic::arm_neon_vshifts:
    case Intrinsic::arm_neon_vshiftu:
      if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) {
        VShiftOpc = ARMISD::VSHL;
        break;
      }
      if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) {
        VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ?
6003 ARMISD::VSHRs : ARMISD::VSHRu); 6004 break; 6005 } 6006 return SDValue(); 6007 6008 case Intrinsic::arm_neon_vshiftls: 6009 case Intrinsic::arm_neon_vshiftlu: 6010 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 6011 break; 6012 llvm_unreachable("invalid shift count for vshll intrinsic"); 6013 6014 case Intrinsic::arm_neon_vrshifts: 6015 case Intrinsic::arm_neon_vrshiftu: 6016 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 6017 break; 6018 return SDValue(); 6019 6020 case Intrinsic::arm_neon_vqshifts: 6021 case Intrinsic::arm_neon_vqshiftu: 6022 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 6023 break; 6024 return SDValue(); 6025 6026 case Intrinsic::arm_neon_vqshiftsu: 6027 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 6028 break; 6029 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 6030 6031 case Intrinsic::arm_neon_vshiftn: 6032 case Intrinsic::arm_neon_vrshiftn: 6033 case Intrinsic::arm_neon_vqshiftns: 6034 case Intrinsic::arm_neon_vqshiftnu: 6035 case Intrinsic::arm_neon_vqshiftnsu: 6036 case Intrinsic::arm_neon_vqrshiftns: 6037 case Intrinsic::arm_neon_vqrshiftnu: 6038 case Intrinsic::arm_neon_vqrshiftnsu: 6039 // Narrowing shifts require an immediate right shift. 6040 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 6041 break; 6042 llvm_unreachable("invalid shift count for narrowing vector shift " 6043 "intrinsic"); 6044 6045 default: 6046 llvm_unreachable("unhandled vector shift"); 6047 } 6048 6049 switch (IntNo) { 6050 case Intrinsic::arm_neon_vshifts: 6051 case Intrinsic::arm_neon_vshiftu: 6052 // Opcode already set above. 6053 break; 6054 case Intrinsic::arm_neon_vshiftls: 6055 case Intrinsic::arm_neon_vshiftlu: 6056 if (Cnt == VT.getVectorElementType().getSizeInBits()) 6057 VShiftOpc = ARMISD::VSHLLi; 6058 else 6059 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
6060 ARMISD::VSHLLs : ARMISD::VSHLLu); 6061 break; 6062 case Intrinsic::arm_neon_vshiftn: 6063 VShiftOpc = ARMISD::VSHRN; break; 6064 case Intrinsic::arm_neon_vrshifts: 6065 VShiftOpc = ARMISD::VRSHRs; break; 6066 case Intrinsic::arm_neon_vrshiftu: 6067 VShiftOpc = ARMISD::VRSHRu; break; 6068 case Intrinsic::arm_neon_vrshiftn: 6069 VShiftOpc = ARMISD::VRSHRN; break; 6070 case Intrinsic::arm_neon_vqshifts: 6071 VShiftOpc = ARMISD::VQSHLs; break; 6072 case Intrinsic::arm_neon_vqshiftu: 6073 VShiftOpc = ARMISD::VQSHLu; break; 6074 case Intrinsic::arm_neon_vqshiftsu: 6075 VShiftOpc = ARMISD::VQSHLsu; break; 6076 case Intrinsic::arm_neon_vqshiftns: 6077 VShiftOpc = ARMISD::VQSHRNs; break; 6078 case Intrinsic::arm_neon_vqshiftnu: 6079 VShiftOpc = ARMISD::VQSHRNu; break; 6080 case Intrinsic::arm_neon_vqshiftnsu: 6081 VShiftOpc = ARMISD::VQSHRNsu; break; 6082 case Intrinsic::arm_neon_vqrshiftns: 6083 VShiftOpc = ARMISD::VQRSHRNs; break; 6084 case Intrinsic::arm_neon_vqrshiftnu: 6085 VShiftOpc = ARMISD::VQRSHRNu; break; 6086 case Intrinsic::arm_neon_vqrshiftnsu: 6087 VShiftOpc = ARMISD::VQRSHRNsu; break; 6088 } 6089 6090 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 6091 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 6092 } 6093 6094 case Intrinsic::arm_neon_vshiftins: { 6095 EVT VT = N->getOperand(1).getValueType(); 6096 int64_t Cnt; 6097 unsigned VShiftOpc = 0; 6098 6099 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 6100 VShiftOpc = ARMISD::VSLI; 6101 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 6102 VShiftOpc = ARMISD::VSRI; 6103 else { 6104 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 6105 } 6106 6107 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 6108 N->getOperand(1), N->getOperand(2), 6109 DAG.getConstant(Cnt, MVT::i32)); 6110 } 6111 6112 case Intrinsic::arm_neon_vqrshifts: 6113 case Intrinsic::arm_neon_vqrshiftu: 6114 // No immediate versions of these to check for. 6115 break; 6116 } 6117 6118 return SDValue(); 6119} 6120 6121/// PerformShiftCombine - Checks for immediate versions of vector shifts and 6122/// lowers them. As with the vector shift intrinsics, this is done during DAG 6123/// combining instead of DAG legalizing because the build_vectors for 64-bit 6124/// vector element shift counts are generally not legal, and it is hard to see 6125/// their values after they get legalized to loads from a constant pool. 6126static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 6127 const ARMSubtarget *ST) { 6128 EVT VT = N->getValueType(0); 6129 6130 // Nothing to be done for scalar shifts. 6131 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6132 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 6133 return SDValue(); 6134 6135 assert(ST->hasNEON() && "unexpected vector shift"); 6136 int64_t Cnt; 6137 6138 switch (N->getOpcode()) { 6139 default: llvm_unreachable("unexpected shift opcode"); 6140 6141 case ISD::SHL: 6142 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 6143 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 6144 DAG.getConstant(Cnt, MVT::i32)); 6145 break; 6146 6147 case ISD::SRA: 6148 case ISD::SRL: 6149 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 6150 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
6151 ARMISD::VSHRs : ARMISD::VSHRu); 6152 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 6153 DAG.getConstant(Cnt, MVT::i32)); 6154 } 6155 } 6156 return SDValue(); 6157} 6158 6159/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 6160/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 6161static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 6162 const ARMSubtarget *ST) { 6163 SDValue N0 = N->getOperand(0); 6164 6165 // Check for sign- and zero-extensions of vector extract operations of 8- 6166 // and 16-bit vector elements. NEON supports these directly. They are 6167 // handled during DAG combining because type legalization will promote them 6168 // to 32-bit types and it is messy to recognize the operations after that. 6169 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 6170 SDValue Vec = N0.getOperand(0); 6171 SDValue Lane = N0.getOperand(1); 6172 EVT VT = N->getValueType(0); 6173 EVT EltVT = N0.getValueType(); 6174 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6175 6176 if (VT == MVT::i32 && 6177 (EltVT == MVT::i8 || EltVT == MVT::i16) && 6178 TLI.isTypeLegal(Vec.getValueType()) && 6179 isa<ConstantSDNode>(Lane)) { 6180 6181 unsigned Opc = 0; 6182 switch (N->getOpcode()) { 6183 default: llvm_unreachable("unexpected opcode"); 6184 case ISD::SIGN_EXTEND: 6185 Opc = ARMISD::VGETLANEs; 6186 break; 6187 case ISD::ZERO_EXTEND: 6188 case ISD::ANY_EXTEND: 6189 Opc = ARMISD::VGETLANEu; 6190 break; 6191 } 6192 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 6193 } 6194 } 6195 6196 return SDValue(); 6197} 6198 6199/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 6200/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 6201static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 6202 const ARMSubtarget *ST) { 6203 // If the target supports NEON, try to use vmax/vmin instructions for f32 6204 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 6205 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 6206 // a NaN; only do the transformation when it matches that behavior. 6207 6208 // For now only do this when using NEON for FP operations; if using VFP, it 6209 // is not obvious that the benefit outweighs the cost of switching to the 6210 // NEON pipeline. 6211 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 6212 N->getValueType(0) != MVT::f32) 6213 return SDValue(); 6214 6215 SDValue CondLHS = N->getOperand(0); 6216 SDValue CondRHS = N->getOperand(1); 6217 SDValue LHS = N->getOperand(2); 6218 SDValue RHS = N->getOperand(3); 6219 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 6220 6221 unsigned Opcode = 0; 6222 bool IsReversed; 6223 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 6224 IsReversed = false; // x CC y ? x : y 6225 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 6226 IsReversed = true ; // x CC y ? y : x 6227 } else { 6228 return SDValue(); 6229 } 6230 6231 bool IsUnordered; 6232 switch (CC) { 6233 default: break; 6234 case ISD::SETOLT: 6235 case ISD::SETOLE: 6236 case ISD::SETLT: 6237 case ISD::SETLE: 6238 case ISD::SETULT: 6239 case ISD::SETULE: 6240 // If LHS is NaN, an ordered comparison will be false and the result will 6241 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 6242 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
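      // Concretely: for "x < y ? x : y" with x = NaN, the ordered compare is
      // false and the select yields y, but vmin(NaN, y) = NaN, so LHS must be
      // known non-NaN. For "x ult y ? x : y" with y = NaN, the compare is
      // true and the select yields x, while vmin(x, NaN) = NaN, so there it
      // is RHS that must be known non-NaN.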
6243 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 6244 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 6245 break; 6246 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 6247 // will return -0, so vmin can only be used for unsafe math or if one of 6248 // the operands is known to be nonzero. 6249 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 6250 !UnsafeFPMath && 6251 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 6252 break; 6253 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 6254 break; 6255 6256 case ISD::SETOGT: 6257 case ISD::SETOGE: 6258 case ISD::SETGT: 6259 case ISD::SETGE: 6260 case ISD::SETUGT: 6261 case ISD::SETUGE: 6262 // If LHS is NaN, an ordered comparison will be false and the result will 6263 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 6264 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 6265 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 6266 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 6267 break; 6268 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 6269 // will return +0, so vmax can only be used for unsafe math or if one of 6270 // the operands is known to be nonzero. 6271 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 6272 !UnsafeFPMath && 6273 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 6274 break; 6275 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 6276 break; 6277 } 6278 6279 if (!Opcode) 6280 return SDValue(); 6281 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 6282} 6283 6284SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 6285 DAGCombinerInfo &DCI) const { 6286 switch (N->getOpcode()) { 6287 default: break; 6288 case ISD::ADD: return PerformADDCombine(N, DCI); 6289 case ISD::SUB: return PerformSUBCombine(N, DCI); 6290 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 6291 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 6292 case ISD::AND: return PerformANDCombine(N, DCI); 6293 case ARMISD::BFI: return PerformBFICombine(N, DCI); 6294 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 6295 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 6296 case ISD::STORE: return PerformSTORECombine(N, DCI); 6297 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 6298 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 6299 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 6300 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 6301 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 6302 case ISD::SHL: 6303 case ISD::SRA: 6304 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 6305 case ISD::SIGN_EXTEND: 6306 case ISD::ZERO_EXTEND: 6307 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 6308 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 6309 case ARMISD::VLD2DUP: 6310 case ARMISD::VLD3DUP: 6311 case ARMISD::VLD4DUP: 6312 return CombineBaseUpdate(N, DCI); 6313 case ISD::INTRINSIC_VOID: 6314 case ISD::INTRINSIC_W_CHAIN: 6315 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 6316 case Intrinsic::arm_neon_vld1: 6317 case Intrinsic::arm_neon_vld2: 6318 case Intrinsic::arm_neon_vld3: 6319 case Intrinsic::arm_neon_vld4: 6320 case Intrinsic::arm_neon_vld2lane: 6321 case Intrinsic::arm_neon_vld3lane: 6322 case 
Intrinsic::arm_neon_vld4lane: 6323 case Intrinsic::arm_neon_vst1: 6324 case Intrinsic::arm_neon_vst2: 6325 case Intrinsic::arm_neon_vst3: 6326 case Intrinsic::arm_neon_vst4: 6327 case Intrinsic::arm_neon_vst2lane: 6328 case Intrinsic::arm_neon_vst3lane: 6329 case Intrinsic::arm_neon_vst4lane: 6330 return CombineBaseUpdate(N, DCI); 6331 default: break; 6332 } 6333 break; 6334 } 6335 return SDValue(); 6336} 6337 6338bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 6339 EVT VT) const { 6340 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 6341} 6342 6343bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 6344 if (!Subtarget->allowsUnalignedMem()) 6345 return false; 6346 6347 switch (VT.getSimpleVT().SimpleTy) { 6348 default: 6349 return false; 6350 case MVT::i8: 6351 case MVT::i16: 6352 case MVT::i32: 6353 return true; 6354 // FIXME: VLD1 etc with standard alignment is legal. 6355 } 6356} 6357 6358static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 6359 if (V < 0) 6360 return false; 6361 6362 unsigned Scale = 1; 6363 switch (VT.getSimpleVT().SimpleTy) { 6364 default: return false; 6365 case MVT::i1: 6366 case MVT::i8: 6367 // Scale == 1; 6368 break; 6369 case MVT::i16: 6370 // Scale == 2; 6371 Scale = 2; 6372 break; 6373 case MVT::i32: 6374 // Scale == 4; 6375 Scale = 4; 6376 break; 6377 } 6378 6379 if ((V & (Scale - 1)) != 0) 6380 return false; 6381 V /= Scale; 6382 return V == (V & ((1LL << 5) - 1)); 6383} 6384 6385static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 6386 const ARMSubtarget *Subtarget) { 6387 bool isNeg = false; 6388 if (V < 0) { 6389 isNeg = true; 6390 V = - V; 6391 } 6392 6393 switch (VT.getSimpleVT().SimpleTy) { 6394 default: return false; 6395 case MVT::i1: 6396 case MVT::i8: 6397 case MVT::i16: 6398 case MVT::i32: 6399 // + imm12 or - imm8 6400 if (isNeg) 6401 return V == (V & ((1LL << 8) - 1)); 6402 return V == (V & ((1LL << 12) - 1)); 6403 case MVT::f32: 6404 case MVT::f64: 6405 // Same as ARM mode. FIXME: NEON? 6406 if (!Subtarget->hasVFP2()) 6407 return false; 6408 if ((V & 3) != 0) 6409 return false; 6410 V >>= 2; 6411 return V == (V & ((1LL << 8) - 1)); 6412 } 6413} 6414 6415/// isLegalAddressImmediate - Return true if the integer value can be used 6416/// as the offset of the target addressing mode for load / store of the 6417/// given type. 6418static bool isLegalAddressImmediate(int64_t V, EVT VT, 6419 const ARMSubtarget *Subtarget) { 6420 if (V == 0) 6421 return true; 6422 6423 if (!VT.isSimple()) 6424 return false; 6425 6426 if (Subtarget->isThumb1Only()) 6427 return isLegalT1AddressImmediate(V, VT); 6428 else if (Subtarget->isThumb2()) 6429 return isLegalT2AddressImmediate(V, VT, Subtarget); 6430 6431 // ARM mode. 6432 if (V < 0) 6433 V = - V; 6434 switch (VT.getSimpleVT().SimpleTy) { 6435 default: return false; 6436 case MVT::i1: 6437 case MVT::i8: 6438 case MVT::i32: 6439 // +- imm12 6440 return V == (V & ((1LL << 12) - 1)); 6441 case MVT::i16: 6442 // +- imm8 6443 return V == (V & ((1LL << 8) - 1)); 6444 case MVT::f32: 6445 case MVT::f64: 6446 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
6447 return false; 6448 if ((V & 3) != 0) 6449 return false; 6450 V >>= 2; 6451 return V == (V & ((1LL << 8) - 1)); 6452 } 6453} 6454 6455bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 6456 EVT VT) const { 6457 int Scale = AM.Scale; 6458 if (Scale < 0) 6459 return false; 6460 6461 switch (VT.getSimpleVT().SimpleTy) { 6462 default: return false; 6463 case MVT::i1: 6464 case MVT::i8: 6465 case MVT::i16: 6466 case MVT::i32: 6467 if (Scale == 1) 6468 return true; 6469 // r + r << imm 6470 Scale = Scale & ~1; 6471 return Scale == 2 || Scale == 4 || Scale == 8; 6472 case MVT::i64: 6473 // r + r 6474 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 6475 return true; 6476 return false; 6477 case MVT::isVoid: 6478 // Note, we allow "void" uses (basically, uses that aren't loads or 6479 // stores), because arm allows folding a scale into many arithmetic 6480 // operations. This should be made more precise and revisited later. 6481 6482 // Allow r << imm, but the imm has to be a multiple of two. 6483 if (Scale & 1) return false; 6484 return isPowerOf2_32(Scale); 6485 } 6486} 6487 6488/// isLegalAddressingMode - Return true if the addressing mode represented 6489/// by AM is legal for this target, for a load/store of the specified type. 6490bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 6491 const Type *Ty) const { 6492 EVT VT = getValueType(Ty, true); 6493 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 6494 return false; 6495 6496 // Can never fold addr of global into load/store. 6497 if (AM.BaseGV) 6498 return false; 6499 6500 switch (AM.Scale) { 6501 case 0: // no scale reg, must be "r+i" or "r", or "i". 6502 break; 6503 case 1: 6504 if (Subtarget->isThumb1Only()) 6505 return false; 6506 // FALL THROUGH. 6507 default: 6508 // ARM doesn't support any R+R*scale+imm addr modes. 6509 if (AM.BaseOffs) 6510 return false; 6511 6512 if (!VT.isSimple()) 6513 return false; 6514 6515 if (Subtarget->isThumb2()) 6516 return isLegalT2ScaledAddressingMode(AM, VT); 6517 6518 int Scale = AM.Scale; 6519 switch (VT.getSimpleVT().SimpleTy) { 6520 default: return false; 6521 case MVT::i1: 6522 case MVT::i8: 6523 case MVT::i32: 6524 if (Scale < 0) Scale = -Scale; 6525 if (Scale == 1) 6526 return true; 6527 // r + r << imm 6528 return isPowerOf2_32(Scale & ~1); 6529 case MVT::i16: 6530 case MVT::i64: 6531 // r + r 6532 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 6533 return true; 6534 return false; 6535 6536 case MVT::isVoid: 6537 // Note, we allow "void" uses (basically, uses that aren't loads or 6538 // stores), because arm allows folding a scale into many arithmetic 6539 // operations. This should be made more precise and revisited later. 6540 6541 // Allow r << imm, but the imm has to be a multiple of two. 6542 if (Scale & 1) return false; 6543 return isPowerOf2_32(Scale); 6544 } 6545 break; 6546 } 6547 return true; 6548} 6549 6550/// isLegalICmpImmediate - Return true if the specified immediate is legal 6551/// icmp immediate, that is the target has icmp instructions which can compare 6552/// a register against the immediate without having to materialize the 6553/// immediate into a register. 
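/// For example, in ARM mode a compare immediate is any 8-bit value rotated
/// right by an even amount, so 0xff000000 (0xff ror 8) is legal but
/// 0x00ffff00 is not; in Thumb1 mode only the immediates 0-255 are
/// available.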
6554bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 6555 if (!Subtarget->isThumb()) 6556 return ARM_AM::getSOImmVal(Imm) != -1; 6557 if (Subtarget->isThumb2()) 6558 return ARM_AM::getT2SOImmVal(Imm) != -1; 6559 return Imm >= 0 && Imm <= 255; 6560} 6561 6562static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 6563 bool isSEXTLoad, SDValue &Base, 6564 SDValue &Offset, bool &isInc, 6565 SelectionDAG &DAG) { 6566 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 6567 return false; 6568 6569 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 6570 // AddressingMode 3 6571 Base = Ptr->getOperand(0); 6572 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 6573 int RHSC = (int)RHS->getZExtValue(); 6574 if (RHSC < 0 && RHSC > -256) { 6575 assert(Ptr->getOpcode() == ISD::ADD); 6576 isInc = false; 6577 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 6578 return true; 6579 } 6580 } 6581 isInc = (Ptr->getOpcode() == ISD::ADD); 6582 Offset = Ptr->getOperand(1); 6583 return true; 6584 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 6585 // AddressingMode 2 6586 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 6587 int RHSC = (int)RHS->getZExtValue(); 6588 if (RHSC < 0 && RHSC > -0x1000) { 6589 assert(Ptr->getOpcode() == ISD::ADD); 6590 isInc = false; 6591 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 6592 Base = Ptr->getOperand(0); 6593 return true; 6594 } 6595 } 6596 6597 if (Ptr->getOpcode() == ISD::ADD) { 6598 isInc = true; 6599 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 6600 if (ShOpcVal != ARM_AM::no_shift) { 6601 Base = Ptr->getOperand(1); 6602 Offset = Ptr->getOperand(0); 6603 } else { 6604 Base = Ptr->getOperand(0); 6605 Offset = Ptr->getOperand(1); 6606 } 6607 return true; 6608 } 6609 6610 isInc = (Ptr->getOpcode() == ISD::ADD); 6611 Base = Ptr->getOperand(0); 6612 Offset = Ptr->getOperand(1); 6613 return true; 6614 } 6615 6616 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 6617 return false; 6618} 6619 6620static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 6621 bool isSEXTLoad, SDValue &Base, 6622 SDValue &Offset, bool &isInc, 6623 SelectionDAG &DAG) { 6624 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 6625 return false; 6626 6627 Base = Ptr->getOperand(0); 6628 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 6629 int RHSC = (int)RHS->getZExtValue(); 6630 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 6631 assert(Ptr->getOpcode() == ISD::ADD); 6632 isInc = false; 6633 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 6634 return true; 6635 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 6636 isInc = Ptr->getOpcode() == ISD::ADD; 6637 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 6638 return true; 6639 } 6640 } 6641 6642 return false; 6643} 6644 6645/// getPreIndexedAddressParts - returns true by value, base pointer and 6646/// offset pointer and addressing mode by reference if the node's address 6647/// can be legally represented as pre-indexed load / store address. 
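/// For example, a load whose address is "base + 4" can be turned into
/// "ldr r0, [r1, #4]!", which updates the base register as a side effect of
/// the access.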
6648bool 6649ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 6650 SDValue &Offset, 6651 ISD::MemIndexedMode &AM, 6652 SelectionDAG &DAG) const { 6653 if (Subtarget->isThumb1Only()) 6654 return false; 6655 6656 EVT VT; 6657 SDValue Ptr; 6658 bool isSEXTLoad = false; 6659 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 6660 Ptr = LD->getBasePtr(); 6661 VT = LD->getMemoryVT(); 6662 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 6663 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 6664 Ptr = ST->getBasePtr(); 6665 VT = ST->getMemoryVT(); 6666 } else 6667 return false; 6668 6669 bool isInc; 6670 bool isLegal = false; 6671 if (Subtarget->isThumb2()) 6672 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 6673 Offset, isInc, DAG); 6674 else 6675 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 6676 Offset, isInc, DAG); 6677 if (!isLegal) 6678 return false; 6679 6680 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 6681 return true; 6682} 6683 6684/// getPostIndexedAddressParts - returns true by value, base pointer and 6685/// offset pointer and addressing mode by reference if this node can be 6686/// combined with a load / store to form a post-indexed load / store. 6687bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 6688 SDValue &Base, 6689 SDValue &Offset, 6690 ISD::MemIndexedMode &AM, 6691 SelectionDAG &DAG) const { 6692 if (Subtarget->isThumb1Only()) 6693 return false; 6694 6695 EVT VT; 6696 SDValue Ptr; 6697 bool isSEXTLoad = false; 6698 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 6699 VT = LD->getMemoryVT(); 6700 Ptr = LD->getBasePtr(); 6701 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 6702 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 6703 VT = ST->getMemoryVT(); 6704 Ptr = ST->getBasePtr(); 6705 } else 6706 return false; 6707 6708 bool isInc; 6709 bool isLegal = false; 6710 if (Subtarget->isThumb2()) 6711 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 6712 isInc, DAG); 6713 else 6714 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 6715 isInc, DAG); 6716 if (!isLegal) 6717 return false; 6718 6719 if (Ptr != Base) { 6720 // Swap base ptr and offset to catch more post-index load / store when 6721 // it's legal. In Thumb2 mode, offset must be an immediate. 6722 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 6723 !Subtarget->isThumb2()) 6724 std::swap(Base, Offset); 6725 6726 // Post-indexed load / store update the base pointer. 6727 if (Ptr != Base) 6728 return false; 6729 } 6730 6731 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 6732 return true; 6733} 6734 6735void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 6736 const APInt &Mask, 6737 APInt &KnownZero, 6738 APInt &KnownOne, 6739 const SelectionDAG &DAG, 6740 unsigned Depth) const { 6741 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 6742 switch (Op.getOpcode()) { 6743 default: break; 6744 case ARMISD::CMOV: { 6745 // Bits are known zero/one if known on the LHS and RHS. 
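    // CMOV yields one of its two operands, so a bit is known in the result
    // only if it is known, with the same value, in both operands; hence the
    // intersection below. E.g. if the operands are 0 and 1, all bits except
    // bit 0 are known to be zero.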
6746 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 6747 if (KnownZero == 0 && KnownOne == 0) return; 6748 6749 APInt KnownZeroRHS, KnownOneRHS; 6750 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 6751 KnownZeroRHS, KnownOneRHS, Depth+1); 6752 KnownZero &= KnownZeroRHS; 6753 KnownOne &= KnownOneRHS; 6754 return; 6755 } 6756 } 6757} 6758 6759//===----------------------------------------------------------------------===// 6760// ARM Inline Assembly Support 6761//===----------------------------------------------------------------------===// 6762 6763bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 6764 // Looking for "rev" which is V6+. 6765 if (!Subtarget->hasV6Ops()) 6766 return false; 6767 6768 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 6769 std::string AsmStr = IA->getAsmString(); 6770 SmallVector<StringRef, 4> AsmPieces; 6771 SplitString(AsmStr, AsmPieces, ";\n"); 6772 6773 switch (AsmPieces.size()) { 6774 default: return false; 6775 case 1: 6776 AsmStr = AsmPieces[0]; 6777 AsmPieces.clear(); 6778 SplitString(AsmStr, AsmPieces, " \t,"); 6779 6780 // rev $0, $1 6781 if (AsmPieces.size() == 3 && 6782 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 6783 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 6784 const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 6785 if (Ty && Ty->getBitWidth() == 32) 6786 return IntrinsicLowering::LowerToByteSwap(CI); 6787 } 6788 break; 6789 } 6790 6791 return false; 6792} 6793 6794/// getConstraintType - Given a constraint letter, return the type of 6795/// constraint it is for this target. 6796ARMTargetLowering::ConstraintType 6797ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 6798 if (Constraint.size() == 1) { 6799 switch (Constraint[0]) { 6800 default: break; 6801 case 'l': return C_RegisterClass; 6802 case 'w': return C_RegisterClass; 6803 } 6804 } 6805 return TargetLowering::getConstraintType(Constraint); 6806} 6807 6808/// Examine constraint type and operand type and determine a weight value. 6809/// This object must already have been set up with the operand type 6810/// and the current alternative constraint selected. 6811TargetLowering::ConstraintWeight 6812ARMTargetLowering::getSingleConstraintMatchWeight( 6813 AsmOperandInfo &info, const char *constraint) const { 6814 ConstraintWeight weight = CW_Invalid; 6815 Value *CallOperandVal = info.CallOperandVal; 6816 // If we don't have a value, we can't do a match, 6817 // but allow it at the lowest weight. 6818 if (CallOperandVal == NULL) 6819 return CW_Default; 6820 const Type *type = CallOperandVal->getType(); 6821 // Look at the constraint type. 
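  // 'l' is a real restriction in Thumb mode, where it means the low
  // registers r0-r7, so it is weighted as a specific register class there;
  // in ARM mode it behaves like an ordinary GPR. 'w' denotes the VFP/NEON
  // registers and is only given weight for floating-point values here.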
6822 switch (*constraint) { 6823 default: 6824 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 6825 break; 6826 case 'l': 6827 if (type->isIntegerTy()) { 6828 if (Subtarget->isThumb()) 6829 weight = CW_SpecificReg; 6830 else 6831 weight = CW_Register; 6832 } 6833 break; 6834 case 'w': 6835 if (type->isFloatingPointTy()) 6836 weight = CW_Register; 6837 break; 6838 } 6839 return weight; 6840} 6841 6842std::pair<unsigned, const TargetRegisterClass*> 6843ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 6844 EVT VT) const { 6845 if (Constraint.size() == 1) { 6846 // GCC ARM Constraint Letters 6847 switch (Constraint[0]) { 6848 case 'l': 6849 if (Subtarget->isThumb()) 6850 return std::make_pair(0U, ARM::tGPRRegisterClass); 6851 else 6852 return std::make_pair(0U, ARM::GPRRegisterClass); 6853 case 'r': 6854 return std::make_pair(0U, ARM::GPRRegisterClass); 6855 case 'w': 6856 if (VT == MVT::f32) 6857 return std::make_pair(0U, ARM::SPRRegisterClass); 6858 if (VT.getSizeInBits() == 64) 6859 return std::make_pair(0U, ARM::DPRRegisterClass); 6860 if (VT.getSizeInBits() == 128) 6861 return std::make_pair(0U, ARM::QPRRegisterClass); 6862 break; 6863 } 6864 } 6865 if (StringRef("{cc}").equals_lower(Constraint)) 6866 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 6867 6868 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 6869} 6870 6871std::vector<unsigned> ARMTargetLowering:: 6872getRegClassForInlineAsmConstraint(const std::string &Constraint, 6873 EVT VT) const { 6874 if (Constraint.size() != 1) 6875 return std::vector<unsigned>(); 6876 6877 switch (Constraint[0]) { // GCC ARM Constraint Letters 6878 default: break; 6879 case 'l': 6880 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 6881 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 6882 0); 6883 case 'r': 6884 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 6885 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 6886 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 6887 ARM::R12, ARM::LR, 0); 6888 case 'w': 6889 if (VT == MVT::f32) 6890 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 6891 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 6892 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 6893 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 6894 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 6895 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 6896 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 6897 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 6898 if (VT.getSizeInBits() == 64) 6899 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 6900 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 6901 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 6902 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 6903 if (VT.getSizeInBits() == 128) 6904 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 6905 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 6906 break; 6907 } 6908 6909 return std::vector<unsigned>(); 6910} 6911 6912/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 6913/// vector. If it is invalid, don't add anything to Ops. 
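/// The letters handled below mirror GCC's ARM immediate constraints: for
/// example, 'I' accepts a value usable directly as a data-processing
/// immediate, while 'J' accepts a negated ADD immediate (-255 to -1 on
/// Thumb). The exact ranges are subtarget-dependent and are spelled out
/// case by case below.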
6914void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 6915 char Constraint, 6916 std::vector<SDValue>&Ops, 6917 SelectionDAG &DAG) const { 6918 SDValue Result(0, 0); 6919 6920 switch (Constraint) { 6921 default: break; 6922 case 'I': case 'J': case 'K': case 'L': 6923 case 'M': case 'N': case 'O': 6924 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 6925 if (!C) 6926 return; 6927 6928 int64_t CVal64 = C->getSExtValue(); 6929 int CVal = (int) CVal64; 6930 // None of these constraints allow values larger than 32 bits. Check 6931 // that the value fits in an int. 6932 if (CVal != CVal64) 6933 return; 6934 6935 switch (Constraint) { 6936 case 'I': 6937 if (Subtarget->isThumb1Only()) { 6938 // This must be a constant between 0 and 255, for ADD 6939 // immediates. 6940 if (CVal >= 0 && CVal <= 255) 6941 break; 6942 } else if (Subtarget->isThumb2()) { 6943 // A constant that can be used as an immediate value in a 6944 // data-processing instruction. 6945 if (ARM_AM::getT2SOImmVal(CVal) != -1) 6946 break; 6947 } else { 6948 // A constant that can be used as an immediate value in a 6949 // data-processing instruction. 6950 if (ARM_AM::getSOImmVal(CVal) != -1) 6951 break; 6952 } 6953 return; 6954 6955 case 'J': 6956 if (Subtarget->isThumb()) { // FIXME thumb2 6957 // This must be a constant between -255 and -1, for negated ADD 6958 // immediates. This can be used in GCC with an "n" modifier that 6959 // prints the negated value, for use with SUB instructions. It is 6960 // not useful otherwise but is implemented for compatibility. 6961 if (CVal >= -255 && CVal <= -1) 6962 break; 6963 } else { 6964 // This must be a constant between -4095 and 4095. It is not clear 6965 // what this constraint is intended for. Implemented for 6966 // compatibility with GCC. 6967 if (CVal >= -4095 && CVal <= 4095) 6968 break; 6969 } 6970 return; 6971 6972 case 'K': 6973 if (Subtarget->isThumb1Only()) { 6974 // A 32-bit value where only one byte has a nonzero value. Exclude 6975 // zero to match GCC. This constraint is used by GCC internally for 6976 // constants that can be loaded with a move/shift combination. 6977 // It is not useful otherwise but is implemented for compatibility. 6978 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 6979 break; 6980 } else if (Subtarget->isThumb2()) { 6981 // A constant whose bitwise inverse can be used as an immediate 6982 // value in a data-processing instruction. This can be used in GCC 6983 // with a "B" modifier that prints the inverted value, for use with 6984 // BIC and MVN instructions. It is not useful otherwise but is 6985 // implemented for compatibility. 6986 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 6987 break; 6988 } else { 6989 // A constant whose bitwise inverse can be used as an immediate 6990 // value in a data-processing instruction. This can be used in GCC 6991 // with a "B" modifier that prints the inverted value, for use with 6992 // BIC and MVN instructions. It is not useful otherwise but is 6993 // implemented for compatibility. 6994 if (ARM_AM::getSOImmVal(~CVal) != -1) 6995 break; 6996 } 6997 return; 6998 6999 case 'L': 7000 if (Subtarget->isThumb1Only()) { 7001 // This must be a constant between -7 and 7, 7002 // for 3-operand ADD/SUB immediate instructions. 7003 if (CVal >= -7 && CVal < 7) 7004 break; 7005 } else if (Subtarget->isThumb2()) { 7006 // A constant whose negation can be used as an immediate value in a 7007 // data-processing instruction. 
This can be used in GCC with an "n" 7008 // modifier that prints the negated value, for use with SUB 7009 // instructions. It is not useful otherwise but is implemented for 7010 // compatibility. 7011 if (ARM_AM::getT2SOImmVal(-CVal) != -1) 7012 break; 7013 } else { 7014 // A constant whose negation can be used as an immediate value in a 7015 // data-processing instruction. This can be used in GCC with an "n" 7016 // modifier that prints the negated value, for use with SUB 7017 // instructions. It is not useful otherwise but is implemented for 7018 // compatibility. 7019 if (ARM_AM::getSOImmVal(-CVal) != -1) 7020 break; 7021 } 7022 return; 7023 7024 case 'M': 7025 if (Subtarget->isThumb()) { // FIXME thumb2 7026 // This must be a multiple of 4 between 0 and 1020, for 7027 // ADD sp + immediate. 7028 if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0)) 7029 break; 7030 } else { 7031 // A power of two or a constant between 0 and 32. This is used in 7032 // GCC for the shift amount on shifted register operands, but it is 7033 // useful in general for any shift amounts. 7034 if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0)) 7035 break; 7036 } 7037 return; 7038 7039 case 'N': 7040 if (Subtarget->isThumb()) { // FIXME thumb2 7041 // This must be a constant between 0 and 31, for shift amounts. 7042 if (CVal >= 0 && CVal <= 31) 7043 break; 7044 } 7045 return; 7046 7047 case 'O': 7048 if (Subtarget->isThumb()) { // FIXME thumb2 7049 // This must be a multiple of 4 between -508 and 508, for 7050 // ADD/SUB sp = sp + immediate. 7051 if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0)) 7052 break; 7053 } 7054 return; 7055 } 7056 Result = DAG.getTargetConstant(CVal, Op.getValueType()); 7057 break; 7058 } 7059 7060 if (Result.getNode()) { 7061 Ops.push_back(Result); 7062 return; 7063 } 7064 return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 7065} 7066 7067bool 7068ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 7069 // The ARM target isn't yet aware of offsets. 7070 return false; 7071} 7072 7073int ARM::getVFPf32Imm(const APFloat &FPImm) { 7074 APInt Imm = FPImm.bitcastToAPInt(); 7075 uint32_t Sign = Imm.lshr(31).getZExtValue() & 1; 7076 int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127; // -126 to 127 7077 int64_t Mantissa = Imm.getZExtValue() & 0x7fffff; // 23 bits 7078 7079 // We can handle 4 bits of mantissa. 7080 // mantissa = (16+UInt(e:f:g:h))/16. 7081 if (Mantissa & 0x7ffff) 7082 return -1; 7083 Mantissa >>= 19; 7084 if ((Mantissa & 0xf) != Mantissa) 7085 return -1; 7086 7087 // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3 7088 if (Exp < -3 || Exp > 4) 7089 return -1; 7090 Exp = ((Exp+3) & 0x7) ^ 4; 7091 7092 return ((int)Sign << 7) | (Exp << 4) | Mantissa; 7093} 7094 7095int ARM::getVFPf64Imm(const APFloat &FPImm) { 7096 APInt Imm = FPImm.bitcastToAPInt(); 7097 uint64_t Sign = Imm.lshr(63).getZExtValue() & 1; 7098 int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023; // -1022 to 1023 7099 uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL; 7100 7101 // We can handle 4 bits of mantissa. 7102 // mantissa = (16+UInt(e:f:g:h))/16. 
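  // Worked example: +1.0 is 0x3ff0000000000000, giving Sign = 0, Exp = 0 and
  // Mantissa = 0; it passes both mantissa checks and the exponent range
  // check, and encodes as (0 << 7) | (((0 + 3) & 7) ^ 4) << 4 | 0 = 0x70.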
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;
  // There can be 1's on either or both "outsides"; all the "inside"
  // bits must be 0's.
  unsigned int lsb = 0, msb = 31;
  while (v & (1 << msb)) --msb;
  while (v & (1 << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1 << i))
      return false;
  }
  return true;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}

/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes. The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
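    // E.g. a vst3 of three v8i8 vectors stores 24 bytes, so the loop below
    // counts NumElts = 3 and memVT becomes v3i64; the loop stops at the
    // trailing alignment argument, which is not a vector.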
7181 unsigned NumElts = 0; 7182 for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { 7183 const Type *ArgTy = I.getArgOperand(ArgI)->getType(); 7184 if (!ArgTy->isVectorTy()) 7185 break; 7186 NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8; 7187 } 7188 Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); 7189 Info.ptrVal = I.getArgOperand(0); 7190 Info.offset = 0; 7191 Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); 7192 Info.align = cast<ConstantInt>(AlignArg)->getZExtValue(); 7193 Info.vol = false; // volatile stores with NEON intrinsics not supported 7194 Info.readMem = false; 7195 Info.writeMem = true; 7196 return true; 7197 } 7198 default: 7199 break; 7200 } 7201 7202 return false; 7203} 7204