ARMISelLowering.cpp revision cca82149adef8306a295abdc963213ae3b11bbb6
1//===-- ARMISelLowering.cpp - ARM DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the interfaces that ARM uses to lower LLVM code into a 11// selection DAG. 12// 13//===----------------------------------------------------------------------===// 14 15#define DEBUG_TYPE "arm-isel" 16#include "ARM.h" 17#include "ARMAddressingModes.h" 18#include "ARMCallingConv.h" 19#include "ARMConstantPoolValue.h" 20#include "ARMISelLowering.h" 21#include "ARMMachineFunctionInfo.h" 22#include "ARMPerfectShuffle.h" 23#include "ARMRegisterInfo.h" 24#include "ARMSubtarget.h" 25#include "ARMTargetMachine.h" 26#include "ARMTargetObjectFile.h" 27#include "llvm/CallingConv.h" 28#include "llvm/Constants.h" 29#include "llvm/Function.h" 30#include "llvm/GlobalValue.h" 31#include "llvm/Instruction.h" 32#include "llvm/Instructions.h" 33#include "llvm/Intrinsics.h" 34#include "llvm/Type.h" 35#include "llvm/CodeGen/CallingConvLower.h" 36#include "llvm/CodeGen/IntrinsicLowering.h" 37#include "llvm/CodeGen/MachineBasicBlock.h" 38#include "llvm/CodeGen/MachineFrameInfo.h" 39#include "llvm/CodeGen/MachineFunction.h" 40#include "llvm/CodeGen/MachineInstrBuilder.h" 41#include "llvm/CodeGen/MachineRegisterInfo.h" 42#include "llvm/CodeGen/PseudoSourceValue.h" 43#include "llvm/CodeGen/SelectionDAG.h" 44#include "llvm/MC/MCSectionMachO.h" 45#include "llvm/Target/TargetOptions.h" 46#include "llvm/ADT/VectorExtras.h" 47#include "llvm/ADT/StringExtras.h" 48#include "llvm/ADT/Statistic.h" 49#include "llvm/Support/CommandLine.h" 50#include "llvm/Support/ErrorHandling.h" 51#include "llvm/Support/MathExtras.h" 52#include "llvm/Support/raw_ostream.h" 53#include <sstream> 54using namespace llvm; 55 56STATISTIC(NumTailCalls, "Number of tail calls"); 57STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt"); 58 59// This option should go away when tail calls fully work. 60static cl::opt<bool> 61EnableARMTailCalls("arm-tail-calls", cl::Hidden, 62 cl::desc("Generate tail calls (TEMPORARY OPTION)."), 63 cl::init(false)); 64 65cl::opt<bool> 66EnableARMLongCalls("arm-long-calls", cl::Hidden, 67 cl::desc("Generate calls via indirect call instructions"), 68 cl::init(false)); 69 70static cl::opt<bool> 71ARMInterworking("arm-interworking", cl::Hidden, 72 cl::desc("Enable / disable ARM interworking (for debugging only)"), 73 cl::init(true)); 74 75// The APCS parameter registers. 
76static const unsigned GPRArgRegs[] = { 77 ARM::R0, ARM::R1, ARM::R2, ARM::R3 78}; 79 80void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, 81 EVT PromotedBitwiseVT) { 82 if (VT != PromotedLdStVT) { 83 setOperationAction(ISD::LOAD, VT.getSimpleVT(), Promote); 84 AddPromotedToType (ISD::LOAD, VT.getSimpleVT(), 85 PromotedLdStVT.getSimpleVT()); 86 87 setOperationAction(ISD::STORE, VT.getSimpleVT(), Promote); 88 AddPromotedToType (ISD::STORE, VT.getSimpleVT(), 89 PromotedLdStVT.getSimpleVT()); 90 } 91 92 EVT ElemTy = VT.getVectorElementType(); 93 if (ElemTy != MVT::i64 && ElemTy != MVT::f64) 94 setOperationAction(ISD::VSETCC, VT.getSimpleVT(), Custom); 95 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); 96 if (ElemTy != MVT::i32) { 97 setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); 98 setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); 99 setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); 100 setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Expand); 101 } 102 setOperationAction(ISD::BUILD_VECTOR, VT.getSimpleVT(), Custom); 103 setOperationAction(ISD::VECTOR_SHUFFLE, VT.getSimpleVT(), Custom); 104 setOperationAction(ISD::CONCAT_VECTORS, VT.getSimpleVT(), Legal); 105 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); 106 setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); 107 setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); 108 if (VT.isInteger()) { 109 setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); 110 setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); 111 setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); 112 setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); 113 setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); 114 for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 115 InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) 116 setTruncStoreAction(VT.getSimpleVT(), 117 (MVT::SimpleValueType)InnerVT, Expand); 118 } 119 setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); 120 121 // Promote all bit-wise operations. 122 if (VT.isInteger() && VT != PromotedBitwiseVT) { 123 setOperationAction(ISD::AND, VT.getSimpleVT(), Promote); 124 AddPromotedToType (ISD::AND, VT.getSimpleVT(), 125 PromotedBitwiseVT.getSimpleVT()); 126 setOperationAction(ISD::OR, VT.getSimpleVT(), Promote); 127 AddPromotedToType (ISD::OR, VT.getSimpleVT(), 128 PromotedBitwiseVT.getSimpleVT()); 129 setOperationAction(ISD::XOR, VT.getSimpleVT(), Promote); 130 AddPromotedToType (ISD::XOR, VT.getSimpleVT(), 131 PromotedBitwiseVT.getSimpleVT()); 132 } 133 134 // Neon does not support vector divide/remainder operations. 
135 setOperationAction(ISD::SDIV, VT.getSimpleVT(), Expand); 136 setOperationAction(ISD::UDIV, VT.getSimpleVT(), Expand); 137 setOperationAction(ISD::FDIV, VT.getSimpleVT(), Expand); 138 setOperationAction(ISD::SREM, VT.getSimpleVT(), Expand); 139 setOperationAction(ISD::UREM, VT.getSimpleVT(), Expand); 140 setOperationAction(ISD::FREM, VT.getSimpleVT(), Expand); 141} 142 143void ARMTargetLowering::addDRTypeForNEON(EVT VT) { 144 addRegisterClass(VT, ARM::DPRRegisterClass); 145 addTypeForNEON(VT, MVT::f64, MVT::v2i32); 146} 147 148void ARMTargetLowering::addQRTypeForNEON(EVT VT) { 149 addRegisterClass(VT, ARM::QPRRegisterClass); 150 addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); 151} 152 153static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { 154 if (TM.getSubtarget<ARMSubtarget>().isTargetDarwin()) 155 return new TargetLoweringObjectFileMachO(); 156 157 return new ARMElfTargetObjectFile(); 158} 159 160ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) 161 : TargetLowering(TM, createTLOF(TM)) { 162 Subtarget = &TM.getSubtarget<ARMSubtarget>(); 163 RegInfo = TM.getRegisterInfo(); 164 Itins = TM.getInstrItineraryData(); 165 166 if (Subtarget->isTargetDarwin()) { 167 // Uses VFP for Thumb libfuncs if available. 168 if (Subtarget->isThumb() && Subtarget->hasVFP2()) { 169 // Single-precision floating-point arithmetic. 170 setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); 171 setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); 172 setLibcallName(RTLIB::MUL_F32, "__mulsf3vfp"); 173 setLibcallName(RTLIB::DIV_F32, "__divsf3vfp"); 174 175 // Double-precision floating-point arithmetic. 176 setLibcallName(RTLIB::ADD_F64, "__adddf3vfp"); 177 setLibcallName(RTLIB::SUB_F64, "__subdf3vfp"); 178 setLibcallName(RTLIB::MUL_F64, "__muldf3vfp"); 179 setLibcallName(RTLIB::DIV_F64, "__divdf3vfp"); 180 181 // Single-precision comparisons. 182 setLibcallName(RTLIB::OEQ_F32, "__eqsf2vfp"); 183 setLibcallName(RTLIB::UNE_F32, "__nesf2vfp"); 184 setLibcallName(RTLIB::OLT_F32, "__ltsf2vfp"); 185 setLibcallName(RTLIB::OLE_F32, "__lesf2vfp"); 186 setLibcallName(RTLIB::OGE_F32, "__gesf2vfp"); 187 setLibcallName(RTLIB::OGT_F32, "__gtsf2vfp"); 188 setLibcallName(RTLIB::UO_F32, "__unordsf2vfp"); 189 setLibcallName(RTLIB::O_F32, "__unordsf2vfp"); 190 191 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 192 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE); 193 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 194 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 195 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 196 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 197 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 198 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 199 200 // Double-precision comparisons. 201 setLibcallName(RTLIB::OEQ_F64, "__eqdf2vfp"); 202 setLibcallName(RTLIB::UNE_F64, "__nedf2vfp"); 203 setLibcallName(RTLIB::OLT_F64, "__ltdf2vfp"); 204 setLibcallName(RTLIB::OLE_F64, "__ledf2vfp"); 205 setLibcallName(RTLIB::OGE_F64, "__gedf2vfp"); 206 setLibcallName(RTLIB::OGT_F64, "__gtdf2vfp"); 207 setLibcallName(RTLIB::UO_F64, "__unorddf2vfp"); 208 setLibcallName(RTLIB::O_F64, "__unorddf2vfp"); 209 210 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 211 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETNE); 212 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 213 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 214 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 215 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 216 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 217 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 218 219 // Floating-point to integer conversions. 
220 // i64 conversions are done via library routines even when generating VFP 221 // instructions, so use the same ones. 222 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__fixdfsivfp"); 223 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__fixunsdfsivfp"); 224 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__fixsfsivfp"); 225 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__fixunssfsivfp"); 226 227 // Conversions between floating types. 228 setLibcallName(RTLIB::FPROUND_F64_F32, "__truncdfsf2vfp"); 229 setLibcallName(RTLIB::FPEXT_F32_F64, "__extendsfdf2vfp"); 230 231 // Integer to floating-point conversions. 232 // i64 conversions are done via library routines even when generating VFP 233 // instructions, so use the same ones. 234 // FIXME: There appears to be some naming inconsistency in ARM libgcc: 235 // e.g., __floatunsidf vs. __floatunssidfvfp. 236 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__floatsidfvfp"); 237 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__floatunssidfvfp"); 238 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__floatsisfvfp"); 239 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__floatunssisfvfp"); 240 } 241 } 242 243 // These libcalls are not available in 32-bit. 244 setLibcallName(RTLIB::SHL_I128, 0); 245 setLibcallName(RTLIB::SRL_I128, 0); 246 setLibcallName(RTLIB::SRA_I128, 0); 247 248 if (Subtarget->isAAPCS_ABI()) { 249 // Double-precision floating-point arithmetic helper functions 250 // RTABI chapter 4.1.2, Table 2 251 setLibcallName(RTLIB::ADD_F64, "__aeabi_dadd"); 252 setLibcallName(RTLIB::DIV_F64, "__aeabi_ddiv"); 253 setLibcallName(RTLIB::MUL_F64, "__aeabi_dmul"); 254 setLibcallName(RTLIB::SUB_F64, "__aeabi_dsub"); 255 setLibcallCallingConv(RTLIB::ADD_F64, CallingConv::ARM_AAPCS); 256 setLibcallCallingConv(RTLIB::DIV_F64, CallingConv::ARM_AAPCS); 257 setLibcallCallingConv(RTLIB::MUL_F64, CallingConv::ARM_AAPCS); 258 setLibcallCallingConv(RTLIB::SUB_F64, CallingConv::ARM_AAPCS); 259 260 // Double-precision floating-point comparison helper functions 261 // RTABI chapter 4.1.2, Table 3 262 setLibcallName(RTLIB::OEQ_F64, "__aeabi_dcmpeq"); 263 setCmpLibcallCC(RTLIB::OEQ_F64, ISD::SETNE); 264 setLibcallName(RTLIB::UNE_F64, "__aeabi_dcmpeq"); 265 setCmpLibcallCC(RTLIB::UNE_F64, ISD::SETEQ); 266 setLibcallName(RTLIB::OLT_F64, "__aeabi_dcmplt"); 267 setCmpLibcallCC(RTLIB::OLT_F64, ISD::SETNE); 268 setLibcallName(RTLIB::OLE_F64, "__aeabi_dcmple"); 269 setCmpLibcallCC(RTLIB::OLE_F64, ISD::SETNE); 270 setLibcallName(RTLIB::OGE_F64, "__aeabi_dcmpge"); 271 setCmpLibcallCC(RTLIB::OGE_F64, ISD::SETNE); 272 setLibcallName(RTLIB::OGT_F64, "__aeabi_dcmpgt"); 273 setCmpLibcallCC(RTLIB::OGT_F64, ISD::SETNE); 274 setLibcallName(RTLIB::UO_F64, "__aeabi_dcmpun"); 275 setCmpLibcallCC(RTLIB::UO_F64, ISD::SETNE); 276 setLibcallName(RTLIB::O_F64, "__aeabi_dcmpun"); 277 setCmpLibcallCC(RTLIB::O_F64, ISD::SETEQ); 278 setLibcallCallingConv(RTLIB::OEQ_F64, CallingConv::ARM_AAPCS); 279 setLibcallCallingConv(RTLIB::UNE_F64, CallingConv::ARM_AAPCS); 280 setLibcallCallingConv(RTLIB::OLT_F64, CallingConv::ARM_AAPCS); 281 setLibcallCallingConv(RTLIB::OLE_F64, CallingConv::ARM_AAPCS); 282 setLibcallCallingConv(RTLIB::OGE_F64, CallingConv::ARM_AAPCS); 283 setLibcallCallingConv(RTLIB::OGT_F64, CallingConv::ARM_AAPCS); 284 setLibcallCallingConv(RTLIB::UO_F64, CallingConv::ARM_AAPCS); 285 setLibcallCallingConv(RTLIB::O_F64, CallingConv::ARM_AAPCS); 286 287 // Single-precision floating-point arithmetic helper functions 288 // RTABI chapter 4.1.2, Table 4 289 setLibcallName(RTLIB::ADD_F32, "__aeabi_fadd"); 290 setLibcallName(RTLIB::DIV_F32, 
"__aeabi_fdiv"); 291 setLibcallName(RTLIB::MUL_F32, "__aeabi_fmul"); 292 setLibcallName(RTLIB::SUB_F32, "__aeabi_fsub"); 293 setLibcallCallingConv(RTLIB::ADD_F32, CallingConv::ARM_AAPCS); 294 setLibcallCallingConv(RTLIB::DIV_F32, CallingConv::ARM_AAPCS); 295 setLibcallCallingConv(RTLIB::MUL_F32, CallingConv::ARM_AAPCS); 296 setLibcallCallingConv(RTLIB::SUB_F32, CallingConv::ARM_AAPCS); 297 298 // Single-precision floating-point comparison helper functions 299 // RTABI chapter 4.1.2, Table 5 300 setLibcallName(RTLIB::OEQ_F32, "__aeabi_fcmpeq"); 301 setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETNE); 302 setLibcallName(RTLIB::UNE_F32, "__aeabi_fcmpeq"); 303 setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETEQ); 304 setLibcallName(RTLIB::OLT_F32, "__aeabi_fcmplt"); 305 setCmpLibcallCC(RTLIB::OLT_F32, ISD::SETNE); 306 setLibcallName(RTLIB::OLE_F32, "__aeabi_fcmple"); 307 setCmpLibcallCC(RTLIB::OLE_F32, ISD::SETNE); 308 setLibcallName(RTLIB::OGE_F32, "__aeabi_fcmpge"); 309 setCmpLibcallCC(RTLIB::OGE_F32, ISD::SETNE); 310 setLibcallName(RTLIB::OGT_F32, "__aeabi_fcmpgt"); 311 setCmpLibcallCC(RTLIB::OGT_F32, ISD::SETNE); 312 setLibcallName(RTLIB::UO_F32, "__aeabi_fcmpun"); 313 setCmpLibcallCC(RTLIB::UO_F32, ISD::SETNE); 314 setLibcallName(RTLIB::O_F32, "__aeabi_fcmpun"); 315 setCmpLibcallCC(RTLIB::O_F32, ISD::SETEQ); 316 setLibcallCallingConv(RTLIB::OEQ_F32, CallingConv::ARM_AAPCS); 317 setLibcallCallingConv(RTLIB::UNE_F32, CallingConv::ARM_AAPCS); 318 setLibcallCallingConv(RTLIB::OLT_F32, CallingConv::ARM_AAPCS); 319 setLibcallCallingConv(RTLIB::OLE_F32, CallingConv::ARM_AAPCS); 320 setLibcallCallingConv(RTLIB::OGE_F32, CallingConv::ARM_AAPCS); 321 setLibcallCallingConv(RTLIB::OGT_F32, CallingConv::ARM_AAPCS); 322 setLibcallCallingConv(RTLIB::UO_F32, CallingConv::ARM_AAPCS); 323 setLibcallCallingConv(RTLIB::O_F32, CallingConv::ARM_AAPCS); 324 325 // Floating-point to integer conversions. 326 // RTABI chapter 4.1.2, Table 6 327 setLibcallName(RTLIB::FPTOSINT_F64_I32, "__aeabi_d2iz"); 328 setLibcallName(RTLIB::FPTOUINT_F64_I32, "__aeabi_d2uiz"); 329 setLibcallName(RTLIB::FPTOSINT_F64_I64, "__aeabi_d2lz"); 330 setLibcallName(RTLIB::FPTOUINT_F64_I64, "__aeabi_d2ulz"); 331 setLibcallName(RTLIB::FPTOSINT_F32_I32, "__aeabi_f2iz"); 332 setLibcallName(RTLIB::FPTOUINT_F32_I32, "__aeabi_f2uiz"); 333 setLibcallName(RTLIB::FPTOSINT_F32_I64, "__aeabi_f2lz"); 334 setLibcallName(RTLIB::FPTOUINT_F32_I64, "__aeabi_f2ulz"); 335 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I32, CallingConv::ARM_AAPCS); 336 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I32, CallingConv::ARM_AAPCS); 337 setLibcallCallingConv(RTLIB::FPTOSINT_F64_I64, CallingConv::ARM_AAPCS); 338 setLibcallCallingConv(RTLIB::FPTOUINT_F64_I64, CallingConv::ARM_AAPCS); 339 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I32, CallingConv::ARM_AAPCS); 340 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I32, CallingConv::ARM_AAPCS); 341 setLibcallCallingConv(RTLIB::FPTOSINT_F32_I64, CallingConv::ARM_AAPCS); 342 setLibcallCallingConv(RTLIB::FPTOUINT_F32_I64, CallingConv::ARM_AAPCS); 343 344 // Conversions between floating types. 345 // RTABI chapter 4.1.2, Table 7 346 setLibcallName(RTLIB::FPROUND_F64_F32, "__aeabi_d2f"); 347 setLibcallName(RTLIB::FPEXT_F32_F64, "__aeabi_f2d"); 348 setLibcallCallingConv(RTLIB::FPROUND_F64_F32, CallingConv::ARM_AAPCS); 349 setLibcallCallingConv(RTLIB::FPEXT_F32_F64, CallingConv::ARM_AAPCS); 350 351 // Integer to floating-point conversions. 
352 // RTABI chapter 4.1.2, Table 8 353 setLibcallName(RTLIB::SINTTOFP_I32_F64, "__aeabi_i2d"); 354 setLibcallName(RTLIB::UINTTOFP_I32_F64, "__aeabi_ui2d"); 355 setLibcallName(RTLIB::SINTTOFP_I64_F64, "__aeabi_l2d"); 356 setLibcallName(RTLIB::UINTTOFP_I64_F64, "__aeabi_ul2d"); 357 setLibcallName(RTLIB::SINTTOFP_I32_F32, "__aeabi_i2f"); 358 setLibcallName(RTLIB::UINTTOFP_I32_F32, "__aeabi_ui2f"); 359 setLibcallName(RTLIB::SINTTOFP_I64_F32, "__aeabi_l2f"); 360 setLibcallName(RTLIB::UINTTOFP_I64_F32, "__aeabi_ul2f"); 361 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 362 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F64, CallingConv::ARM_AAPCS); 363 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 364 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F64, CallingConv::ARM_AAPCS); 365 setLibcallCallingConv(RTLIB::SINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 366 setLibcallCallingConv(RTLIB::UINTTOFP_I32_F32, CallingConv::ARM_AAPCS); 367 setLibcallCallingConv(RTLIB::SINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 368 setLibcallCallingConv(RTLIB::UINTTOFP_I64_F32, CallingConv::ARM_AAPCS); 369 370 // Long long helper functions 371 // RTABI chapter 4.2, Table 9 372 setLibcallName(RTLIB::MUL_I64, "__aeabi_lmul"); 373 setLibcallName(RTLIB::SDIV_I64, "__aeabi_ldivmod"); 374 setLibcallName(RTLIB::UDIV_I64, "__aeabi_uldivmod"); 375 setLibcallName(RTLIB::SHL_I64, "__aeabi_llsl"); 376 setLibcallName(RTLIB::SRL_I64, "__aeabi_llsr"); 377 setLibcallName(RTLIB::SRA_I64, "__aeabi_lasr"); 378 setLibcallCallingConv(RTLIB::MUL_I64, CallingConv::ARM_AAPCS); 379 setLibcallCallingConv(RTLIB::SDIV_I64, CallingConv::ARM_AAPCS); 380 setLibcallCallingConv(RTLIB::UDIV_I64, CallingConv::ARM_AAPCS); 381 setLibcallCallingConv(RTLIB::SHL_I64, CallingConv::ARM_AAPCS); 382 setLibcallCallingConv(RTLIB::SRL_I64, CallingConv::ARM_AAPCS); 383 setLibcallCallingConv(RTLIB::SRA_I64, CallingConv::ARM_AAPCS); 384 385 // Integer division functions 386 // RTABI chapter 4.3.1 387 setLibcallName(RTLIB::SDIV_I8, "__aeabi_idiv"); 388 setLibcallName(RTLIB::SDIV_I16, "__aeabi_idiv"); 389 setLibcallName(RTLIB::SDIV_I32, "__aeabi_idiv"); 390 setLibcallName(RTLIB::UDIV_I8, "__aeabi_uidiv"); 391 setLibcallName(RTLIB::UDIV_I16, "__aeabi_uidiv"); 392 setLibcallName(RTLIB::UDIV_I32, "__aeabi_uidiv"); 393 setLibcallCallingConv(RTLIB::SDIV_I8, CallingConv::ARM_AAPCS); 394 setLibcallCallingConv(RTLIB::SDIV_I16, CallingConv::ARM_AAPCS); 395 setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::ARM_AAPCS); 396 setLibcallCallingConv(RTLIB::UDIV_I8, CallingConv::ARM_AAPCS); 397 setLibcallCallingConv(RTLIB::UDIV_I16, CallingConv::ARM_AAPCS); 398 setLibcallCallingConv(RTLIB::UDIV_I32, CallingConv::ARM_AAPCS); 399 } 400 401 // Use divmod iOS compiler-rt calls. 
402 if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { 403 setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); 404 setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); 405 } 406 407 if (Subtarget->isThumb1Only()) 408 addRegisterClass(MVT::i32, ARM::tGPRRegisterClass); 409 else 410 addRegisterClass(MVT::i32, ARM::GPRRegisterClass); 411 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 412 addRegisterClass(MVT::f32, ARM::SPRRegisterClass); 413 if (!Subtarget->isFPOnlySP()) 414 addRegisterClass(MVT::f64, ARM::DPRRegisterClass); 415 416 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 417 } 418 419 if (Subtarget->hasNEON()) { 420 addDRTypeForNEON(MVT::v2f32); 421 addDRTypeForNEON(MVT::v8i8); 422 addDRTypeForNEON(MVT::v4i16); 423 addDRTypeForNEON(MVT::v2i32); 424 addDRTypeForNEON(MVT::v1i64); 425 426 addQRTypeForNEON(MVT::v4f32); 427 addQRTypeForNEON(MVT::v2f64); 428 addQRTypeForNEON(MVT::v16i8); 429 addQRTypeForNEON(MVT::v8i16); 430 addQRTypeForNEON(MVT::v4i32); 431 addQRTypeForNEON(MVT::v2i64); 432 433 // v2f64 is legal so that QR subregs can be extracted as f64 elements, but 434 // neither Neon nor VFP support any arithmetic operations on it. 435 setOperationAction(ISD::FADD, MVT::v2f64, Expand); 436 setOperationAction(ISD::FSUB, MVT::v2f64, Expand); 437 setOperationAction(ISD::FMUL, MVT::v2f64, Expand); 438 setOperationAction(ISD::FDIV, MVT::v2f64, Expand); 439 setOperationAction(ISD::FREM, MVT::v2f64, Expand); 440 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); 441 setOperationAction(ISD::VSETCC, MVT::v2f64, Expand); 442 setOperationAction(ISD::FNEG, MVT::v2f64, Expand); 443 setOperationAction(ISD::FABS, MVT::v2f64, Expand); 444 setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); 445 setOperationAction(ISD::FSIN, MVT::v2f64, Expand); 446 setOperationAction(ISD::FCOS, MVT::v2f64, Expand); 447 setOperationAction(ISD::FPOWI, MVT::v2f64, Expand); 448 setOperationAction(ISD::FPOW, MVT::v2f64, Expand); 449 setOperationAction(ISD::FLOG, MVT::v2f64, Expand); 450 setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); 451 setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); 452 setOperationAction(ISD::FEXP, MVT::v2f64, Expand); 453 setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); 454 setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); 455 setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); 456 setOperationAction(ISD::FRINT, MVT::v2f64, Expand); 457 setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); 458 setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); 459 460 setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); 461 462 // Neon does not support some operations on v1i64 and v2i64 types. 463 setOperationAction(ISD::MUL, MVT::v1i64, Expand); 464 // Custom handling for some quad-vector types to detect VMULL. 465 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 466 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 467 setOperationAction(ISD::MUL, MVT::v2i64, Custom); 468 // Custom handling for some vector types to avoid expensive expansions 469 setOperationAction(ISD::SDIV, MVT::v4i16, Custom); 470 setOperationAction(ISD::SDIV, MVT::v8i8, Custom); 471 setOperationAction(ISD::UDIV, MVT::v4i16, Custom); 472 setOperationAction(ISD::UDIV, MVT::v8i8, Custom); 473 setOperationAction(ISD::VSETCC, MVT::v1i64, Expand); 474 setOperationAction(ISD::VSETCC, MVT::v2i64, Expand); 475 // Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with 476 // a destination type that is wider than the source. 
477 setOperationAction(ISD::SINT_TO_FP, MVT::v4i16, Custom); 478 setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom); 479 480 setTargetDAGCombine(ISD::INTRINSIC_VOID); 481 setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); 482 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); 483 setTargetDAGCombine(ISD::SHL); 484 setTargetDAGCombine(ISD::SRL); 485 setTargetDAGCombine(ISD::SRA); 486 setTargetDAGCombine(ISD::SIGN_EXTEND); 487 setTargetDAGCombine(ISD::ZERO_EXTEND); 488 setTargetDAGCombine(ISD::ANY_EXTEND); 489 setTargetDAGCombine(ISD::SELECT_CC); 490 setTargetDAGCombine(ISD::BUILD_VECTOR); 491 setTargetDAGCombine(ISD::VECTOR_SHUFFLE); 492 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); 493 setTargetDAGCombine(ISD::STORE); 494 } 495 496 computeRegisterProperties(); 497 498 // ARM does not have f32 extending load. 499 setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); 500 501 // ARM does not have i1 sign extending load. 502 setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); 503 504 // ARM supports all 4 flavors of integer indexed load / store. 505 if (!Subtarget->isThumb1Only()) { 506 for (unsigned im = (unsigned)ISD::PRE_INC; 507 im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) { 508 setIndexedLoadAction(im, MVT::i1, Legal); 509 setIndexedLoadAction(im, MVT::i8, Legal); 510 setIndexedLoadAction(im, MVT::i16, Legal); 511 setIndexedLoadAction(im, MVT::i32, Legal); 512 setIndexedStoreAction(im, MVT::i1, Legal); 513 setIndexedStoreAction(im, MVT::i8, Legal); 514 setIndexedStoreAction(im, MVT::i16, Legal); 515 setIndexedStoreAction(im, MVT::i32, Legal); 516 } 517 } 518 519 // i64 operation support. 520 setOperationAction(ISD::MUL, MVT::i64, Expand); 521 setOperationAction(ISD::MULHU, MVT::i32, Expand); 522 if (Subtarget->isThumb1Only()) { 523 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 524 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 525 } 526 if (Subtarget->isThumb1Only() || !Subtarget->hasV6Ops()) 527 setOperationAction(ISD::MULHS, MVT::i32, Expand); 528 529 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 530 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 531 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 532 setOperationAction(ISD::SRL, MVT::i64, Custom); 533 setOperationAction(ISD::SRA, MVT::i64, Custom); 534 535 // ARM does not have ROTL. 536 setOperationAction(ISD::ROTL, MVT::i32, Expand); 537 setOperationAction(ISD::CTTZ, MVT::i32, Custom); 538 setOperationAction(ISD::CTPOP, MVT::i32, Expand); 539 if (!Subtarget->hasV5TOps() || Subtarget->isThumb1Only()) 540 setOperationAction(ISD::CTLZ, MVT::i32, Expand); 541 542 // Only ARMv6 has BSWAP. 543 if (!Subtarget->hasV6Ops()) 544 setOperationAction(ISD::BSWAP, MVT::i32, Expand); 545 546 // These are expanded into libcalls. 
547 if (!Subtarget->hasDivide() || !Subtarget->isThumb2()) { 548 // v7M has a hardware divider 549 setOperationAction(ISD::SDIV, MVT::i32, Expand); 550 setOperationAction(ISD::UDIV, MVT::i32, Expand); 551 } 552 setOperationAction(ISD::SREM, MVT::i32, Expand); 553 setOperationAction(ISD::UREM, MVT::i32, Expand); 554 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 555 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 556 557 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 558 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 559 setOperationAction(ISD::GLOBAL_OFFSET_TABLE, MVT::i32, Custom); 560 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 561 setOperationAction(ISD::BlockAddress, MVT::i32, Custom); 562 563 setOperationAction(ISD::TRAP, MVT::Other, Legal); 564 565 // Use the default implementation. 566 setOperationAction(ISD::VASTART, MVT::Other, Custom); 567 setOperationAction(ISD::VAARG, MVT::Other, Expand); 568 setOperationAction(ISD::VACOPY, MVT::Other, Expand); 569 setOperationAction(ISD::VAEND, MVT::Other, Expand); 570 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); 571 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); 572 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 573 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 574 setExceptionPointerRegister(ARM::R0); 575 setExceptionSelectorRegister(ARM::R1); 576 577 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); 578 // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use 579 // the default expansion. 580 if (Subtarget->hasDataBarrier() || 581 (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { 582 // membarrier needs custom lowering; the rest are legal and handled 583 // normally. 584 setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom); 585 } else { 586 // Set them all for expansion, which will force libcalls. 
587 setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); 588 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i8, Expand); 589 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i16, Expand); 590 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); 591 setOperationAction(ISD::ATOMIC_SWAP, MVT::i8, Expand); 592 setOperationAction(ISD::ATOMIC_SWAP, MVT::i16, Expand); 593 setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); 594 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i8, Expand); 595 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i16, Expand); 596 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); 597 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i8, Expand); 598 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i16, Expand); 599 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i32, Expand); 600 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i8, Expand); 601 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i16, Expand); 602 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i32, Expand); 603 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i8, Expand); 604 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i16, Expand); 605 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i32, Expand); 606 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i8, Expand); 607 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, Expand); 608 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, Expand); 609 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i8, Expand); 610 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i16, Expand); 611 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i32, Expand); 612 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i8, Expand); 613 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i16, Expand); 614 setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i32, Expand); 615 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i8, Expand); 616 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i16, Expand); 617 setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i32, Expand); 618 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i8, Expand); 619 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i16, Expand); 620 setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i32, Expand); 621 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i8, Expand); 622 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i16, Expand); 623 setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand); 624 // Since the libcalls include locking, fold in the fences 625 setShouldFoldAtomicFences(true); 626 } 627 // 64-bit versions are always libcalls (for now) 628 setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Expand); 629 setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Expand); 630 setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Expand); 631 setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Expand); 632 setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Expand); 633 setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Expand); 634 setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Expand); 635 setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Expand); 636 637 setOperationAction(ISD::PREFETCH, MVT::Other, Custom); 638 639 // Requires SXTB/SXTH, available on v6 and up in both ARM and Thumb modes. 
640 if (!Subtarget->hasV6Ops()) { 641 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand); 642 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand); 643 } 644 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 645 646 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 647 // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR 648 // iff target supports vfp2. 649 setOperationAction(ISD::BITCAST, MVT::i64, Custom); 650 setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); 651 } 652 653 // We want to custom lower some of our intrinsics. 654 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 655 if (Subtarget->isTargetDarwin()) { 656 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); 657 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); 658 setOperationAction(ISD::EH_SJLJ_DISPATCHSETUP, MVT::Other, Custom); 659 } 660 661 setOperationAction(ISD::SETCC, MVT::i32, Expand); 662 setOperationAction(ISD::SETCC, MVT::f32, Expand); 663 setOperationAction(ISD::SETCC, MVT::f64, Expand); 664 setOperationAction(ISD::SELECT, MVT::i32, Custom); 665 setOperationAction(ISD::SELECT, MVT::f32, Custom); 666 setOperationAction(ISD::SELECT, MVT::f64, Custom); 667 setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); 668 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 669 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 670 671 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 672 setOperationAction(ISD::BR_CC, MVT::i32, Custom); 673 setOperationAction(ISD::BR_CC, MVT::f32, Custom); 674 setOperationAction(ISD::BR_CC, MVT::f64, Custom); 675 setOperationAction(ISD::BR_JT, MVT::Other, Custom); 676 677 // We don't support sin/cos/fmod/copysign/pow 678 setOperationAction(ISD::FSIN, MVT::f64, Expand); 679 setOperationAction(ISD::FSIN, MVT::f32, Expand); 680 setOperationAction(ISD::FCOS, MVT::f32, Expand); 681 setOperationAction(ISD::FCOS, MVT::f64, Expand); 682 setOperationAction(ISD::FREM, MVT::f64, Expand); 683 setOperationAction(ISD::FREM, MVT::f32, Expand); 684 if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only()) { 685 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); 686 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); 687 } 688 setOperationAction(ISD::FPOW, MVT::f64, Expand); 689 setOperationAction(ISD::FPOW, MVT::f32, Expand); 690 691 // Various VFP goodness 692 if (!UseSoftFloat && !Subtarget->isThumb1Only()) { 693 // int <-> fp are custom expanded into bit_convert + ARMISD ops. 694 if (Subtarget->hasVFP2()) { 695 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 696 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); 697 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); 698 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 699 } 700 // Special handling for half-precision FP. 
701 if (!Subtarget->hasFP16()) { 702 setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); 703 setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); 704 } 705 } 706 707 // We have target-specific dag combine patterns for the following nodes: 708 // ARMISD::VMOVRRD - No need to call setTargetDAGCombine 709 setTargetDAGCombine(ISD::ADD); 710 setTargetDAGCombine(ISD::SUB); 711 setTargetDAGCombine(ISD::MUL); 712 713 if (Subtarget->hasV6T2Ops() || Subtarget->hasNEON()) 714 setTargetDAGCombine(ISD::OR); 715 if (Subtarget->hasNEON()) 716 setTargetDAGCombine(ISD::AND); 717 718 setStackPointerRegisterToSaveRestore(ARM::SP); 719 720 if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2()) 721 setSchedulingPreference(Sched::RegPressure); 722 else 723 setSchedulingPreference(Sched::Hybrid); 724 725 //// temporary - rewrite interface to use type 726 maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; 727 728 // On ARM arguments smaller than 4 bytes are extended, so all arguments 729 // are at least 4 bytes aligned. 730 setMinStackArgumentAlignment(4); 731 732 benefitFromCodePlacementOpt = true; 733} 734 735// FIXME: It might make sense to define the representative register class as the 736// nearest super-register that has a non-null superset. For example, DPR_VFP2 is 737// a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently, 738// SPR's representative would be DPR_VFP2. This should work well if register 739// pressure tracking were modified such that a register use would increment the 740// pressure of the register class's representative and all of it's super 741// classes' representatives transitively. We have not implemented this because 742// of the difficulty prior to coalescing of modeling operand register classes 743// due to the common occurrence of cross class copies and subregister insertions 744// and extractions. 745std::pair<const TargetRegisterClass*, uint8_t> 746ARMTargetLowering::findRepresentativeClass(EVT VT) const{ 747 const TargetRegisterClass *RRC = 0; 748 uint8_t Cost = 1; 749 switch (VT.getSimpleVT().SimpleTy) { 750 default: 751 return TargetLowering::findRepresentativeClass(VT); 752 // Use DPR as representative register class for all floating point 753 // and vector types. Since there are 32 SPR registers and 32 DPR registers so 754 // the cost is 1 for both f32 and f64. 755 case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: 756 case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: 757 RRC = ARM::DPRRegisterClass; 758 // When NEON is used for SP, only half of the register file is available 759 // because operations that define both SP and DP results will be constrained 760 // to the VFP2 class (D0-D15). We currently model this constraint prior to 761 // coalescing by double-counting the SP regs. See the FIXME above. 
762 if (Subtarget->useNEONForSinglePrecisionFP()) 763 Cost = 2; 764 break; 765 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: 766 case MVT::v4f32: case MVT::v2f64: 767 RRC = ARM::DPRRegisterClass; 768 Cost = 2; 769 break; 770 case MVT::v4i64: 771 RRC = ARM::DPRRegisterClass; 772 Cost = 4; 773 break; 774 case MVT::v8i64: 775 RRC = ARM::DPRRegisterClass; 776 Cost = 8; 777 break; 778 } 779 return std::make_pair(RRC, Cost); 780} 781 782const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { 783 switch (Opcode) { 784 default: return 0; 785 case ARMISD::Wrapper: return "ARMISD::Wrapper"; 786 case ARMISD::WrapperDYN: return "ARMISD::WrapperDYN"; 787 case ARMISD::WrapperPIC: return "ARMISD::WrapperPIC"; 788 case ARMISD::WrapperJT: return "ARMISD::WrapperJT"; 789 case ARMISD::CALL: return "ARMISD::CALL"; 790 case ARMISD::CALL_PRED: return "ARMISD::CALL_PRED"; 791 case ARMISD::CALL_NOLINK: return "ARMISD::CALL_NOLINK"; 792 case ARMISD::tCALL: return "ARMISD::tCALL"; 793 case ARMISD::BRCOND: return "ARMISD::BRCOND"; 794 case ARMISD::BR_JT: return "ARMISD::BR_JT"; 795 case ARMISD::BR2_JT: return "ARMISD::BR2_JT"; 796 case ARMISD::RET_FLAG: return "ARMISD::RET_FLAG"; 797 case ARMISD::PIC_ADD: return "ARMISD::PIC_ADD"; 798 case ARMISD::CMP: return "ARMISD::CMP"; 799 case ARMISD::CMPZ: return "ARMISD::CMPZ"; 800 case ARMISD::CMPFP: return "ARMISD::CMPFP"; 801 case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0"; 802 case ARMISD::BCC_i64: return "ARMISD::BCC_i64"; 803 case ARMISD::FMSTAT: return "ARMISD::FMSTAT"; 804 case ARMISD::CMOV: return "ARMISD::CMOV"; 805 806 case ARMISD::RBIT: return "ARMISD::RBIT"; 807 808 case ARMISD::FTOSI: return "ARMISD::FTOSI"; 809 case ARMISD::FTOUI: return "ARMISD::FTOUI"; 810 case ARMISD::SITOF: return "ARMISD::SITOF"; 811 case ARMISD::UITOF: return "ARMISD::UITOF"; 812 813 case ARMISD::SRL_FLAG: return "ARMISD::SRL_FLAG"; 814 case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG"; 815 case ARMISD::RRX: return "ARMISD::RRX"; 816 817 case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD"; 818 case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR"; 819 820 case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP"; 821 case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP"; 822 case ARMISD::EH_SJLJ_DISPATCHSETUP:return "ARMISD::EH_SJLJ_DISPATCHSETUP"; 823 824 case ARMISD::TC_RETURN: return "ARMISD::TC_RETURN"; 825 826 case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; 827 828 case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; 829 830 case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER"; 831 case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR"; 832 833 case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; 834 835 case ARMISD::VCEQ: return "ARMISD::VCEQ"; 836 case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; 837 case ARMISD::VCGE: return "ARMISD::VCGE"; 838 case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; 839 case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; 840 case ARMISD::VCGEU: return "ARMISD::VCGEU"; 841 case ARMISD::VCGT: return "ARMISD::VCGT"; 842 case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; 843 case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; 844 case ARMISD::VCGTU: return "ARMISD::VCGTU"; 845 case ARMISD::VTST: return "ARMISD::VTST"; 846 847 case ARMISD::VSHL: return "ARMISD::VSHL"; 848 case ARMISD::VSHRs: return "ARMISD::VSHRs"; 849 case ARMISD::VSHRu: return "ARMISD::VSHRu"; 850 case ARMISD::VSHLLs: return "ARMISD::VSHLLs"; 851 case ARMISD::VSHLLu: return "ARMISD::VSHLLu"; 852 case ARMISD::VSHLLi: return "ARMISD::VSHLLi"; 853 case ARMISD::VSHRN: return 
"ARMISD::VSHRN"; 854 case ARMISD::VRSHRs: return "ARMISD::VRSHRs"; 855 case ARMISD::VRSHRu: return "ARMISD::VRSHRu"; 856 case ARMISD::VRSHRN: return "ARMISD::VRSHRN"; 857 case ARMISD::VQSHLs: return "ARMISD::VQSHLs"; 858 case ARMISD::VQSHLu: return "ARMISD::VQSHLu"; 859 case ARMISD::VQSHLsu: return "ARMISD::VQSHLsu"; 860 case ARMISD::VQSHRNs: return "ARMISD::VQSHRNs"; 861 case ARMISD::VQSHRNu: return "ARMISD::VQSHRNu"; 862 case ARMISD::VQSHRNsu: return "ARMISD::VQSHRNsu"; 863 case ARMISD::VQRSHRNs: return "ARMISD::VQRSHRNs"; 864 case ARMISD::VQRSHRNu: return "ARMISD::VQRSHRNu"; 865 case ARMISD::VQRSHRNsu: return "ARMISD::VQRSHRNsu"; 866 case ARMISD::VGETLANEu: return "ARMISD::VGETLANEu"; 867 case ARMISD::VGETLANEs: return "ARMISD::VGETLANEs"; 868 case ARMISD::VMOVIMM: return "ARMISD::VMOVIMM"; 869 case ARMISD::VMVNIMM: return "ARMISD::VMVNIMM"; 870 case ARMISD::VDUP: return "ARMISD::VDUP"; 871 case ARMISD::VDUPLANE: return "ARMISD::VDUPLANE"; 872 case ARMISD::VEXT: return "ARMISD::VEXT"; 873 case ARMISD::VREV64: return "ARMISD::VREV64"; 874 case ARMISD::VREV32: return "ARMISD::VREV32"; 875 case ARMISD::VREV16: return "ARMISD::VREV16"; 876 case ARMISD::VZIP: return "ARMISD::VZIP"; 877 case ARMISD::VUZP: return "ARMISD::VUZP"; 878 case ARMISD::VTRN: return "ARMISD::VTRN"; 879 case ARMISD::VTBL1: return "ARMISD::VTBL1"; 880 case ARMISD::VTBL2: return "ARMISD::VTBL2"; 881 case ARMISD::VMULLs: return "ARMISD::VMULLs"; 882 case ARMISD::VMULLu: return "ARMISD::VMULLu"; 883 case ARMISD::BUILD_VECTOR: return "ARMISD::BUILD_VECTOR"; 884 case ARMISD::FMAX: return "ARMISD::FMAX"; 885 case ARMISD::FMIN: return "ARMISD::FMIN"; 886 case ARMISD::BFI: return "ARMISD::BFI"; 887 case ARMISD::VORRIMM: return "ARMISD::VORRIMM"; 888 case ARMISD::VBICIMM: return "ARMISD::VBICIMM"; 889 case ARMISD::VBSL: return "ARMISD::VBSL"; 890 case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP"; 891 case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP"; 892 case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP"; 893 case ARMISD::VLD1_UPD: return "ARMISD::VLD1_UPD"; 894 case ARMISD::VLD2_UPD: return "ARMISD::VLD2_UPD"; 895 case ARMISD::VLD3_UPD: return "ARMISD::VLD3_UPD"; 896 case ARMISD::VLD4_UPD: return "ARMISD::VLD4_UPD"; 897 case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD"; 898 case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD"; 899 case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD"; 900 case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD"; 901 case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD"; 902 case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD"; 903 case ARMISD::VST1_UPD: return "ARMISD::VST1_UPD"; 904 case ARMISD::VST2_UPD: return "ARMISD::VST2_UPD"; 905 case ARMISD::VST3_UPD: return "ARMISD::VST3_UPD"; 906 case ARMISD::VST4_UPD: return "ARMISD::VST4_UPD"; 907 case ARMISD::VST2LN_UPD: return "ARMISD::VST2LN_UPD"; 908 case ARMISD::VST3LN_UPD: return "ARMISD::VST3LN_UPD"; 909 case ARMISD::VST4LN_UPD: return "ARMISD::VST4LN_UPD"; 910 } 911} 912 913/// getRegClassFor - Return the register class that should be used for the 914/// specified value type. 915TargetRegisterClass *ARMTargetLowering::getRegClassFor(EVT VT) const { 916 // Map v4i64 to QQ registers but do not make the type legal. Similarly map 917 // v8i64 to QQQQ registers. v4i64 and v8i64 are only used for REG_SEQUENCE to 918 // load / store 4 to 8 consecutive D registers. 
919 if (Subtarget->hasNEON()) { 920 if (VT == MVT::v4i64) 921 return ARM::QQPRRegisterClass; 922 else if (VT == MVT::v8i64) 923 return ARM::QQQQPRRegisterClass; 924 } 925 return TargetLowering::getRegClassFor(VT); 926} 927 928// Create a fast isel object. 929FastISel * 930ARMTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo) const { 931 return ARM::createFastISel(funcInfo); 932} 933 934/// getFunctionAlignment - Return the Log2 alignment of this function. 935unsigned ARMTargetLowering::getFunctionAlignment(const Function *F) const { 936 return getTargetMachine().getSubtarget<ARMSubtarget>().isThumb() ? 1 : 2; 937} 938 939/// getMaximalGlobalOffset - Returns the maximal possible offset which can 940/// be used for loads / stores from the global. 941unsigned ARMTargetLowering::getMaximalGlobalOffset() const { 942 return (Subtarget->isThumb1Only() ? 127 : 4095); 943} 944 945Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { 946 unsigned NumVals = N->getNumValues(); 947 if (!NumVals) 948 return Sched::RegPressure; 949 950 for (unsigned i = 0; i != NumVals; ++i) { 951 EVT VT = N->getValueType(i); 952 if (VT == MVT::Glue || VT == MVT::Other) 953 continue; 954 if (VT.isFloatingPoint() || VT.isVector()) 955 return Sched::Latency; 956 } 957 958 if (!N->isMachineOpcode()) 959 return Sched::RegPressure; 960 961 // Load are scheduled for latency even if there instruction itinerary 962 // is not available. 963 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 964 const TargetInstrDesc &TID = TII->get(N->getMachineOpcode()); 965 966 if (TID.getNumDefs() == 0) 967 return Sched::RegPressure; 968 if (!Itins->isEmpty() && 969 Itins->getOperandCycle(TID.getSchedClass(), 0) > 2) 970 return Sched::Latency; 971 972 return Sched::RegPressure; 973} 974 975//===----------------------------------------------------------------------===// 976// Lowering Code 977//===----------------------------------------------------------------------===// 978 979/// IntCCToARMCC - Convert a DAG integer condition code to an ARM CC 980static ARMCC::CondCodes IntCCToARMCC(ISD::CondCode CC) { 981 switch (CC) { 982 default: llvm_unreachable("Unknown condition code!"); 983 case ISD::SETNE: return ARMCC::NE; 984 case ISD::SETEQ: return ARMCC::EQ; 985 case ISD::SETGT: return ARMCC::GT; 986 case ISD::SETGE: return ARMCC::GE; 987 case ISD::SETLT: return ARMCC::LT; 988 case ISD::SETLE: return ARMCC::LE; 989 case ISD::SETUGT: return ARMCC::HI; 990 case ISD::SETUGE: return ARMCC::HS; 991 case ISD::SETULT: return ARMCC::LO; 992 case ISD::SETULE: return ARMCC::LS; 993 } 994} 995 996/// FPCCToARMCC - Convert a DAG fp condition code to an ARM CC. 
997static void FPCCToARMCC(ISD::CondCode CC, ARMCC::CondCodes &CondCode, 998 ARMCC::CondCodes &CondCode2) { 999 CondCode2 = ARMCC::AL; 1000 switch (CC) { 1001 default: llvm_unreachable("Unknown FP condition!"); 1002 case ISD::SETEQ: 1003 case ISD::SETOEQ: CondCode = ARMCC::EQ; break; 1004 case ISD::SETGT: 1005 case ISD::SETOGT: CondCode = ARMCC::GT; break; 1006 case ISD::SETGE: 1007 case ISD::SETOGE: CondCode = ARMCC::GE; break; 1008 case ISD::SETOLT: CondCode = ARMCC::MI; break; 1009 case ISD::SETOLE: CondCode = ARMCC::LS; break; 1010 case ISD::SETONE: CondCode = ARMCC::MI; CondCode2 = ARMCC::GT; break; 1011 case ISD::SETO: CondCode = ARMCC::VC; break; 1012 case ISD::SETUO: CondCode = ARMCC::VS; break; 1013 case ISD::SETUEQ: CondCode = ARMCC::EQ; CondCode2 = ARMCC::VS; break; 1014 case ISD::SETUGT: CondCode = ARMCC::HI; break; 1015 case ISD::SETUGE: CondCode = ARMCC::PL; break; 1016 case ISD::SETLT: 1017 case ISD::SETULT: CondCode = ARMCC::LT; break; 1018 case ISD::SETLE: 1019 case ISD::SETULE: CondCode = ARMCC::LE; break; 1020 case ISD::SETNE: 1021 case ISD::SETUNE: CondCode = ARMCC::NE; break; 1022 } 1023} 1024 1025//===----------------------------------------------------------------------===// 1026// Calling Convention Implementation 1027//===----------------------------------------------------------------------===// 1028 1029#include "ARMGenCallingConv.inc" 1030 1031/// CCAssignFnForNode - Selects the correct CCAssignFn for a the 1032/// given CallingConvention value. 1033CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC, 1034 bool Return, 1035 bool isVarArg) const { 1036 switch (CC) { 1037 default: 1038 llvm_unreachable("Unsupported calling convention"); 1039 case CallingConv::Fast: 1040 if (Subtarget->hasVFP2() && !isVarArg) { 1041 if (!Subtarget->isAAPCS_ABI()) 1042 return (Return ? RetFastCC_ARM_APCS : FastCC_ARM_APCS); 1043 // For AAPCS ABI targets, just use VFP variant of the calling convention. 1044 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1045 } 1046 // Fallthrough 1047 case CallingConv::C: { 1048 // Use target triple & subtarget features to do actual dispatch. 1049 if (!Subtarget->isAAPCS_ABI()) 1050 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1051 else if (Subtarget->hasVFP2() && 1052 FloatABIType == FloatABI::Hard && !isVarArg) 1053 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1054 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1055 } 1056 case CallingConv::ARM_AAPCS_VFP: 1057 return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP); 1058 case CallingConv::ARM_AAPCS: 1059 return (Return ? RetCC_ARM_AAPCS : CC_ARM_AAPCS); 1060 case CallingConv::ARM_APCS: 1061 return (Return ? RetCC_ARM_APCS : CC_ARM_APCS); 1062 } 1063} 1064 1065/// LowerCallResult - Lower the result values of a call into the 1066/// appropriate copies out of appropriate physical registers. 1067SDValue 1068ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 1069 CallingConv::ID CallConv, bool isVarArg, 1070 const SmallVectorImpl<ISD::InputArg> &Ins, 1071 DebugLoc dl, SelectionDAG &DAG, 1072 SmallVectorImpl<SDValue> &InVals) const { 1073 1074 // Assign locations to each value returned by this call. 1075 SmallVector<CCValAssign, 16> RVLocs; 1076 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), 1077 RVLocs, *DAG.getContext()); 1078 CCInfo.AnalyzeCallResult(Ins, 1079 CCAssignFnForNode(CallConv, /* Return*/ true, 1080 isVarArg)); 1081 1082 // Copy all of the result registers out of their specified physreg. 
1083 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1084 CCValAssign VA = RVLocs[i]; 1085 1086 SDValue Val; 1087 if (VA.needsCustom()) { 1088 // Handle f64 or half of a v2f64. 1089 SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1090 InFlag); 1091 Chain = Lo.getValue(1); 1092 InFlag = Lo.getValue(2); 1093 VA = RVLocs[++i]; // skip ahead to next loc 1094 SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, 1095 InFlag); 1096 Chain = Hi.getValue(1); 1097 InFlag = Hi.getValue(2); 1098 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1099 1100 if (VA.getLocVT() == MVT::v2f64) { 1101 SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 1102 Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1103 DAG.getConstant(0, MVT::i32)); 1104 1105 VA = RVLocs[++i]; // skip ahead to next loc 1106 Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1107 Chain = Lo.getValue(1); 1108 InFlag = Lo.getValue(2); 1109 VA = RVLocs[++i]; // skip ahead to next loc 1110 Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag); 1111 Chain = Hi.getValue(1); 1112 InFlag = Hi.getValue(2); 1113 Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 1114 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val, 1115 DAG.getConstant(1, MVT::i32)); 1116 } 1117 } else { 1118 Val = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), VA.getLocVT(), 1119 InFlag); 1120 Chain = Val.getValue(1); 1121 InFlag = Val.getValue(2); 1122 } 1123 1124 switch (VA.getLocInfo()) { 1125 default: llvm_unreachable("Unknown loc info!"); 1126 case CCValAssign::Full: break; 1127 case CCValAssign::BCvt: 1128 Val = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), Val); 1129 break; 1130 } 1131 1132 InVals.push_back(Val); 1133 } 1134 1135 return Chain; 1136} 1137 1138/// LowerMemOpCallTo - Store the argument to the stack. 1139SDValue 1140ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, 1141 SDValue StackPtr, SDValue Arg, 1142 DebugLoc dl, SelectionDAG &DAG, 1143 const CCValAssign &VA, 1144 ISD::ArgFlagsTy Flags) const { 1145 unsigned LocMemOffset = VA.getLocMemOffset(); 1146 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 1147 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 1148 return DAG.getStore(Chain, dl, Arg, PtrOff, 1149 MachinePointerInfo::getStack(LocMemOffset), 1150 false, false, 0); 1151} 1152 1153void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, 1154 SDValue Chain, SDValue &Arg, 1155 RegsToPassVector &RegsToPass, 1156 CCValAssign &VA, CCValAssign &NextVA, 1157 SDValue &StackPtr, 1158 SmallVector<SDValue, 8> &MemOpChains, 1159 ISD::ArgFlagsTy Flags) const { 1160 1161 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1162 DAG.getVTList(MVT::i32, MVT::i32), Arg); 1163 RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd)); 1164 1165 if (NextVA.isRegLoc()) 1166 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), fmrrd.getValue(1))); 1167 else { 1168 assert(NextVA.isMemLoc()); 1169 if (StackPtr.getNode() == 0) 1170 StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1171 1172 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, fmrrd.getValue(1), 1173 dl, DAG, NextVA, 1174 Flags)); 1175 } 1176} 1177 1178/// LowerCall - Lowering a call into a callseq_start <- 1179/// ARMISD:CALL <- callseq_end chain. Also add input and output parameter 1180/// nodes. 
1181SDValue 1182ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, 1183 CallingConv::ID CallConv, bool isVarArg, 1184 bool &isTailCall, 1185 const SmallVectorImpl<ISD::OutputArg> &Outs, 1186 const SmallVectorImpl<SDValue> &OutVals, 1187 const SmallVectorImpl<ISD::InputArg> &Ins, 1188 DebugLoc dl, SelectionDAG &DAG, 1189 SmallVectorImpl<SDValue> &InVals) const { 1190 MachineFunction &MF = DAG.getMachineFunction(); 1191 bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet(); 1192 bool IsSibCall = false; 1193 // Temporarily disable tail calls so things don't break. 1194 if (!EnableARMTailCalls) 1195 isTailCall = false; 1196 if (isTailCall) { 1197 // Check if it's really possible to do a tail call. 1198 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, 1199 isVarArg, IsStructRet, MF.getFunction()->hasStructRetAttr(), 1200 Outs, OutVals, Ins, DAG); 1201 // We don't support GuaranteedTailCallOpt for ARM, only automatically 1202 // detected sibcalls. 1203 if (isTailCall) { 1204 ++NumTailCalls; 1205 IsSibCall = true; 1206 } 1207 } 1208 1209 // Analyze operands of the call, assigning locations to each operand. 1210 SmallVector<CCValAssign, 16> ArgLocs; 1211 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 1212 *DAG.getContext()); 1213 CCInfo.setCallOrPrologue(Call); 1214 CCInfo.AnalyzeCallOperands(Outs, 1215 CCAssignFnForNode(CallConv, /* Return*/ false, 1216 isVarArg)); 1217 1218 // Get a count of how many bytes are to be pushed on the stack. 1219 unsigned NumBytes = CCInfo.getNextStackOffset(); 1220 1221 // For tail calls, memory operands are available in our caller's stack. 1222 if (IsSibCall) 1223 NumBytes = 0; 1224 1225 // Adjust the stack pointer for the new arguments... 1226 // These operations are automatically eliminated by the prolog/epilog pass 1227 if (!IsSibCall) 1228 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); 1229 1230 SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); 1231 1232 RegsToPassVector RegsToPass; 1233 SmallVector<SDValue, 8> MemOpChains; 1234 1235 // Walk the register/memloc assignments, inserting copies/loads. In the case 1236 // of tail call optimization, arguments are handled later. 1237 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1238 i != e; 1239 ++i, ++realArgIdx) { 1240 CCValAssign &VA = ArgLocs[i]; 1241 SDValue Arg = OutVals[realArgIdx]; 1242 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1243 bool isByVal = Flags.isByVal(); 1244 1245 // Promote the value if needed. 
1246 switch (VA.getLocInfo()) { 1247 default: llvm_unreachable("Unknown loc info!"); 1248 case CCValAssign::Full: break; 1249 case CCValAssign::SExt: 1250 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 1251 break; 1252 case CCValAssign::ZExt: 1253 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 1254 break; 1255 case CCValAssign::AExt: 1256 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 1257 break; 1258 case CCValAssign::BCvt: 1259 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1260 break; 1261 } 1262 1263 // f64 and v2f64 might be passed in i32 pairs and must be split into pieces 1264 if (VA.needsCustom()) { 1265 if (VA.getLocVT() == MVT::v2f64) { 1266 SDValue Op0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1267 DAG.getConstant(0, MVT::i32)); 1268 SDValue Op1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1269 DAG.getConstant(1, MVT::i32)); 1270 1271 PassF64ArgInRegs(dl, DAG, Chain, Op0, RegsToPass, 1272 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1273 1274 VA = ArgLocs[++i]; // skip ahead to next loc 1275 if (VA.isRegLoc()) { 1276 PassF64ArgInRegs(dl, DAG, Chain, Op1, RegsToPass, 1277 VA, ArgLocs[++i], StackPtr, MemOpChains, Flags); 1278 } else { 1279 assert(VA.isMemLoc()); 1280 1281 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1, 1282 dl, DAG, VA, Flags)); 1283 } 1284 } else { 1285 PassF64ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++i], 1286 StackPtr, MemOpChains, Flags); 1287 } 1288 } else if (VA.isRegLoc()) { 1289 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 1290 } else if (isByVal) { 1291 assert(VA.isMemLoc()); 1292 unsigned offset = 0; 1293 1294 // True if this byval aggregate will be split between registers 1295 // and memory. 1296 if (CCInfo.isFirstByValRegValid()) { 1297 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1298 unsigned int i, j; 1299 for (i = 0, j = CCInfo.getFirstByValReg(); j < ARM::R4; i++, j++) { 1300 SDValue Const = DAG.getConstant(4*i, MVT::i32); 1301 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 1302 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 1303 MachinePointerInfo(), 1304 false, false, 0); 1305 MemOpChains.push_back(Load.getValue(1)); 1306 RegsToPass.push_back(std::make_pair(j, Load)); 1307 } 1308 offset = ARM::R4 - CCInfo.getFirstByValReg(); 1309 CCInfo.clearFirstByValReg(); 1310 } 1311 1312 unsigned LocMemOffset = VA.getLocMemOffset(); 1313 SDValue StkPtrOff = DAG.getIntPtrConstant(LocMemOffset); 1314 SDValue Dst = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, 1315 StkPtrOff); 1316 SDValue SrcOffset = DAG.getIntPtrConstant(4*offset); 1317 SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); 1318 SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, 1319 MVT::i32); 1320 MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, 1321 Flags.getByValAlign(), 1322 /*isVolatile=*/false, 1323 /*AlwaysInline=*/false, 1324 MachinePointerInfo(0), 1325 MachinePointerInfo(0))); 1326 1327 } else if (!IsSibCall) { 1328 assert(VA.isMemLoc()); 1329 1330 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, 1331 dl, DAG, VA, Flags)); 1332 } 1333 } 1334 1335 if (!MemOpChains.empty()) 1336 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 1337 &MemOpChains[0], MemOpChains.size()); 1338 1339 // Build a sequence of copy-to-reg nodes chained together with token chain 1340 // and flag operands which copy the outgoing args into the appropriate regs. 
1341 SDValue InFlag; 1342 // Tail call byval lowering might overwrite argument registers so in case of 1343 // tail call optimization the copies to registers are lowered later. 1344 if (!isTailCall) 1345 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1346 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1347 RegsToPass[i].second, InFlag); 1348 InFlag = Chain.getValue(1); 1349 } 1350 1351 // For tail calls lower the arguments to the 'real' stack slot. 1352 if (isTailCall) { 1353 // Force all the incoming stack arguments to be loaded from the stack 1354 // before any new outgoing arguments are stored to the stack, because the 1355 // outgoing stack slots may alias the incoming argument stack slots, and 1356 // the alias isn't otherwise explicit. This is slightly more conservative 1357 // than necessary, because it means that each store effectively depends 1358 // on every argument instead of just those arguments it would clobber. 1359 1360 // Do not flag preceding copytoreg stuff together with the following stuff. 1361 InFlag = SDValue(); 1362 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 1363 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 1364 RegsToPass[i].second, InFlag); 1365 InFlag = Chain.getValue(1); 1366 } 1367 InFlag =SDValue(); 1368 } 1369 1370 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 1371 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 1372 // node so that legalize doesn't hack it. 1373 bool isDirect = false; 1374 bool isARMFunc = false; 1375 bool isLocalARMFunc = false; 1376 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1377 1378 if (EnableARMLongCalls) { 1379 assert (getTargetMachine().getRelocationModel() == Reloc::Static 1380 && "long-calls with non-static relocation model!"); 1381 // Handle a global address or an external symbol. If it's not one of 1382 // those, the target's already in a register, so we don't need to do 1383 // anything extra. 
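    // A rough sketch of what this produces for a direct call to a
    // hypothetical function 'bar' (label name and scratch register are
    // illustrative, and the exact call instruction depends on the subtarget):
    //   ldr   r12, .LCPIn    @ load bar's address from the constant pool
    //   blx   r12            @ indirect call, not limited to BL's +/-32MB range
    //   ...
    //   .LCPIn: .long bar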
1384 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1385 const GlobalValue *GV = G->getGlobal(); 1386 // Create a constant pool entry for the callee address 1387 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1388 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1389 ARMPCLabelIndex, 1390 ARMCP::CPValue, 0); 1391 // Get the address of the callee into a register 1392 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1393 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1394 Callee = DAG.getLoad(getPointerTy(), dl, 1395 DAG.getEntryNode(), CPAddr, 1396 MachinePointerInfo::getConstantPool(), 1397 false, false, 0); 1398 } else if (ExternalSymbolSDNode *S=dyn_cast<ExternalSymbolSDNode>(Callee)) { 1399 const char *Sym = S->getSymbol(); 1400 1401 // Create a constant pool entry for the callee address 1402 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1403 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1404 Sym, ARMPCLabelIndex, 0); 1405 // Get the address of the callee into a register 1406 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1407 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1408 Callee = DAG.getLoad(getPointerTy(), dl, 1409 DAG.getEntryNode(), CPAddr, 1410 MachinePointerInfo::getConstantPool(), 1411 false, false, 0); 1412 } 1413 } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 1414 const GlobalValue *GV = G->getGlobal(); 1415 isDirect = true; 1416 bool isExt = GV->isDeclaration() || GV->isWeakForLinker(); 1417 bool isStub = (isExt && Subtarget->isTargetDarwin()) && 1418 getTargetMachine().getRelocationModel() != Reloc::Static; 1419 isARMFunc = !Subtarget->isThumb() || isStub; 1420 // ARM call to a local ARM function is predicable. 1421 isLocalARMFunc = !Subtarget->isThumb() && (!isExt || !ARMInterworking); 1422 // tBX takes a register source operand. 1423 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1424 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1425 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, 1426 ARMPCLabelIndex, 1427 ARMCP::CPValue, 4); 1428 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1429 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1430 Callee = DAG.getLoad(getPointerTy(), dl, 1431 DAG.getEntryNode(), CPAddr, 1432 MachinePointerInfo::getConstantPool(), 1433 false, false, 0); 1434 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1435 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1436 getPointerTy(), Callee, PICLabel); 1437 } else { 1438 // On ELF targets for PIC code, direct calls should go through the PLT 1439 unsigned OpFlags = 0; 1440 if (Subtarget->isTargetELF() && 1441 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1442 OpFlags = ARMII::MO_PLT; 1443 Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); 1444 } 1445 } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 1446 isDirect = true; 1447 bool isStub = Subtarget->isTargetDarwin() && 1448 getTargetMachine().getRelocationModel() != Reloc::Static; 1449 isARMFunc = !Subtarget->isThumb() || isStub; 1450 // tBX takes a register source operand. 
1451 const char *Sym = S->getSymbol(); 1452 if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) { 1453 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1454 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 1455 Sym, ARMPCLabelIndex, 4); 1456 SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4); 1457 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 1458 Callee = DAG.getLoad(getPointerTy(), dl, 1459 DAG.getEntryNode(), CPAddr, 1460 MachinePointerInfo::getConstantPool(), 1461 false, false, 0); 1462 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1463 Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 1464 getPointerTy(), Callee, PICLabel); 1465 } else { 1466 unsigned OpFlags = 0; 1467 // On ELF targets for PIC code, direct calls should go through the PLT 1468 if (Subtarget->isTargetELF() && 1469 getTargetMachine().getRelocationModel() == Reloc::PIC_) 1470 OpFlags = ARMII::MO_PLT; 1471 Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), OpFlags); 1472 } 1473 } 1474 1475 // FIXME: handle tail calls differently. 1476 unsigned CallOpc; 1477 if (Subtarget->isThumb()) { 1478 if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) 1479 CallOpc = ARMISD::CALL_NOLINK; 1480 else 1481 CallOpc = isARMFunc ? ARMISD::CALL : ARMISD::tCALL; 1482 } else { 1483 CallOpc = (isDirect || Subtarget->hasV5TOps()) 1484 ? (isLocalARMFunc ? ARMISD::CALL_PRED : ARMISD::CALL) 1485 : ARMISD::CALL_NOLINK; 1486 } 1487 1488 std::vector<SDValue> Ops; 1489 Ops.push_back(Chain); 1490 Ops.push_back(Callee); 1491 1492 // Add argument registers to the end of the list so that they are known live 1493 // into the call. 1494 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 1495 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 1496 RegsToPass[i].second.getValueType())); 1497 1498 if (InFlag.getNode()) 1499 Ops.push_back(InFlag); 1500 1501 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); 1502 if (isTailCall) 1503 return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, &Ops[0], Ops.size()); 1504 1505 // Returns a chain and a flag for retval copy to use. 1506 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 1507 InFlag = Chain.getValue(1); 1508 1509 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 1510 DAG.getIntPtrConstant(0, true), InFlag); 1511 if (!Ins.empty()) 1512 InFlag = Chain.getValue(1); 1513 1514 // Handle result values, copying them out of physregs into vregs that we 1515 // return. 1516 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, 1517 dl, DAG, InVals); 1518} 1519 1520/// HandleByVal - Every parameter *after* a byval parameter is passed 1521/// on the stack. Remember the next parameter register to allocate, 1522/// and then confiscate the rest of the parameter registers to insure 1523/// this. 1524void 1525llvm::ARMTargetLowering::HandleByVal(CCState *State, unsigned &size) const { 1526 unsigned reg = State->AllocateReg(GPRArgRegs, 4); 1527 assert((State->getCallOrPrologue() == Prologue || 1528 State->getCallOrPrologue() == Call) && 1529 "unhandled ParmContext"); 1530 if ((!State->isFirstByValRegValid()) && 1531 (ARM::R0 <= reg) && (reg <= ARM::R3)) { 1532 State->setFirstByValReg(reg); 1533 // At a call site, a byval parameter that is split between 1534 // registers and memory needs its size truncated here. In a 1535 // function prologue, such byval parameters are reassembled in 1536 // memory, and are not truncated. 
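    // Worked example (register numbers are illustrative): if a 20-byte byval
    // argument starts in r1, then r1-r3 carry its first 12 bytes, so at a
    // call site excess = 4 * (R4 - R1) = 12 and the stack-resident size
    // becomes 20 - 12 = 8 bytes. In a prologue the full 20 bytes are kept,
    // since the register-resident part is spilled next to the memory part
    // and the aggregate is reassembled contiguously.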
1537 if (State->getCallOrPrologue() == Call) { 1538 unsigned excess = 4 * (ARM::R4 - reg); 1539 assert(size >= excess && "expected larger existing stack allocation"); 1540 size -= excess; 1541 } 1542 } 1543 // Confiscate any remaining parameter registers to preclude their 1544 // assignment to subsequent parameters. 1545 while (State->AllocateReg(GPRArgRegs, 4)) 1546 ; 1547} 1548 1549/// MatchingStackOffset - Return true if the given stack call argument is 1550/// already available in the same position (relatively) of the caller's 1551/// incoming argument stack. 1552static 1553bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, 1554 MachineFrameInfo *MFI, const MachineRegisterInfo *MRI, 1555 const ARMInstrInfo *TII) { 1556 unsigned Bytes = Arg.getValueType().getSizeInBits() / 8; 1557 int FI = INT_MAX; 1558 if (Arg.getOpcode() == ISD::CopyFromReg) { 1559 unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); 1560 if (!TargetRegisterInfo::isVirtualRegister(VR)) 1561 return false; 1562 MachineInstr *Def = MRI->getVRegDef(VR); 1563 if (!Def) 1564 return false; 1565 if (!Flags.isByVal()) { 1566 if (!TII->isLoadFromStackSlot(Def, FI)) 1567 return false; 1568 } else { 1569 return false; 1570 } 1571 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { 1572 if (Flags.isByVal()) 1573 // ByVal argument is passed in as a pointer but it's now being 1574 // dereferenced. e.g. 1575 // define @foo(%struct.X* %A) { 1576 // tail call @bar(%struct.X* byval %A) 1577 // } 1578 return false; 1579 SDValue Ptr = Ld->getBasePtr(); 1580 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); 1581 if (!FINode) 1582 return false; 1583 FI = FINode->getIndex(); 1584 } else 1585 return false; 1586 1587 assert(FI != INT_MAX); 1588 if (!MFI->isFixedObjectIndex(FI)) 1589 return false; 1590 return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI); 1591} 1592 1593/// IsEligibleForTailCallOptimization - Check whether the call is eligible 1594/// for tail call optimization. Targets which want to do tail call 1595/// optimization should implement this function. 1596bool 1597ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 1598 CallingConv::ID CalleeCC, 1599 bool isVarArg, 1600 bool isCalleeStructRet, 1601 bool isCallerStructRet, 1602 const SmallVectorImpl<ISD::OutputArg> &Outs, 1603 const SmallVectorImpl<SDValue> &OutVals, 1604 const SmallVectorImpl<ISD::InputArg> &Ins, 1605 SelectionDAG& DAG) const { 1606 const Function *CallerF = DAG.getMachineFunction().getFunction(); 1607 CallingConv::ID CallerCC = CallerF->getCallingConv(); 1608 bool CCMatch = CallerCC == CalleeCC; 1609 1610 // Look for obvious safe cases to perform tail call optimization that do not 1611 // require ABI changes. This is what gcc calls sibcall. 1612 1613 // Do not sibcall optimize vararg calls unless the call site is not passing 1614 // any arguments. 1615 if (isVarArg && !Outs.empty()) 1616 return false; 1617 1618 // Also avoid sibcall optimization if either caller or callee uses struct 1619 // return semantics. 1620 if (isCalleeStructRet || isCallerStructRet) 1621 return false; 1622 1623 // FIXME: Completely disable sibcall for Thumb1 since Thumb1RegisterInfo:: 1624 // emitEpilogue is not ready for them. 1625 // Doing this is tricky, since the LDM/POP instruction on Thumb doesn't take 1626 // LR. 
This means if we need to reload LR, it takes an extra instructions, 1627 // which outweighs the value of the tail call; but here we don't know yet 1628 // whether LR is going to be used. Probably the right approach is to 1629 // generate the tail call here and turn it back into CALL/RET in 1630 // emitEpilogue if LR is used. 1631 1632 // Thumb1 PIC calls to external symbols use BX, so they can be tail calls, 1633 // but we need to make sure there are enough registers; the only valid 1634 // registers are the 4 used for parameters. We don't currently do this 1635 // case. 1636 if (Subtarget->isThumb1Only()) 1637 return false; 1638 1639 // If the calling conventions do not match, then we'd better make sure the 1640 // results are returned in the same way as what the caller expects. 1641 if (!CCMatch) { 1642 SmallVector<CCValAssign, 16> RVLocs1; 1643 CCState CCInfo1(CalleeCC, false, getTargetMachine(), 1644 RVLocs1, *DAG.getContext()); 1645 CCInfo1.AnalyzeCallResult(Ins, CCAssignFnForNode(CalleeCC, true, isVarArg)); 1646 1647 SmallVector<CCValAssign, 16> RVLocs2; 1648 CCState CCInfo2(CallerCC, false, getTargetMachine(), 1649 RVLocs2, *DAG.getContext()); 1650 CCInfo2.AnalyzeCallResult(Ins, CCAssignFnForNode(CallerCC, true, isVarArg)); 1651 1652 if (RVLocs1.size() != RVLocs2.size()) 1653 return false; 1654 for (unsigned i = 0, e = RVLocs1.size(); i != e; ++i) { 1655 if (RVLocs1[i].isRegLoc() != RVLocs2[i].isRegLoc()) 1656 return false; 1657 if (RVLocs1[i].getLocInfo() != RVLocs2[i].getLocInfo()) 1658 return false; 1659 if (RVLocs1[i].isRegLoc()) { 1660 if (RVLocs1[i].getLocReg() != RVLocs2[i].getLocReg()) 1661 return false; 1662 } else { 1663 if (RVLocs1[i].getLocMemOffset() != RVLocs2[i].getLocMemOffset()) 1664 return false; 1665 } 1666 } 1667 } 1668 1669 // If the callee takes no arguments then go on to check the results of the 1670 // call. 1671 if (!Outs.empty()) { 1672 // Check if stack adjustment is needed. For now, do not do this if any 1673 // argument is passed on the stack. 1674 SmallVector<CCValAssign, 16> ArgLocs; 1675 CCState CCInfo(CalleeCC, isVarArg, getTargetMachine(), 1676 ArgLocs, *DAG.getContext()); 1677 CCInfo.AnalyzeCallOperands(Outs, 1678 CCAssignFnForNode(CalleeCC, false, isVarArg)); 1679 if (CCInfo.getNextStackOffset()) { 1680 MachineFunction &MF = DAG.getMachineFunction(); 1681 1682 // Check if the arguments are already laid out in the right way as 1683 // the caller's fixed stack objects. 1684 MachineFrameInfo *MFI = MF.getFrameInfo(); 1685 const MachineRegisterInfo *MRI = &MF.getRegInfo(); 1686 const ARMInstrInfo *TII = 1687 ((ARMTargetMachine&)getTargetMachine()).getInstrInfo(); 1688 for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size(); 1689 i != e; 1690 ++i, ++realArgIdx) { 1691 CCValAssign &VA = ArgLocs[i]; 1692 EVT RegVT = VA.getLocVT(); 1693 SDValue Arg = OutVals[realArgIdx]; 1694 ISD::ArgFlagsTy Flags = Outs[realArgIdx].Flags; 1695 if (VA.getLocInfo() == CCValAssign::Indirect) 1696 return false; 1697 if (VA.needsCustom()) { 1698 // f64 and vector types are split into multiple registers or 1699 // register/stack-slot combinations. The types will not match 1700 // the registers; give up on memory f64 refs until we figure 1701 // out what to do about this. 
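        // For instance, an f64 argument assigned to r2+r3 shows up here as
        // two consecutive GPR CCValAssigns, and a v2f64 as four; if any of
        // those pieces lands in memory instead, we conservatively refuse the
        // sibcall.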
1702 if (!VA.isRegLoc()) 1703 return false; 1704 if (!ArgLocs[++i].isRegLoc()) 1705 return false; 1706 if (RegVT == MVT::v2f64) { 1707 if (!ArgLocs[++i].isRegLoc()) 1708 return false; 1709 if (!ArgLocs[++i].isRegLoc()) 1710 return false; 1711 } 1712 } else if (!VA.isRegLoc()) { 1713 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, 1714 MFI, MRI, TII)) 1715 return false; 1716 } 1717 } 1718 } 1719 } 1720 1721 return true; 1722} 1723 1724SDValue 1725ARMTargetLowering::LowerReturn(SDValue Chain, 1726 CallingConv::ID CallConv, bool isVarArg, 1727 const SmallVectorImpl<ISD::OutputArg> &Outs, 1728 const SmallVectorImpl<SDValue> &OutVals, 1729 DebugLoc dl, SelectionDAG &DAG) const { 1730 1731 // CCValAssign - represent the assignment of the return value to a location. 1732 SmallVector<CCValAssign, 16> RVLocs; 1733 1734 // CCState - Info about the registers and stack slots. 1735 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), RVLocs, 1736 *DAG.getContext()); 1737 1738 // Analyze outgoing return values. 1739 CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true, 1740 isVarArg)); 1741 1742 // If this is the first return lowered for this function, add 1743 // the regs to the liveout set for the function. 1744 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1745 for (unsigned i = 0; i != RVLocs.size(); ++i) 1746 if (RVLocs[i].isRegLoc()) 1747 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1748 } 1749 1750 SDValue Flag; 1751 1752 // Copy the result values into the output registers. 1753 for (unsigned i = 0, realRVLocIdx = 0; 1754 i != RVLocs.size(); 1755 ++i, ++realRVLocIdx) { 1756 CCValAssign &VA = RVLocs[i]; 1757 assert(VA.isRegLoc() && "Can only return in registers!"); 1758 1759 SDValue Arg = OutVals[realRVLocIdx]; 1760 1761 switch (VA.getLocInfo()) { 1762 default: llvm_unreachable("Unknown loc info!"); 1763 case CCValAssign::Full: break; 1764 case CCValAssign::BCvt: 1765 Arg = DAG.getNode(ISD::BITCAST, dl, VA.getLocVT(), Arg); 1766 break; 1767 } 1768 1769 if (VA.needsCustom()) { 1770 if (VA.getLocVT() == MVT::v2f64) { 1771 // Extract the first half and return it in two registers. 1772 SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1773 DAG.getConstant(0, MVT::i32)); 1774 SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl, 1775 DAG.getVTList(MVT::i32, MVT::i32), Half); 1776 1777 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag); 1778 Flag = Chain.getValue(1); 1779 VA = RVLocs[++i]; // skip ahead to next loc 1780 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), 1781 HalfGPRs.getValue(1), Flag); 1782 Flag = Chain.getValue(1); 1783 VA = RVLocs[++i]; // skip ahead to next loc 1784 1785 // Extract the 2nd half and fall through to handle it as an f64 value. 1786 Arg = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg, 1787 DAG.getConstant(1, MVT::i32)); 1788 } 1789 // Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is 1790 // available. 1791 SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, 1792 DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1); 1793 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag); 1794 Flag = Chain.getValue(1); 1795 VA = RVLocs[++i]; // skip ahead to next loc 1796 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd.getValue(1), 1797 Flag); 1798 } else 1799 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 1800 1801 // Guarantee that all emitted copies are 1802 // stuck together, avoiding something bad. 
1803 Flag = Chain.getValue(1); 1804 } 1805 1806 SDValue result; 1807 if (Flag.getNode()) 1808 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain, Flag); 1809 else // Return Void 1810 result = DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, Chain); 1811 1812 return result; 1813} 1814 1815bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N) const { 1816 if (N->getNumValues() != 1) 1817 return false; 1818 if (!N->hasNUsesOfValue(1, 0)) 1819 return false; 1820 1821 unsigned NumCopies = 0; 1822 SDNode* Copies[2]; 1823 SDNode *Use = *N->use_begin(); 1824 if (Use->getOpcode() == ISD::CopyToReg) { 1825 Copies[NumCopies++] = Use; 1826 } else if (Use->getOpcode() == ARMISD::VMOVRRD) { 1827 // f64 returned in a pair of GPRs. 1828 for (SDNode::use_iterator UI = Use->use_begin(), UE = Use->use_end(); 1829 UI != UE; ++UI) { 1830 if (UI->getOpcode() != ISD::CopyToReg) 1831 return false; 1832 Copies[UI.getUse().getResNo()] = *UI; 1833 ++NumCopies; 1834 } 1835 } else if (Use->getOpcode() == ISD::BITCAST) { 1836 // f32 returned in a single GPR. 1837 if (!Use->hasNUsesOfValue(1, 0)) 1838 return false; 1839 Use = *Use->use_begin(); 1840 if (Use->getOpcode() != ISD::CopyToReg || !Use->hasNUsesOfValue(1, 0)) 1841 return false; 1842 Copies[NumCopies++] = Use; 1843 } else { 1844 return false; 1845 } 1846 1847 if (NumCopies != 1 && NumCopies != 2) 1848 return false; 1849 1850 bool HasRet = false; 1851 for (unsigned i = 0; i < NumCopies; ++i) { 1852 SDNode *Copy = Copies[i]; 1853 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); 1854 UI != UE; ++UI) { 1855 if (UI->getOpcode() == ISD::CopyToReg) { 1856 SDNode *Use = *UI; 1857 if (Use == Copies[0] || Use == Copies[1]) 1858 continue; 1859 return false; 1860 } 1861 if (UI->getOpcode() != ARMISD::RET_FLAG) 1862 return false; 1863 HasRet = true; 1864 } 1865 } 1866 1867 return HasRet; 1868} 1869 1870bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { 1871 if (!EnableARMTailCalls) 1872 return false; 1873 1874 if (!CI->isTailCall()) 1875 return false; 1876 1877 return !Subtarget->isThumb1Only(); 1878} 1879 1880// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as 1881// their target counterpart wrapped in the ARMISD::Wrapper node. Suppose N is 1882// one of the above mentioned nodes. It has to be wrapped because otherwise 1883// Select(N) returns N. So the raw TargetGlobalAddress nodes, etc. can only 1884// be used to form addressing mode. These wrapped nodes will be selected 1885// into MOVi. 
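// For instance, a ConstantPool SDNode for a constant C is rewritten here,
// roughly, as
//   ARMISD::Wrapper(TargetConstantPool<C>)
// and instruction selection then matches the wrapped node as the address of
// the pool entry (typically a pc-relative load, or a MOV-immediate style
// materialization), instead of trying to re-select the bare target node.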
1886static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { 1887 EVT PtrVT = Op.getValueType(); 1888 // FIXME there is no actual debug info here 1889 DebugLoc dl = Op.getDebugLoc(); 1890 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1891 SDValue Res; 1892 if (CP->isMachineConstantPoolEntry()) 1893 Res = DAG.getTargetConstantPool(CP->getMachineCPVal(), PtrVT, 1894 CP->getAlignment()); 1895 else 1896 Res = DAG.getTargetConstantPool(CP->getConstVal(), PtrVT, 1897 CP->getAlignment()); 1898 return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Res); 1899} 1900 1901unsigned ARMTargetLowering::getJumpTableEncoding() const { 1902 return MachineJumpTableInfo::EK_Inline; 1903} 1904 1905SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, 1906 SelectionDAG &DAG) const { 1907 MachineFunction &MF = DAG.getMachineFunction(); 1908 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1909 unsigned ARMPCLabelIndex = 0; 1910 DebugLoc DL = Op.getDebugLoc(); 1911 EVT PtrVT = getPointerTy(); 1912 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1913 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 1914 SDValue CPAddr; 1915 if (RelocM == Reloc::Static) { 1916 CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4); 1917 } else { 1918 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 1919 ARMPCLabelIndex = AFI->createPICLabelUId(); 1920 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex, 1921 ARMCP::CPBlockAddress, 1922 PCAdj); 1923 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1924 } 1925 CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); 1926 SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, 1927 MachinePointerInfo::getConstantPool(), 1928 false, false, 0); 1929 if (RelocM == Reloc::Static) 1930 return Result; 1931 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1932 return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel); 1933} 1934 1935// Lower ISD::GlobalTLSAddress using the "general dynamic" model 1936SDValue 1937ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, 1938 SelectionDAG &DAG) const { 1939 DebugLoc dl = GA->getDebugLoc(); 1940 EVT PtrVT = getPointerTy(); 1941 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1942 MachineFunction &MF = DAG.getMachineFunction(); 1943 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1944 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1945 ARMConstantPoolValue *CPV = 1946 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1947 ARMCP::CPValue, PCAdj, ARMCP::TLSGD, true); 1948 SDValue Argument = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1949 Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); 1950 Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, 1951 MachinePointerInfo::getConstantPool(), 1952 false, false, 0); 1953 SDValue Chain = Argument.getValue(1); 1954 1955 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 1956 Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel); 1957 1958 // call __tls_get_addr. 1959 ArgListTy Args; 1960 ArgListEntry Entry; 1961 Entry.Node = Argument; 1962 Entry.Ty = (const Type *) Type::getInt32Ty(*DAG.getContext()); 1963 Args.push_back(Entry); 1964 // FIXME: is there useful debug info available here? 
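  // At this point Argument holds (roughly) the address of this module's
  // TLSGD entry for the variable: a constant-pool load plus a PIC label add.
  // The general-dynamic sequence is completed by passing that address to
  // __tls_get_addr, whose return value is the address of the thread-local
  // variable itself -- exactly what the original ISD::GlobalTLSAddress
  // asked for.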
1965 std::pair<SDValue, SDValue> CallResult = 1966 LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()), 1967 false, false, false, false, 1968 0, CallingConv::C, false, /*isReturnValueUsed=*/true, 1969 DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl); 1970 return CallResult.first; 1971} 1972 1973// Lower ISD::GlobalTLSAddress using the "initial exec" or 1974// "local exec" model. 1975SDValue 1976ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, 1977 SelectionDAG &DAG) const { 1978 const GlobalValue *GV = GA->getGlobal(); 1979 DebugLoc dl = GA->getDebugLoc(); 1980 SDValue Offset; 1981 SDValue Chain = DAG.getEntryNode(); 1982 EVT PtrVT = getPointerTy(); 1983 // Get the Thread Pointer 1984 SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 1985 1986 if (GV->isDeclaration()) { 1987 MachineFunction &MF = DAG.getMachineFunction(); 1988 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1989 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 1990 // Initial exec model. 1991 unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8; 1992 ARMConstantPoolValue *CPV = 1993 new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex, 1994 ARMCP::CPValue, PCAdj, ARMCP::GOTTPOFF, true); 1995 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 1996 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 1997 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 1998 MachinePointerInfo::getConstantPool(), 1999 false, false, 0); 2000 Chain = Offset.getValue(1); 2001 2002 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2003 Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel); 2004 2005 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2006 MachinePointerInfo::getConstantPool(), 2007 false, false, 0); 2008 } else { 2009 // local exec model 2010 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV, ARMCP::TPOFF); 2011 Offset = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2012 Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); 2013 Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, 2014 MachinePointerInfo::getConstantPool(), 2015 false, false, 0); 2016 } 2017 2018 // The address of the thread local variable is the add of the thread 2019 // pointer with the offset of the variable. 2020 return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); 2021} 2022 2023SDValue 2024ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { 2025 // TODO: implement the "local dynamic" model 2026 assert(Subtarget->isTargetELF() && 2027 "TLS not implemented for non-ELF targets"); 2028 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 2029 // If the relocation model is PIC, use the "General Dynamic" TLS Model, 2030 // otherwise use the "Local Exec" TLS Model 2031 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) 2032 return LowerToTLSGeneralDynamicModel(GA, DAG); 2033 else 2034 return LowerToTLSExecModels(GA, DAG); 2035} 2036 2037SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, 2038 SelectionDAG &DAG) const { 2039 EVT PtrVT = getPointerTy(); 2040 DebugLoc dl = Op.getDebugLoc(); 2041 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2042 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2043 if (RelocM == Reloc::PIC_) { 2044 bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); 2045 ARMConstantPoolValue *CPV = 2046 new ARMConstantPoolValue(GV, UseGOTOFF ? 
ARMCP::GOTOFF : ARMCP::GOT); 2047 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2048 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2049 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), 2050 CPAddr, 2051 MachinePointerInfo::getConstantPool(), 2052 false, false, 0); 2053 SDValue Chain = Result.getValue(1); 2054 SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); 2055 Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); 2056 if (!UseGOTOFF) 2057 Result = DAG.getLoad(PtrVT, dl, Chain, Result, 2058 MachinePointerInfo::getGOT(), false, false, 0); 2059 return Result; 2060 } 2061 2062 // If we have T2 ops, we can materialize the address directly via movt/movw 2063 // pair. This is always cheaper. 2064 if (Subtarget->useMovt()) { 2065 ++NumMovwMovt; 2066 // FIXME: Once remat is capable of dealing with instructions with register 2067 // operands, expand this into two nodes. 2068 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2069 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2070 } else { 2071 SDValue CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2072 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2073 return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2074 MachinePointerInfo::getConstantPool(), 2075 false, false, 0); 2076 } 2077} 2078 2079SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, 2080 SelectionDAG &DAG) const { 2081 EVT PtrVT = getPointerTy(); 2082 DebugLoc dl = Op.getDebugLoc(); 2083 const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); 2084 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2085 MachineFunction &MF = DAG.getMachineFunction(); 2086 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2087 2088 if (Subtarget->useMovt()) { 2089 ++NumMovwMovt; 2090 // FIXME: Once remat is capable of dealing with instructions with register 2091 // operands, expand this into two nodes. 2092 if (RelocM == Reloc::Static) 2093 return DAG.getNode(ARMISD::Wrapper, dl, PtrVT, 2094 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2095 2096 unsigned Wrapper = (RelocM == Reloc::PIC_) 2097 ? ARMISD::WrapperPIC : ARMISD::WrapperDYN; 2098 SDValue Result = DAG.getNode(Wrapper, dl, PtrVT, 2099 DAG.getTargetGlobalAddress(GV, dl, PtrVT)); 2100 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2101 Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, 2102 MachinePointerInfo::getGOT(), false, false, 0); 2103 return Result; 2104 } 2105 2106 unsigned ARMPCLabelIndex = 0; 2107 SDValue CPAddr; 2108 if (RelocM == Reloc::Static) { 2109 CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4); 2110 } else { 2111 ARMPCLabelIndex = AFI->createPICLabelUId(); 2112 unsigned PCAdj = (RelocM != Reloc::PIC_) ? 
0 : (Subtarget->isThumb()?4:8); 2113 ARMConstantPoolValue *CPV = 2114 new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj); 2115 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2116 } 2117 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2118 2119 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2120 MachinePointerInfo::getConstantPool(), 2121 false, false, 0); 2122 SDValue Chain = Result.getValue(1); 2123 2124 if (RelocM == Reloc::PIC_) { 2125 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2126 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2127 } 2128 2129 if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) 2130 Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), 2131 false, false, 0); 2132 2133 return Result; 2134} 2135 2136SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, 2137 SelectionDAG &DAG) const { 2138 assert(Subtarget->isTargetELF() && 2139 "GLOBAL OFFSET TABLE not implemented for non-ELF targets"); 2140 MachineFunction &MF = DAG.getMachineFunction(); 2141 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2142 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2143 EVT PtrVT = getPointerTy(); 2144 DebugLoc dl = Op.getDebugLoc(); 2145 unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; 2146 ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(), 2147 "_GLOBAL_OFFSET_TABLE_", 2148 ARMPCLabelIndex, PCAdj); 2149 SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2150 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2151 SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2152 MachinePointerInfo::getConstantPool(), 2153 false, false, 0); 2154 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2155 return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2156} 2157 2158SDValue 2159ARMTargetLowering::LowerEH_SJLJ_DISPATCHSETUP(SDValue Op, SelectionDAG &DAG) 2160 const { 2161 DebugLoc dl = Op.getDebugLoc(); 2162 return DAG.getNode(ARMISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other, 2163 Op.getOperand(0)); 2164} 2165 2166SDValue 2167ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { 2168 DebugLoc dl = Op.getDebugLoc(); 2169 SDValue Val = DAG.getConstant(0, MVT::i32); 2170 return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, MVT::i32, Op.getOperand(0), 2171 Op.getOperand(1), Val); 2172} 2173 2174SDValue 2175ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { 2176 DebugLoc dl = Op.getDebugLoc(); 2177 return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), 2178 Op.getOperand(1), DAG.getConstant(0, MVT::i32)); 2179} 2180 2181SDValue 2182ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, 2183 const ARMSubtarget *Subtarget) const { 2184 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 2185 DebugLoc dl = Op.getDebugLoc(); 2186 switch (IntNo) { 2187 default: return SDValue(); // Don't custom lower most intrinsics. 
2188 case Intrinsic::arm_thread_pointer: { 2189 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2190 return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); 2191 } 2192 case Intrinsic::eh_sjlj_lsda: { 2193 MachineFunction &MF = DAG.getMachineFunction(); 2194 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2195 unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); 2196 EVT PtrVT = getPointerTy(); 2197 DebugLoc dl = Op.getDebugLoc(); 2198 Reloc::Model RelocM = getTargetMachine().getRelocationModel(); 2199 SDValue CPAddr; 2200 unsigned PCAdj = (RelocM != Reloc::PIC_) 2201 ? 0 : (Subtarget->isThumb() ? 4 : 8); 2202 ARMConstantPoolValue *CPV = 2203 new ARMConstantPoolValue(MF.getFunction(), ARMPCLabelIndex, 2204 ARMCP::CPLSDA, PCAdj); 2205 CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4); 2206 CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); 2207 SDValue Result = 2208 DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 2209 MachinePointerInfo::getConstantPool(), 2210 false, false, 0); 2211 2212 if (RelocM == Reloc::PIC_) { 2213 SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); 2214 Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); 2215 } 2216 return Result; 2217 } 2218 case Intrinsic::arm_neon_vmulls: 2219 case Intrinsic::arm_neon_vmullu: { 2220 unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) 2221 ? ARMISD::VMULLs : ARMISD::VMULLu; 2222 return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), 2223 Op.getOperand(1), Op.getOperand(2)); 2224 } 2225 } 2226} 2227 2228static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG, 2229 const ARMSubtarget *Subtarget) { 2230 DebugLoc dl = Op.getDebugLoc(); 2231 if (!Subtarget->hasDataBarrier()) { 2232 // Some ARMv6 cpus can support data barriers with an mcr instruction. 2233 // Thumb1 and pre-v6 ARM mode use a libcall instead and should never get 2234 // here. 2235 assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && 2236 "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); 2237 return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), 2238 DAG.getConstant(0, MVT::i32)); 2239 } 2240 2241 SDValue Op5 = Op.getOperand(5); 2242 bool isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue() != 0; 2243 unsigned isLL = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue(); 2244 unsigned isLS = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue(); 2245 bool isOnlyStoreBarrier = (isLL == 0 && isLS == 0); 2246 2247 ARM_MB::MemBOpt DMBOpt; 2248 if (isDeviceBarrier) 2249 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ST : ARM_MB::SY; 2250 else 2251 DMBOpt = isOnlyStoreBarrier ? ARM_MB::ISHST : ARM_MB::ISH; 2252 return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), 2253 DAG.getConstant(DMBOpt, MVT::i32)); 2254} 2255 2256static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, 2257 const ARMSubtarget *Subtarget) { 2258 // ARM pre v5TE and Thumb1 does not have preload instructions. 2259 if (!(Subtarget->isThumb2() || 2260 (!Subtarget->isThumb1Only() && Subtarget->hasV5TEOps()))) 2261 // Just preserve the chain. 2262 return Op.getOperand(0); 2263 2264 DebugLoc dl = Op.getDebugLoc(); 2265 unsigned isRead = ~cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue() & 1; 2266 if (!isRead && 2267 (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) 2268 // ARMv7 with MP extension has PLDW. 2269 return Op.getOperand(0); 2270 2271 if (Subtarget->isThumb()) 2272 // Invert the bits. 2273 isRead = ~isRead & 1; 2274 unsigned isData = Subtarget->isThumb() ? 
0 : 1; 2275 2276 // Currently there is no intrinsic that matches pli. 2277 return DAG.getNode(ARMISD::PRELOAD, dl, MVT::Other, Op.getOperand(0), 2278 Op.getOperand(1), DAG.getConstant(isRead, MVT::i32), 2279 DAG.getConstant(isData, MVT::i32)); 2280} 2281 2282static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { 2283 MachineFunction &MF = DAG.getMachineFunction(); 2284 ARMFunctionInfo *FuncInfo = MF.getInfo<ARMFunctionInfo>(); 2285 2286 // vastart just stores the address of the VarArgsFrameIndex slot into the 2287 // memory location argument. 2288 DebugLoc dl = Op.getDebugLoc(); 2289 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2290 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2291 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 2292 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 2293 MachinePointerInfo(SV), false, false, 0); 2294} 2295 2296SDValue 2297ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, 2298 SDValue &Root, SelectionDAG &DAG, 2299 DebugLoc dl) const { 2300 MachineFunction &MF = DAG.getMachineFunction(); 2301 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2302 2303 TargetRegisterClass *RC; 2304 if (AFI->isThumb1OnlyFunction()) 2305 RC = ARM::tGPRRegisterClass; 2306 else 2307 RC = ARM::GPRRegisterClass; 2308 2309 // Transform the arguments stored in physical registers into virtual ones. 2310 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2311 SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2312 2313 SDValue ArgValue2; 2314 if (NextVA.isMemLoc()) { 2315 MachineFrameInfo *MFI = MF.getFrameInfo(); 2316 int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true); 2317 2318 // Create load node to retrieve arguments from the stack. 2319 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2320 ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, 2321 MachinePointerInfo::getFixedStack(FI), 2322 false, false, 0); 2323 } else { 2324 Reg = MF.addLiveIn(NextVA.getLocReg(), RC); 2325 ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); 2326 } 2327 2328 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2); 2329} 2330 2331void 2332ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, 2333 unsigned &VARegSize, unsigned &VARegSaveSize) 2334 const { 2335 unsigned NumGPRs; 2336 if (CCInfo.isFirstByValRegValid()) 2337 NumGPRs = ARM::R4 - CCInfo.getFirstByValReg(); 2338 else { 2339 unsigned int firstUnalloced; 2340 firstUnalloced = CCInfo.getFirstUnallocated(GPRArgRegs, 2341 sizeof(GPRArgRegs) / 2342 sizeof(GPRArgRegs[0])); 2343 NumGPRs = (firstUnalloced <= 3) ? (4 - firstUnalloced) : 0; 2344 } 2345 2346 unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); 2347 VARegSize = NumGPRs * 4; 2348 VARegSaveSize = (VARegSize + Align - 1) & ~(Align - 1); 2349} 2350 2351// The remaining GPRs hold either the beginning of variable-argument 2352// data, or the beginning of an aggregate passed by value (usuall 2353// byval). Either way, we allocate stack slots adjacent to the data 2354// provided by our caller, and store the unallocated registers there. 2355// If this is a variadic function, the va_list pointer will begin with 2356// these values; otherwise, this reassembles a (byval) structure that 2357// was split between registers and memory. 
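// Worked example (assuming a hypothetical 'void f(int a, ...)' and an
// 8-byte stack alignment): r0 is consumed by 'a', so computeRegArea reports
// NumGPRs = 3, VARegSize = 12, and VARegSaveSize = (12 + 7) & ~7 = 16.
// VarArgStyleRegisters below then creates a fixed stack object adjacent to
// the incoming argument area and stores r1-r3 into it, so va_arg can walk
// from the saved registers straight into any arguments the caller already
// passed on the stack.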
2358void 2359ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, 2360 DebugLoc dl, SDValue &Chain, 2361 unsigned ArgOffset) const { 2362 MachineFunction &MF = DAG.getMachineFunction(); 2363 MachineFrameInfo *MFI = MF.getFrameInfo(); 2364 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2365 unsigned firstRegToSaveIndex; 2366 if (CCInfo.isFirstByValRegValid()) 2367 firstRegToSaveIndex = CCInfo.getFirstByValReg() - ARM::R0; 2368 else { 2369 firstRegToSaveIndex = CCInfo.getFirstUnallocated 2370 (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); 2371 } 2372 2373 unsigned VARegSize, VARegSaveSize; 2374 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2375 if (VARegSaveSize) { 2376 // If this function is vararg, store any remaining integer argument regs 2377 // to their spots on the stack so that they may be loaded by deferencing 2378 // the result of va_next. 2379 AFI->setVarArgsRegSaveSize(VARegSaveSize); 2380 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(VARegSaveSize, 2381 ArgOffset + VARegSaveSize 2382 - VARegSize, 2383 false)); 2384 SDValue FIN = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), 2385 getPointerTy()); 2386 2387 SmallVector<SDValue, 4> MemOps; 2388 for (; firstRegToSaveIndex < 4; ++firstRegToSaveIndex) { 2389 TargetRegisterClass *RC; 2390 if (AFI->isThumb1OnlyFunction()) 2391 RC = ARM::tGPRRegisterClass; 2392 else 2393 RC = ARM::GPRRegisterClass; 2394 2395 unsigned VReg = MF.addLiveIn(GPRArgRegs[firstRegToSaveIndex], RC); 2396 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2397 SDValue Store = 2398 DAG.getStore(Val.getValue(1), dl, Val, FIN, 2399 MachinePointerInfo::getFixedStack(AFI->getVarArgsFrameIndex()), 2400 false, false, 0); 2401 MemOps.push_back(Store); 2402 FIN = DAG.getNode(ISD::ADD, dl, getPointerTy(), FIN, 2403 DAG.getConstant(4, getPointerTy())); 2404 } 2405 if (!MemOps.empty()) 2406 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 2407 &MemOps[0], MemOps.size()); 2408 } else 2409 // This will point to the next argument passed via stack. 2410 AFI->setVarArgsFrameIndex(MFI->CreateFixedObject(4, ArgOffset, true)); 2411} 2412 2413SDValue 2414ARMTargetLowering::LowerFormalArguments(SDValue Chain, 2415 CallingConv::ID CallConv, bool isVarArg, 2416 const SmallVectorImpl<ISD::InputArg> 2417 &Ins, 2418 DebugLoc dl, SelectionDAG &DAG, 2419 SmallVectorImpl<SDValue> &InVals) 2420 const { 2421 MachineFunction &MF = DAG.getMachineFunction(); 2422 MachineFrameInfo *MFI = MF.getFrameInfo(); 2423 2424 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 2425 2426 // Assign locations to all of the incoming arguments. 2427 SmallVector<CCValAssign, 16> ArgLocs; 2428 CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs, 2429 *DAG.getContext()); 2430 CCInfo.setCallOrPrologue(Prologue); 2431 CCInfo.AnalyzeFormalArguments(Ins, 2432 CCAssignFnForNode(CallConv, /* Return*/ false, 2433 isVarArg)); 2434 2435 SmallVector<SDValue, 16> ArgValues; 2436 int lastInsIndex = -1; 2437 2438 SDValue ArgValue; 2439 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2440 CCValAssign &VA = ArgLocs[i]; 2441 2442 // Arguments stored in registers. 2443 if (VA.isRegLoc()) { 2444 EVT RegVT = VA.getLocVT(); 2445 2446 if (VA.needsCustom()) { 2447 // f64 and vector types are split up into multiple registers or 2448 // combinations of registers and stack slots. 
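        // For example, under the soft-float GPR convention an f64 formal
        // argument may arrive split across two GPRs, or one GPR plus a stack
        // slot; the two i32 halves are reassembled with ARMISD::VMOVDRR in
        // GetF64FormalArgument. A v2f64 argument is treated as two such f64
        // pieces and then rebuilt with INSERT_VECTOR_ELT below.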
2449 if (VA.getLocVT() == MVT::v2f64) { 2450 SDValue ArgValue1 = GetF64FormalArgument(VA, ArgLocs[++i], 2451 Chain, DAG, dl); 2452 VA = ArgLocs[++i]; // skip ahead to next loc 2453 SDValue ArgValue2; 2454 if (VA.isMemLoc()) { 2455 int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true); 2456 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2457 ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, 2458 MachinePointerInfo::getFixedStack(FI), 2459 false, false, 0); 2460 } else { 2461 ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], 2462 Chain, DAG, dl); 2463 } 2464 ArgValue = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64); 2465 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2466 ArgValue, ArgValue1, DAG.getIntPtrConstant(0)); 2467 ArgValue = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, 2468 ArgValue, ArgValue2, DAG.getIntPtrConstant(1)); 2469 } else 2470 ArgValue = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); 2471 2472 } else { 2473 TargetRegisterClass *RC; 2474 2475 if (RegVT == MVT::f32) 2476 RC = ARM::SPRRegisterClass; 2477 else if (RegVT == MVT::f64) 2478 RC = ARM::DPRRegisterClass; 2479 else if (RegVT == MVT::v2f64) 2480 RC = ARM::QPRRegisterClass; 2481 else if (RegVT == MVT::i32) 2482 RC = (AFI->isThumb1OnlyFunction() ? 2483 ARM::tGPRRegisterClass : ARM::GPRRegisterClass); 2484 else 2485 llvm_unreachable("RegVT not supported by FORMAL_ARGUMENTS Lowering"); 2486 2487 // Transform the arguments in physical registers into virtual ones. 2488 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2489 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); 2490 } 2491 2492 // If this is an 8 or 16-bit value, it is really passed promoted 2493 // to 32 bits. Insert an assert[sz]ext to capture this, then 2494 // truncate to the right size. 2495 switch (VA.getLocInfo()) { 2496 default: llvm_unreachable("Unknown loc info!"); 2497 case CCValAssign::Full: break; 2498 case CCValAssign::BCvt: 2499 ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue); 2500 break; 2501 case CCValAssign::SExt: 2502 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, 2503 DAG.getValueType(VA.getValVT())); 2504 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2505 break; 2506 case CCValAssign::ZExt: 2507 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, 2508 DAG.getValueType(VA.getValVT())); 2509 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); 2510 break; 2511 } 2512 2513 InVals.push_back(ArgValue); 2514 2515 } else { // VA.isRegLoc() 2516 2517 // sanity check 2518 assert(VA.isMemLoc()); 2519 assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered"); 2520 2521 int index = ArgLocs[i].getValNo(); 2522 2523 // Some Ins[] entries become multiple ArgLoc[] entries. 2524 // Process them only once. 2525 if (index != lastInsIndex) 2526 { 2527 ISD::ArgFlagsTy Flags = Ins[index].Flags; 2528 // FIXME: For now, all byval parameter objects are marked mutable. 2529 // This can be changed with more analysis. 2530 // In case of tail call optimization mark all arguments mutable. 2531 // Since they could be overwritten by lowering of arguments in case of 2532 // a tail call. 2533 if (Flags.isByVal()) { 2534 unsigned VARegSize, VARegSaveSize; 2535 computeRegArea(CCInfo, MF, VARegSize, VARegSaveSize); 2536 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, 0); 2537 unsigned Bytes = Flags.getByValSize() - VARegSize; 2538 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects. 
2539 int FI = MFI->CreateFixedObject(Bytes, 2540 VA.getLocMemOffset(), false); 2541 InVals.push_back(DAG.getFrameIndex(FI, getPointerTy())); 2542 } else { 2543 int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, 2544 VA.getLocMemOffset(), true); 2545 2546 // Create load nodes to retrieve arguments from the stack. 2547 SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); 2548 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2549 MachinePointerInfo::getFixedStack(FI), 2550 false, false, 0)); 2551 } 2552 lastInsIndex = index; 2553 } 2554 } 2555 } 2556 2557 // varargs 2558 if (isVarArg) 2559 VarArgStyleRegisters(CCInfo, DAG, dl, Chain, CCInfo.getNextStackOffset()); 2560 2561 return Chain; 2562} 2563 2564/// isFloatingPointZero - Return true if this is +0.0. 2565static bool isFloatingPointZero(SDValue Op) { 2566 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 2567 return CFP->getValueAPF().isPosZero(); 2568 else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) { 2569 // Maybe this has already been legalized into the constant pool? 2570 if (Op.getOperand(1).getOpcode() == ARMISD::Wrapper) { 2571 SDValue WrapperOp = Op.getOperand(1).getOperand(0); 2572 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(WrapperOp)) 2573 if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 2574 return CFP->getValueAPF().isPosZero(); 2575 } 2576 } 2577 return false; 2578} 2579 2580/// Returns appropriate ARM CMP (cmp) and corresponding condition code for 2581/// the given operands. 2582SDValue 2583ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, 2584 SDValue &ARMcc, SelectionDAG &DAG, 2585 DebugLoc dl) const { 2586 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) { 2587 unsigned C = RHSC->getZExtValue(); 2588 if (!isLegalICmpImmediate(C)) { 2589 // Constant does not fit, try adjusting it by one? 2590 switch (CC) { 2591 default: break; 2592 case ISD::SETLT: 2593 case ISD::SETGE: 2594 if (C != 0x80000000 && isLegalICmpImmediate(C-1)) { 2595 CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT; 2596 RHS = DAG.getConstant(C-1, MVT::i32); 2597 } 2598 break; 2599 case ISD::SETULT: 2600 case ISD::SETUGE: 2601 if (C != 0 && isLegalICmpImmediate(C-1)) { 2602 CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT; 2603 RHS = DAG.getConstant(C-1, MVT::i32); 2604 } 2605 break; 2606 case ISD::SETLE: 2607 case ISD::SETGT: 2608 if (C != 0x7fffffff && isLegalICmpImmediate(C+1)) { 2609 CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE; 2610 RHS = DAG.getConstant(C+1, MVT::i32); 2611 } 2612 break; 2613 case ISD::SETULE: 2614 case ISD::SETUGT: 2615 if (C != 0xffffffff && isLegalICmpImmediate(C+1)) { 2616 CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE; 2617 RHS = DAG.getConstant(C+1, MVT::i32); 2618 } 2619 break; 2620 } 2621 } 2622 } 2623 2624 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2625 ARMISD::NodeType CompareType; 2626 switch (CondCode) { 2627 default: 2628 CompareType = ARMISD::CMP; 2629 break; 2630 case ARMCC::EQ: 2631 case ARMCC::NE: 2632 // Uses only Z Flag 2633 CompareType = ARMISD::CMPZ; 2634 break; 2635 } 2636 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2637 return DAG.getNode(CompareType, dl, MVT::Glue, LHS, RHS); 2638} 2639 2640/// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. 
2641SDValue 2642ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, 2643 DebugLoc dl) const { 2644 SDValue Cmp; 2645 if (!isFloatingPointZero(RHS)) 2646 Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); 2647 else 2648 Cmp = DAG.getNode(ARMISD::CMPFPw0, dl, MVT::Glue, LHS); 2649 return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Glue, Cmp); 2650} 2651 2652/// duplicateCmp - Glue values can have only one use, so this function 2653/// duplicates a comparison node. 2654SDValue 2655ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { 2656 unsigned Opc = Cmp.getOpcode(); 2657 DebugLoc DL = Cmp.getDebugLoc(); 2658 if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) 2659 return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2660 2661 assert(Opc == ARMISD::FMSTAT && "unexpected comparison operation"); 2662 Cmp = Cmp.getOperand(0); 2663 Opc = Cmp.getOpcode(); 2664 if (Opc == ARMISD::CMPFP) 2665 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); 2666 else { 2667 assert(Opc == ARMISD::CMPFPw0 && "unexpected operand of FMSTAT"); 2668 Cmp = DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0)); 2669 } 2670 return DAG.getNode(ARMISD::FMSTAT, DL, MVT::Glue, Cmp); 2671} 2672 2673SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { 2674 SDValue Cond = Op.getOperand(0); 2675 SDValue SelectTrue = Op.getOperand(1); 2676 SDValue SelectFalse = Op.getOperand(2); 2677 DebugLoc dl = Op.getDebugLoc(); 2678 2679 // Convert: 2680 // 2681 // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) 2682 // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) 2683 // 2684 if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { 2685 const ConstantSDNode *CMOVTrue = 2686 dyn_cast<ConstantSDNode>(Cond.getOperand(0)); 2687 const ConstantSDNode *CMOVFalse = 2688 dyn_cast<ConstantSDNode>(Cond.getOperand(1)); 2689 2690 if (CMOVTrue && CMOVFalse) { 2691 unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); 2692 unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); 2693 2694 SDValue True; 2695 SDValue False; 2696 if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { 2697 True = SelectTrue; 2698 False = SelectFalse; 2699 } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { 2700 True = SelectFalse; 2701 False = SelectTrue; 2702 } 2703 2704 if (True.getNode() && False.getNode()) { 2705 EVT VT = Cond.getValueType(); 2706 SDValue ARMcc = Cond.getOperand(2); 2707 SDValue CCR = Cond.getOperand(3); 2708 SDValue Cmp = duplicateCmp(Cond.getOperand(4), DAG); 2709 return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); 2710 } 2711 } 2712 } 2713 2714 return DAG.getSelectCC(dl, Cond, 2715 DAG.getConstant(0, Cond.getValueType()), 2716 SelectTrue, SelectFalse, ISD::SETNE); 2717} 2718 2719SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 2720 EVT VT = Op.getValueType(); 2721 SDValue LHS = Op.getOperand(0); 2722 SDValue RHS = Op.getOperand(1); 2723 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2724 SDValue TrueVal = Op.getOperand(2); 2725 SDValue FalseVal = Op.getOperand(3); 2726 DebugLoc dl = Op.getDebugLoc(); 2727 2728 if (LHS.getValueType() == MVT::i32) { 2729 SDValue ARMcc; 2730 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2731 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2732 return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); 2733 } 2734 2735 ARMCC::CondCodes CondCode, CondCode2; 2736 FPCCToARMCC(CC, CondCode, CondCode2); 2737 2738 SDValue 
ARMcc = DAG.getConstant(CondCode, MVT::i32); 2739 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2740 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2741 SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, 2742 ARMcc, CCR, Cmp); 2743 if (CondCode2 != ARMCC::AL) { 2744 SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32); 2745 // FIXME: Needs another CMP because flag can have but one use. 2746 SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); 2747 Result = DAG.getNode(ARMISD::CMOV, dl, VT, 2748 Result, TrueVal, ARMcc2, CCR, Cmp2); 2749 } 2750 return Result; 2751} 2752 2753/// canChangeToInt - Given the fp compare operand, return true if it is suitable 2754/// to morph to an integer compare sequence. 2755static bool canChangeToInt(SDValue Op, bool &SeenZero, 2756 const ARMSubtarget *Subtarget) { 2757 SDNode *N = Op.getNode(); 2758 if (!N->hasOneUse()) 2759 // Otherwise it requires moving the value from fp to integer registers. 2760 return false; 2761 if (!N->getNumValues()) 2762 return false; 2763 EVT VT = Op.getValueType(); 2764 if (VT != MVT::f32 && !Subtarget->isFPBrccSlow()) 2765 // f32 case is generally profitable. f64 case only makes sense when vcmpe + 2766 // vmrs are very slow, e.g. cortex-a8. 2767 return false; 2768 2769 if (isFloatingPointZero(Op)) { 2770 SeenZero = true; 2771 return true; 2772 } 2773 return ISD::isNormalLoad(N); 2774} 2775 2776static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { 2777 if (isFloatingPointZero(Op)) 2778 return DAG.getConstant(0, MVT::i32); 2779 2780 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) 2781 return DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2782 Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), 2783 Ld->isVolatile(), Ld->isNonTemporal(), 2784 Ld->getAlignment()); 2785 2786 llvm_unreachable("Unknown VFP cmp argument!"); 2787} 2788 2789static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, 2790 SDValue &RetVal1, SDValue &RetVal2) { 2791 if (isFloatingPointZero(Op)) { 2792 RetVal1 = DAG.getConstant(0, MVT::i32); 2793 RetVal2 = DAG.getConstant(0, MVT::i32); 2794 return; 2795 } 2796 2797 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) { 2798 SDValue Ptr = Ld->getBasePtr(); 2799 RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2800 Ld->getChain(), Ptr, 2801 Ld->getPointerInfo(), 2802 Ld->isVolatile(), Ld->isNonTemporal(), 2803 Ld->getAlignment()); 2804 2805 EVT PtrType = Ptr.getValueType(); 2806 unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); 2807 SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), 2808 PtrType, Ptr, DAG.getConstant(4, PtrType)); 2809 RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), 2810 Ld->getChain(), NewPtr, 2811 Ld->getPointerInfo().getWithOffset(4), 2812 Ld->isVolatile(), Ld->isNonTemporal(), 2813 NewAlign); 2814 return; 2815 } 2816 2817 llvm_unreachable("Unknown VFP cmp argument!"); 2818} 2819 2820/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some 2821/// f32 and even f64 comparisons to integer ones. 
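/// For example (a rough sketch; %p and %x are illustrative), an equality
/// branch such as
///   %x = load float* %p
///   %c = fcmp oeq float %x, 0.0
///   br i1 %c, ...
/// can be rewritten as an i32 load of %p compared against 0 with a plain
/// integer CMP, avoiding the VCMPE+VMRS transfer from the VFP status flags
/// to CPSR. This is only attempted for EQ/NE-style comparisons, and only
/// when NaNs cannot affect the outcome (both operands known not to be NaN,
/// or one operand being the constant zero).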
2822SDValue 2823ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { 2824 SDValue Chain = Op.getOperand(0); 2825 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2826 SDValue LHS = Op.getOperand(2); 2827 SDValue RHS = Op.getOperand(3); 2828 SDValue Dest = Op.getOperand(4); 2829 DebugLoc dl = Op.getDebugLoc(); 2830 2831 bool SeenZero = false; 2832 if (canChangeToInt(LHS, SeenZero, Subtarget) && 2833 canChangeToInt(RHS, SeenZero, Subtarget) && 2834 // If one of the operand is zero, it's safe to ignore the NaN case since 2835 // we only care about equality comparisons. 2836 (SeenZero || (DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS)))) { 2837 // If unsafe fp math optimization is enabled and there are no other uses of 2838 // the CMP operands, and the condition code is EQ or NE, we can optimize it 2839 // to an integer comparison. 2840 if (CC == ISD::SETOEQ) 2841 CC = ISD::SETEQ; 2842 else if (CC == ISD::SETUNE) 2843 CC = ISD::SETNE; 2844 2845 SDValue ARMcc; 2846 if (LHS.getValueType() == MVT::f32) { 2847 LHS = bitcastf32Toi32(LHS, DAG); 2848 RHS = bitcastf32Toi32(RHS, DAG); 2849 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2850 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2851 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2852 Chain, Dest, ARMcc, CCR, Cmp); 2853 } 2854 2855 SDValue LHS1, LHS2; 2856 SDValue RHS1, RHS2; 2857 expandf64Toi32(LHS, DAG, LHS1, LHS2); 2858 expandf64Toi32(RHS, DAG, RHS1, RHS2); 2859 ARMCC::CondCodes CondCode = IntCCToARMCC(CC); 2860 ARMcc = DAG.getConstant(CondCode, MVT::i32); 2861 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2862 SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest }; 2863 return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7); 2864 } 2865 2866 return SDValue(); 2867} 2868 2869SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { 2870 SDValue Chain = Op.getOperand(0); 2871 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get(); 2872 SDValue LHS = Op.getOperand(2); 2873 SDValue RHS = Op.getOperand(3); 2874 SDValue Dest = Op.getOperand(4); 2875 DebugLoc dl = Op.getDebugLoc(); 2876 2877 if (LHS.getValueType() == MVT::i32) { 2878 SDValue ARMcc; 2879 SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); 2880 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2881 return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other, 2882 Chain, Dest, ARMcc, CCR, Cmp); 2883 } 2884 2885 assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64); 2886 2887 if (UnsafeFPMath && 2888 (CC == ISD::SETEQ || CC == ISD::SETOEQ || 2889 CC == ISD::SETNE || CC == ISD::SETUNE)) { 2890 SDValue Result = OptimizeVFPBrcond(Op, DAG); 2891 if (Result.getNode()) 2892 return Result; 2893 } 2894 2895 ARMCC::CondCodes CondCode, CondCode2; 2896 FPCCToARMCC(CC, CondCode, CondCode2); 2897 2898 SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); 2899 SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); 2900 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 2901 SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Glue); 2902 SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp }; 2903 SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2904 if (CondCode2 != ARMCC::AL) { 2905 ARMcc = DAG.getConstant(CondCode2, MVT::i32); 2906 SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) }; 2907 Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5); 2908 } 2909 return Res; 2910} 2911 2912SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 2913 SDValue 
Chain = Op.getOperand(0); 2914 SDValue Table = Op.getOperand(1); 2915 SDValue Index = Op.getOperand(2); 2916 DebugLoc dl = Op.getDebugLoc(); 2917 2918 EVT PTy = getPointerTy(); 2919 JumpTableSDNode *JT = cast<JumpTableSDNode>(Table); 2920 ARMFunctionInfo *AFI = DAG.getMachineFunction().getInfo<ARMFunctionInfo>(); 2921 SDValue UId = DAG.getConstant(AFI->createJumpTableUId(), PTy); 2922 SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PTy); 2923 Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI, UId); 2924 Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, PTy)); 2925 SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); 2926 if (Subtarget->isThumb2()) { 2927 // Thumb2 uses a two-level jump. That is, it jumps into the jump table 2928 // which does another jump to the destination. This also makes it easier 2929 // to translate it to TBB / TBH later. 2930 // FIXME: This might not work if the function is extremely large. 2931 return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain, 2932 Addr, Op.getOperand(2), JTI, UId); 2933 } 2934 if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { 2935 Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, 2936 MachinePointerInfo::getJumpTable(), 2937 false, false, 0); 2938 Chain = Addr.getValue(1); 2939 Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); 2940 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2941 } else { 2942 Addr = DAG.getLoad(PTy, dl, Chain, Addr, 2943 MachinePointerInfo::getJumpTable(), false, false, 0); 2944 Chain = Addr.getValue(1); 2945 return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); 2946 } 2947} 2948 2949static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { 2950 DebugLoc dl = Op.getDebugLoc(); 2951 unsigned Opc; 2952 2953 switch (Op.getOpcode()) { 2954 default: 2955 assert(0 && "Invalid opcode!"); 2956 case ISD::FP_TO_SINT: 2957 Opc = ARMISD::FTOSI; 2958 break; 2959 case ISD::FP_TO_UINT: 2960 Opc = ARMISD::FTOUI; 2961 break; 2962 } 2963 Op = DAG.getNode(Opc, dl, MVT::f32, Op.getOperand(0)); 2964 return DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); 2965} 2966 2967static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2968 EVT VT = Op.getValueType(); 2969 DebugLoc dl = Op.getDebugLoc(); 2970 2971 EVT OperandVT = Op.getOperand(0).getValueType(); 2972 assert(OperandVT == MVT::v4i16 && "Invalid type for custom lowering!"); 2973 if (VT != MVT::v4f32) 2974 return DAG.UnrollVectorOp(Op.getNode()); 2975 2976 unsigned CastOpc; 2977 unsigned Opc; 2978 switch (Op.getOpcode()) { 2979 default: 2980 assert(0 && "Invalid opcode!"); 2981 case ISD::SINT_TO_FP: 2982 CastOpc = ISD::SIGN_EXTEND; 2983 Opc = ISD::SINT_TO_FP; 2984 break; 2985 case ISD::UINT_TO_FP: 2986 CastOpc = ISD::ZERO_EXTEND; 2987 Opc = ISD::UINT_TO_FP; 2988 break; 2989 } 2990 2991 Op = DAG.getNode(CastOpc, dl, MVT::v4i32, Op.getOperand(0)); 2992 return DAG.getNode(Opc, dl, VT, Op); 2993} 2994 2995static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { 2996 EVT VT = Op.getValueType(); 2997 if (VT.isVector()) 2998 return LowerVectorINT_TO_FP(Op, DAG); 2999 3000 DebugLoc dl = Op.getDebugLoc(); 3001 unsigned Opc; 3002 3003 switch (Op.getOpcode()) { 3004 default: 3005 assert(0 && "Invalid opcode!"); 3006 case ISD::SINT_TO_FP: 3007 Opc = ARMISD::SITOF; 3008 break; 3009 case ISD::UINT_TO_FP: 3010 Opc = ARMISD::UITOF; 3011 break; 3012 } 3013 3014 Op = DAG.getNode(ISD::BITCAST, dl, MVT::f32, Op.getOperand(0)); 3015 return DAG.getNode(Opc, dl, VT, Op); 3016} 3017 3018SDValue 
ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { 3019 // Implement fcopysign with a fabs and a conditional fneg. 3020 SDValue Tmp0 = Op.getOperand(0); 3021 SDValue Tmp1 = Op.getOperand(1); 3022 DebugLoc dl = Op.getDebugLoc(); 3023 EVT VT = Op.getValueType(); 3024 EVT SrcVT = Tmp1.getValueType(); 3025 bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || 3026 Tmp0.getOpcode() == ARMISD::VMOVDRR; 3027 bool UseNEON = !InGPR && Subtarget->hasNEON(); 3028 3029 if (UseNEON) { 3030 // Use VBSL to copy the sign bit. 3031 unsigned EncodedVal = ARM_AM::createNEONModImm(0x6, 0x80); 3032 SDValue Mask = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v2i32, 3033 DAG.getTargetConstant(EncodedVal, MVT::i32)); 3034 EVT OpVT = (VT == MVT::f32) ? MVT::v2i32 : MVT::v1i64; 3035 if (VT == MVT::f64) 3036 Mask = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3037 DAG.getNode(ISD::BITCAST, dl, OpVT, Mask), 3038 DAG.getConstant(32, MVT::i32)); 3039 else /*if (VT == MVT::f32)*/ 3040 Tmp0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp0); 3041 if (SrcVT == MVT::f32) { 3042 Tmp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f32, Tmp1); 3043 if (VT == MVT::f64) 3044 Tmp1 = DAG.getNode(ARMISD::VSHL, dl, OpVT, 3045 DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1), 3046 DAG.getConstant(32, MVT::i32)); 3047 } else if (VT == MVT::f32) 3048 Tmp1 = DAG.getNode(ARMISD::VSHRu, dl, MVT::v1i64, 3049 DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Tmp1), 3050 DAG.getConstant(32, MVT::i32)); 3051 Tmp0 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp0); 3052 Tmp1 = DAG.getNode(ISD::BITCAST, dl, OpVT, Tmp1); 3053 3054 SDValue AllOnes = DAG.getTargetConstant(ARM_AM::createNEONModImm(0xe, 0xff), 3055 MVT::i32); 3056 AllOnes = DAG.getNode(ARMISD::VMOVIMM, dl, MVT::v8i8, AllOnes); 3057 SDValue MaskNot = DAG.getNode(ISD::XOR, dl, OpVT, Mask, 3058 DAG.getNode(ISD::BITCAST, dl, OpVT, AllOnes)); 3059 3060 SDValue Res = DAG.getNode(ISD::OR, dl, OpVT, 3061 DAG.getNode(ISD::AND, dl, OpVT, Tmp1, Mask), 3062 DAG.getNode(ISD::AND, dl, OpVT, Tmp0, MaskNot)); 3063 if (VT == MVT::f32) { 3064 Res = DAG.getNode(ISD::BITCAST, dl, MVT::v2f32, Res); 3065 Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, 3066 DAG.getConstant(0, MVT::i32)); 3067 } else { 3068 Res = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Res); 3069 } 3070 3071 return Res; 3072 } 3073 3074 // Bitcast operand 1 to i32. 3075 if (SrcVT == MVT::f64) 3076 Tmp1 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3077 &Tmp1, 1).getValue(1); 3078 Tmp1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp1); 3079 3080 // Or in the signbit with integer operations. 3081 SDValue Mask1 = DAG.getConstant(0x80000000, MVT::i32); 3082 SDValue Mask2 = DAG.getConstant(0x7fffffff, MVT::i32); 3083 Tmp1 = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp1, Mask1); 3084 if (VT == MVT::f32) { 3085 Tmp0 = DAG.getNode(ISD::AND, dl, MVT::i32, 3086 DAG.getNode(ISD::BITCAST, dl, MVT::i32, Tmp0), Mask2); 3087 return DAG.getNode(ISD::BITCAST, dl, MVT::f32, 3088 DAG.getNode(ISD::OR, dl, MVT::i32, Tmp0, Tmp1)); 3089 } 3090 3091 // f64: Or the high part with signbit and then combine two parts. 
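// As a worked example of this path (little-endian word order assumed, so the
// sign bit sits in bit 31 of the second VMOVRRD result): for
// copysign(-2.0, +1.0), VMOVRRD splits -2.0 into Lo = 0x00000000 and
// Hi = 0xC0000000; Tmp1 already holds only the sign bit of +1.0, i.e. 0;
// Hi & 0x7fffffff gives 0x40000000, OR-ing in Tmp1 leaves it unchanged, and
// VMOVDRR rebuilds the f64 value +2.0.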
3092 Tmp0 = DAG.getNode(ARMISD::VMOVRRD, dl, DAG.getVTList(MVT::i32, MVT::i32), 3093 &Tmp0, 1); 3094 SDValue Lo = Tmp0.getValue(0); 3095 SDValue Hi = DAG.getNode(ISD::AND, dl, MVT::i32, Tmp0.getValue(1), Mask2); 3096 Hi = DAG.getNode(ISD::OR, dl, MVT::i32, Hi, Tmp1); 3097 return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi); 3098} 3099 3100SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ 3101 MachineFunction &MF = DAG.getMachineFunction(); 3102 MachineFrameInfo *MFI = MF.getFrameInfo(); 3103 MFI->setReturnAddressIsTaken(true); 3104 3105 EVT VT = Op.getValueType(); 3106 DebugLoc dl = Op.getDebugLoc(); 3107 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3108 if (Depth) { 3109 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 3110 SDValue Offset = DAG.getConstant(4, MVT::i32); 3111 return DAG.getLoad(VT, dl, DAG.getEntryNode(), 3112 DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), 3113 MachinePointerInfo(), false, false, 0); 3114 } 3115 3116 // Return LR, which contains the return address. Mark it an implicit live-in. 3117 unsigned Reg = MF.addLiveIn(ARM::LR, getRegClassFor(MVT::i32)); 3118 return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); 3119} 3120 3121SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { 3122 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 3123 MFI->setFrameAddressIsTaken(true); 3124 3125 EVT VT = Op.getValueType(); 3126 DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful 3127 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 3128 unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) 3129 ? ARM::R7 : ARM::R11; 3130 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); 3131 while (Depth--) 3132 FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, 3133 MachinePointerInfo(), 3134 false, false, 0); 3135 return FrameAddr; 3136} 3137 3138/// ExpandBITCAST - If the target supports VFP, this function is called to 3139/// expand a bit convert where either the source or destination type is i64 to 3140/// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 3141/// operand type is illegal (e.g., v2f32 for a target that doesn't support 3142/// vectors), since the legalizer won't know what to do with that. 3143static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { 3144 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 3145 DebugLoc dl = N->getDebugLoc(); 3146 SDValue Op = N->getOperand(0); 3147 3148 // This function is only supposed to be called for i64 types, either as the 3149 // source or destination of the bit convert. 3150 EVT SrcVT = Op.getValueType(); 3151 EVT DstVT = N->getValueType(0); 3152 assert((SrcVT == MVT::i64 || DstVT == MVT::i64) && 3153 "ExpandBITCAST called for non-i64 type"); 3154 3155 // Turn i64->f64 into VMOVDRR. 3156 if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { 3157 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3158 DAG.getConstant(0, MVT::i32)); 3159 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, 3160 DAG.getConstant(1, MVT::i32)); 3161 return DAG.getNode(ISD::BITCAST, dl, DstVT, 3162 DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi)); 3163 } 3164 3165 // Turn f64->i64 into VMOVRRD. 3166 if (DstVT == MVT::i64 && TLI.isTypeLegal(SrcVT)) { 3167 SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl, 3168 DAG.getVTList(MVT::i32, MVT::i32), &Op, 1); 3169 // Merge the pieces into a single i64 value. 
3170 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Cvt, Cvt.getValue(1)); 3171 } 3172 3173 return SDValue(); 3174} 3175 3176/// getZeroVector - Returns a vector of specified type with all zero elements. 3177/// Zero vectors are used to represent vector negation and in those cases 3178/// will be implemented with the NEON VNEG instruction. However, VNEG does 3179/// not support i64 elements, so sometimes the zero vectors will need to be 3180/// explicitly constructed. Regardless, use a canonical VMOV to create the 3181/// zero vector. 3182static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { 3183 assert(VT.isVector() && "Expected a vector type"); 3184 // The canonical modified immediate encoding of a zero vector is....0! 3185 SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); 3186 EVT VmovVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; 3187 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, EncodedVal); 3188 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3189} 3190 3191/// LowerShiftRightParts - Lower SRA_PARTS, which returns two 3192/// i32 values and take a 2 x i32 value to shift plus a shift amount. 3193SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, 3194 SelectionDAG &DAG) const { 3195 assert(Op.getNumOperands() == 3 && "Not a double-shift!"); 3196 EVT VT = Op.getValueType(); 3197 unsigned VTBits = VT.getSizeInBits(); 3198 DebugLoc dl = Op.getDebugLoc(); 3199 SDValue ShOpLo = Op.getOperand(0); 3200 SDValue ShOpHi = Op.getOperand(1); 3201 SDValue ShAmt = Op.getOperand(2); 3202 SDValue ARMcc; 3203 unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; 3204 3205 assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); 3206 3207 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, 3208 DAG.getConstant(VTBits, MVT::i32), ShAmt); 3209 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); 3210 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, 3211 DAG.getConstant(VTBits, MVT::i32)); 3212 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); 3213 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); 3214 SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); 3215 3216 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); 3217 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE, 3218 ARMcc, DAG, dl); 3219 SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); 3220 SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, 3221 CCR, Cmp); 3222 3223 SDValue Ops[2] = { Lo, Hi }; 3224 return DAG.getMergeValues(Ops, 2, dl); 3225} 3226 3227/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two 3228/// i32 values and take a 2 x i32 value to shift plus a shift amount. 
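// For example, for a 64-bit shift left (VTBits == 32): when ShAmt < 32 the
// CMOV below picks FalseVal, so the high word is
// (ShOpHi << ShAmt) | (ShOpLo >> (32 - ShAmt)); when ShAmt >= 32 it picks
// Tmp3, so the high word is ShOpLo << (ShAmt - 32). The low word is always
// ShOpLo << ShAmt, which relies on ARM register-controlled shifts producing
// 0 for amounts of 32 or more.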
3229SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
3230 SelectionDAG &DAG) const {
3231 assert(Op.getNumOperands() == 3 && "Not a double-shift!");
3232 EVT VT = Op.getValueType();
3233 unsigned VTBits = VT.getSizeInBits();
3234 DebugLoc dl = Op.getDebugLoc();
3235 SDValue ShOpLo = Op.getOperand(0);
3236 SDValue ShOpHi = Op.getOperand(1);
3237 SDValue ShAmt = Op.getOperand(2);
3238 SDValue ARMcc;
3239
3240 assert(Op.getOpcode() == ISD::SHL_PARTS);
3241 SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
3242 DAG.getConstant(VTBits, MVT::i32), ShAmt);
3243 SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
3244 SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
3245 DAG.getConstant(VTBits, MVT::i32));
3246 SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
3247 SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
3248
3249 SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
3250 SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
3251 SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
3252 ARMcc, DAG, dl);
3253 SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
3254 SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
3255 CCR, Cmp);
3256
3257 SDValue Ops[2] = { Lo, Hi };
3258 return DAG.getMergeValues(Ops, 2, dl);
3259}
3260
3261SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
3262 SelectionDAG &DAG) const {
3263 // The rounding mode is in bits 23:22 of the FPSCR.
3264 // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0.
3265 // The formula we use to implement this is ((FPSCR + (1 << 22)) >> 22) & 3,
3266 // so that the shift and the AND get folded into a bitfield extract.
3267 DebugLoc dl = Op.getDebugLoc();
3268 SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32,
3269 DAG.getConstant(Intrinsic::arm_get_fpscr,
3270 MVT::i32));
3271 SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPSCR,
3272 DAG.getConstant(1U << 22, MVT::i32));
3273 SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
3274 DAG.getConstant(22, MVT::i32));
3275 return DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
3276 DAG.getConstant(3, MVT::i32));
3277}
3278
3279static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG,
3280 const ARMSubtarget *ST) {
3281 EVT VT = N->getValueType(0);
3282 DebugLoc dl = N->getDebugLoc();
3283
3284 if (!ST->hasV6T2Ops())
3285 return SDValue();
3286
3287 SDValue rbit = DAG.getNode(ARMISD::RBIT, dl, VT, N->getOperand(0));
3288 return DAG.getNode(ISD::CTLZ, dl, VT, rbit);
3289}
3290
3291static SDValue LowerShift(SDNode *N, SelectionDAG &DAG,
3292 const ARMSubtarget *ST) {
3293 EVT VT = N->getValueType(0);
3294 DebugLoc dl = N->getDebugLoc();
3295
3296 if (!VT.isVector())
3297 return SDValue();
3298
3299 // Lower vector shifts on NEON to use VSHL.
3300 assert(ST->hasNEON() && "unexpected vector shift");
3301
3302 // Left shifts translate directly to the vshiftu intrinsic.
3303 if (N->getOpcode() == ISD::SHL)
3304 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT,
3305 DAG.getConstant(Intrinsic::arm_neon_vshiftu, MVT::i32),
3306 N->getOperand(0), N->getOperand(1));
3307
3308 assert((N->getOpcode() == ISD::SRA ||
3309 N->getOpcode() == ISD::SRL) && "unexpected vector shift opcode");
3310
3311 // NEON uses the same intrinsics for both left and right shifts. For
3312 // right shifts, the shift amounts are negative, so negate the vector of
3313 // shift amounts.
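// For example, an ISD::SRL of a v4i32 by <1, 2, 3, 4> becomes
// llvm.arm.neon.vshiftu with per-lane counts <-1, -2, -3, -4>; VSHL treats a
// negative count as a shift to the right, so no separate right-shift
// intrinsic is needed (illustrative values only).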
3314 EVT ShiftVT = N->getOperand(1).getValueType(); 3315 SDValue NegatedCount = DAG.getNode(ISD::SUB, dl, ShiftVT, 3316 getZeroVector(ShiftVT, DAG, dl), 3317 N->getOperand(1)); 3318 Intrinsic::ID vshiftInt = (N->getOpcode() == ISD::SRA ? 3319 Intrinsic::arm_neon_vshifts : 3320 Intrinsic::arm_neon_vshiftu); 3321 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, 3322 DAG.getConstant(vshiftInt, MVT::i32), 3323 N->getOperand(0), NegatedCount); 3324} 3325 3326static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, 3327 const ARMSubtarget *ST) { 3328 EVT VT = N->getValueType(0); 3329 DebugLoc dl = N->getDebugLoc(); 3330 3331 // We can get here for a node like i32 = ISD::SHL i32, i64 3332 if (VT != MVT::i64) 3333 return SDValue(); 3334 3335 assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && 3336 "Unknown shift to lower!"); 3337 3338 // We only lower SRA, SRL of 1 here, all others use generic lowering. 3339 if (!isa<ConstantSDNode>(N->getOperand(1)) || 3340 cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 1) 3341 return SDValue(); 3342 3343 // If we are in thumb mode, we don't have RRX. 3344 if (ST->isThumb1Only()) return SDValue(); 3345 3346 // Okay, we have a 64-bit SRA or SRL of 1. Lower this to an RRX expr. 3347 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3348 DAG.getConstant(0, MVT::i32)); 3349 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, N->getOperand(0), 3350 DAG.getConstant(1, MVT::i32)); 3351 3352 // First, build a SRA_FLAG/SRL_FLAG op, which shifts the top part by one and 3353 // captures the result into a carry flag. 3354 unsigned Opc = N->getOpcode() == ISD::SRL ? ARMISD::SRL_FLAG:ARMISD::SRA_FLAG; 3355 Hi = DAG.getNode(Opc, dl, DAG.getVTList(MVT::i32, MVT::Glue), &Hi, 1); 3356 3357 // The low part is an ARMISD::RRX operand, which shifts the carry in. 3358 Lo = DAG.getNode(ARMISD::RRX, dl, MVT::i32, Lo, Hi.getValue(1)); 3359 3360 // Merge the pieces into a single i64 value. 3361 return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); 3362} 3363 3364static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { 3365 SDValue TmpOp0, TmpOp1; 3366 bool Invert = false; 3367 bool Swap = false; 3368 unsigned Opc = 0; 3369 3370 SDValue Op0 = Op.getOperand(0); 3371 SDValue Op1 = Op.getOperand(1); 3372 SDValue CC = Op.getOperand(2); 3373 EVT VT = Op.getValueType(); 3374 ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); 3375 DebugLoc dl = Op.getDebugLoc(); 3376 3377 if (Op.getOperand(1).getValueType().isFloatingPoint()) { 3378 switch (SetCCOpcode) { 3379 default: llvm_unreachable("Illegal FP comparison"); break; 3380 case ISD::SETUNE: 3381 case ISD::SETNE: Invert = true; // Fallthrough 3382 case ISD::SETOEQ: 3383 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3384 case ISD::SETOLT: 3385 case ISD::SETLT: Swap = true; // Fallthrough 3386 case ISD::SETOGT: 3387 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3388 case ISD::SETOLE: 3389 case ISD::SETLE: Swap = true; // Fallthrough 3390 case ISD::SETOGE: 3391 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3392 case ISD::SETUGE: Swap = true; // Fallthrough 3393 case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; 3394 case ISD::SETUGT: Swap = true; // Fallthrough 3395 case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; 3396 case ISD::SETUEQ: Invert = true; // Fallthrough 3397 case ISD::SETONE: 3398 // Expand this to (OLT | OGT). 
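// SETONE(a, b) is true only when the operands are ordered and unequal, i.e.
// (a < b) || (a > b); both VCGT results are false when either input is NaN.
// The SETUEQ case above reuses this expansion with Invert set, yielding
// "unordered or equal".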
3399 TmpOp0 = Op0; 3400 TmpOp1 = Op1; 3401 Opc = ISD::OR; 3402 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3403 Op1 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp0, TmpOp1); 3404 break; 3405 case ISD::SETUO: Invert = true; // Fallthrough 3406 case ISD::SETO: 3407 // Expand this to (OLT | OGE). 3408 TmpOp0 = Op0; 3409 TmpOp1 = Op1; 3410 Opc = ISD::OR; 3411 Op0 = DAG.getNode(ARMISD::VCGT, dl, VT, TmpOp1, TmpOp0); 3412 Op1 = DAG.getNode(ARMISD::VCGE, dl, VT, TmpOp0, TmpOp1); 3413 break; 3414 } 3415 } else { 3416 // Integer comparisons. 3417 switch (SetCCOpcode) { 3418 default: llvm_unreachable("Illegal integer comparison"); break; 3419 case ISD::SETNE: Invert = true; 3420 case ISD::SETEQ: Opc = ARMISD::VCEQ; break; 3421 case ISD::SETLT: Swap = true; 3422 case ISD::SETGT: Opc = ARMISD::VCGT; break; 3423 case ISD::SETLE: Swap = true; 3424 case ISD::SETGE: Opc = ARMISD::VCGE; break; 3425 case ISD::SETULT: Swap = true; 3426 case ISD::SETUGT: Opc = ARMISD::VCGTU; break; 3427 case ISD::SETULE: Swap = true; 3428 case ISD::SETUGE: Opc = ARMISD::VCGEU; break; 3429 } 3430 3431 // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero). 3432 if (Opc == ARMISD::VCEQ) { 3433 3434 SDValue AndOp; 3435 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3436 AndOp = Op0; 3437 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) 3438 AndOp = Op1; 3439 3440 // Ignore bitconvert. 3441 if (AndOp.getNode() && AndOp.getOpcode() == ISD::BITCAST) 3442 AndOp = AndOp.getOperand(0); 3443 3444 if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) { 3445 Opc = ARMISD::VTST; 3446 Op0 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(0)); 3447 Op1 = DAG.getNode(ISD::BITCAST, dl, VT, AndOp.getOperand(1)); 3448 Invert = !Invert; 3449 } 3450 } 3451 } 3452 3453 if (Swap) 3454 std::swap(Op0, Op1); 3455 3456 // If one of the operands is a constant vector zero, attempt to fold the 3457 // comparison to a specialized compare-against-zero form. 3458 SDValue SingleOp; 3459 if (ISD::isBuildVectorAllZeros(Op1.getNode())) 3460 SingleOp = Op0; 3461 else if (ISD::isBuildVectorAllZeros(Op0.getNode())) { 3462 if (Opc == ARMISD::VCGE) 3463 Opc = ARMISD::VCLEZ; 3464 else if (Opc == ARMISD::VCGT) 3465 Opc = ARMISD::VCLTZ; 3466 SingleOp = Op1; 3467 } 3468 3469 SDValue Result; 3470 if (SingleOp.getNode()) { 3471 switch (Opc) { 3472 case ARMISD::VCEQ: 3473 Result = DAG.getNode(ARMISD::VCEQZ, dl, VT, SingleOp); break; 3474 case ARMISD::VCGE: 3475 Result = DAG.getNode(ARMISD::VCGEZ, dl, VT, SingleOp); break; 3476 case ARMISD::VCLEZ: 3477 Result = DAG.getNode(ARMISD::VCLEZ, dl, VT, SingleOp); break; 3478 case ARMISD::VCGT: 3479 Result = DAG.getNode(ARMISD::VCGTZ, dl, VT, SingleOp); break; 3480 case ARMISD::VCLTZ: 3481 Result = DAG.getNode(ARMISD::VCLTZ, dl, VT, SingleOp); break; 3482 default: 3483 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3484 } 3485 } else { 3486 Result = DAG.getNode(Opc, dl, VT, Op0, Op1); 3487 } 3488 3489 if (Invert) 3490 Result = DAG.getNOT(dl, Result, VT); 3491 3492 return Result; 3493} 3494 3495/// isNEONModifiedImm - Check if the specified splat value corresponds to a 3496/// valid vector constant for a NEON instruction with a "modified immediate" 3497/// operand (e.g., VMOV). If so, return the encoded value. 
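// A couple of worked examples of the encodings handled below: a v4i32 splat
// of 0x00005600 matches the 0x0000nn00 pattern, so OpCmode = 0x2 and
// Imm = 0x56; a byte splat of 0x7f uses the 8-bit form with OpCmode = 0xe and
// Imm = 0x7f. Splat values that match none of the patterns (e.g. 0x12345678)
// return SDValue() and are lowered some other way.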
3498static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, 3499 unsigned SplatBitSize, SelectionDAG &DAG, 3500 EVT &VT, bool is128Bits, NEONModImmType type) { 3501 unsigned OpCmode, Imm; 3502 3503 // SplatBitSize is set to the smallest size that splats the vector, so a 3504 // zero vector will always have SplatBitSize == 8. However, NEON modified 3505 // immediate instructions others than VMOV do not support the 8-bit encoding 3506 // of a zero vector, and the default encoding of zero is supposed to be the 3507 // 32-bit version. 3508 if (SplatBits == 0) 3509 SplatBitSize = 32; 3510 3511 switch (SplatBitSize) { 3512 case 8: 3513 if (type != VMOVModImm) 3514 return SDValue(); 3515 // Any 1-byte value is OK. Op=0, Cmode=1110. 3516 assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); 3517 OpCmode = 0xe; 3518 Imm = SplatBits; 3519 VT = is128Bits ? MVT::v16i8 : MVT::v8i8; 3520 break; 3521 3522 case 16: 3523 // NEON's 16-bit VMOV supports splat values where only one byte is nonzero. 3524 VT = is128Bits ? MVT::v8i16 : MVT::v4i16; 3525 if ((SplatBits & ~0xff) == 0) { 3526 // Value = 0x00nn: Op=x, Cmode=100x. 3527 OpCmode = 0x8; 3528 Imm = SplatBits; 3529 break; 3530 } 3531 if ((SplatBits & ~0xff00) == 0) { 3532 // Value = 0xnn00: Op=x, Cmode=101x. 3533 OpCmode = 0xa; 3534 Imm = SplatBits >> 8; 3535 break; 3536 } 3537 return SDValue(); 3538 3539 case 32: 3540 // NEON's 32-bit VMOV supports splat values where: 3541 // * only one byte is nonzero, or 3542 // * the least significant byte is 0xff and the second byte is nonzero, or 3543 // * the least significant 2 bytes are 0xff and the third is nonzero. 3544 VT = is128Bits ? MVT::v4i32 : MVT::v2i32; 3545 if ((SplatBits & ~0xff) == 0) { 3546 // Value = 0x000000nn: Op=x, Cmode=000x. 3547 OpCmode = 0; 3548 Imm = SplatBits; 3549 break; 3550 } 3551 if ((SplatBits & ~0xff00) == 0) { 3552 // Value = 0x0000nn00: Op=x, Cmode=001x. 3553 OpCmode = 0x2; 3554 Imm = SplatBits >> 8; 3555 break; 3556 } 3557 if ((SplatBits & ~0xff0000) == 0) { 3558 // Value = 0x00nn0000: Op=x, Cmode=010x. 3559 OpCmode = 0x4; 3560 Imm = SplatBits >> 16; 3561 break; 3562 } 3563 if ((SplatBits & ~0xff000000) == 0) { 3564 // Value = 0xnn000000: Op=x, Cmode=011x. 3565 OpCmode = 0x6; 3566 Imm = SplatBits >> 24; 3567 break; 3568 } 3569 3570 // cmode == 0b1100 and cmode == 0b1101 are not supported for VORR or VBIC 3571 if (type == OtherModImm) return SDValue(); 3572 3573 if ((SplatBits & ~0xffff) == 0 && 3574 ((SplatBits | SplatUndef) & 0xff) == 0xff) { 3575 // Value = 0x0000nnff: Op=x, Cmode=1100. 3576 OpCmode = 0xc; 3577 Imm = SplatBits >> 8; 3578 SplatBits |= 0xff; 3579 break; 3580 } 3581 3582 if ((SplatBits & ~0xffffff) == 0 && 3583 ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { 3584 // Value = 0x00nnffff: Op=x, Cmode=1101. 3585 OpCmode = 0xd; 3586 Imm = SplatBits >> 16; 3587 SplatBits |= 0xffff; 3588 break; 3589 } 3590 3591 // Note: there are a few 32-bit splat values (specifically: 00ffff00, 3592 // ff000000, ff0000ff, and ffff00ff) that are valid for VMOV.I64 but not 3593 // VMOV.I32. A (very) minor optimization would be to replicate the value 3594 // and fall through here to test for a valid 64-bit splat. But, then the 3595 // caller would also need to check and handle the change in size. 3596 return SDValue(); 3597 3598 case 64: { 3599 if (type != VMOVModImm) 3600 return SDValue(); 3601 // NEON has a 64-bit VMOV splat where each byte is either 0 or 0xff. 
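// For example, a splat of 0x00ff00ff00ff00ff sets the Imm bit for bytes 0, 2,
// 4 and 6, giving Imm = 0x55 with OpCmode = 0x1e; any byte that is neither
// 0x00 nor 0xff (ignoring undef bits) rejects the value.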
3602 uint64_t BitMask = 0xff; 3603 uint64_t Val = 0; 3604 unsigned ImmMask = 1; 3605 Imm = 0; 3606 for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { 3607 if (((SplatBits | SplatUndef) & BitMask) == BitMask) { 3608 Val |= BitMask; 3609 Imm |= ImmMask; 3610 } else if ((SplatBits & BitMask) != 0) { 3611 return SDValue(); 3612 } 3613 BitMask <<= 8; 3614 ImmMask <<= 1; 3615 } 3616 // Op=1, Cmode=1110. 3617 OpCmode = 0x1e; 3618 SplatBits = Val; 3619 VT = is128Bits ? MVT::v2i64 : MVT::v1i64; 3620 break; 3621 } 3622 3623 default: 3624 llvm_unreachable("unexpected size for isNEONModifiedImm"); 3625 return SDValue(); 3626 } 3627 3628 unsigned EncodedVal = ARM_AM::createNEONModImm(OpCmode, Imm); 3629 return DAG.getTargetConstant(EncodedVal, MVT::i32); 3630} 3631 3632static bool isVEXTMask(const SmallVectorImpl<int> &M, EVT VT, 3633 bool &ReverseVEXT, unsigned &Imm) { 3634 unsigned NumElts = VT.getVectorNumElements(); 3635 ReverseVEXT = false; 3636 3637 // Assume that the first shuffle index is not UNDEF. Fail if it is. 3638 if (M[0] < 0) 3639 return false; 3640 3641 Imm = M[0]; 3642 3643 // If this is a VEXT shuffle, the immediate value is the index of the first 3644 // element. The other shuffle indices must be the successive elements after 3645 // the first one. 3646 unsigned ExpectedElt = Imm; 3647 for (unsigned i = 1; i < NumElts; ++i) { 3648 // Increment the expected index. If it wraps around, it may still be 3649 // a VEXT but the source vectors must be swapped. 3650 ExpectedElt += 1; 3651 if (ExpectedElt == NumElts * 2) { 3652 ExpectedElt = 0; 3653 ReverseVEXT = true; 3654 } 3655 3656 if (M[i] < 0) continue; // ignore UNDEF indices 3657 if (ExpectedElt != static_cast<unsigned>(M[i])) 3658 return false; 3659 } 3660 3661 // Adjust the index value if the source operands will be swapped. 3662 if (ReverseVEXT) 3663 Imm -= NumElts; 3664 3665 return true; 3666} 3667 3668/// isVREVMask - Check if a vector shuffle corresponds to a VREV 3669/// instruction with the specified blocksize. (The order of the elements 3670/// within each block of the vector is reversed.) 3671static bool isVREVMask(const SmallVectorImpl<int> &M, EVT VT, 3672 unsigned BlockSize) { 3673 assert((BlockSize==16 || BlockSize==32 || BlockSize==64) && 3674 "Only possible block sizes for VREV are: 16, 32, 64"); 3675 3676 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3677 if (EltSz == 64) 3678 return false; 3679 3680 unsigned NumElts = VT.getVectorNumElements(); 3681 unsigned BlockElts = M[0] + 1; 3682 // If the first shuffle index is UNDEF, be optimistic. 3683 if (M[0] < 0) 3684 BlockElts = BlockSize / EltSz; 3685 3686 if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) 3687 return false; 3688 3689 for (unsigned i = 0; i < NumElts; ++i) { 3690 if (M[i] < 0) continue; // ignore UNDEF indices 3691 if ((unsigned) M[i] != (i - i%BlockElts) + (BlockElts - 1 - i%BlockElts)) 3692 return false; 3693 } 3694 3695 return true; 3696} 3697 3698static bool isVTBLMask(const SmallVectorImpl<int> &M, EVT VT) { 3699 // We can handle <8 x i8> vector shuffles. If the index in the mask is out of 3700 // range, then 0 is placed into the resulting vector. So pretty much any mask 3701 // of 8 elements can work here. 
3702 return VT == MVT::v8i8 && M.size() == 8; 3703} 3704 3705static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT, 3706 unsigned &WhichResult) { 3707 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3708 if (EltSz == 64) 3709 return false; 3710 3711 unsigned NumElts = VT.getVectorNumElements(); 3712 WhichResult = (M[0] == 0 ? 0 : 1); 3713 for (unsigned i = 0; i < NumElts; i += 2) { 3714 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3715 (M[i+1] >= 0 && (unsigned) M[i+1] != i + NumElts + WhichResult)) 3716 return false; 3717 } 3718 return true; 3719} 3720 3721/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of 3722/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3723/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>. 3724static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3725 unsigned &WhichResult) { 3726 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3727 if (EltSz == 64) 3728 return false; 3729 3730 unsigned NumElts = VT.getVectorNumElements(); 3731 WhichResult = (M[0] == 0 ? 0 : 1); 3732 for (unsigned i = 0; i < NumElts; i += 2) { 3733 if ((M[i] >= 0 && (unsigned) M[i] != i + WhichResult) || 3734 (M[i+1] >= 0 && (unsigned) M[i+1] != i + WhichResult)) 3735 return false; 3736 } 3737 return true; 3738} 3739 3740static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT, 3741 unsigned &WhichResult) { 3742 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3743 if (EltSz == 64) 3744 return false; 3745 3746 unsigned NumElts = VT.getVectorNumElements(); 3747 WhichResult = (M[0] == 0 ? 0 : 1); 3748 for (unsigned i = 0; i != NumElts; ++i) { 3749 if (M[i] < 0) continue; // ignore UNDEF indices 3750 if ((unsigned) M[i] != 2 * i + WhichResult) 3751 return false; 3752 } 3753 3754 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3755 if (VT.is64BitVector() && EltSz == 32) 3756 return false; 3757 3758 return true; 3759} 3760 3761/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of 3762/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3763/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>, 3764static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3765 unsigned &WhichResult) { 3766 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3767 if (EltSz == 64) 3768 return false; 3769 3770 unsigned Half = VT.getVectorNumElements() / 2; 3771 WhichResult = (M[0] == 0 ? 0 : 1); 3772 for (unsigned j = 0; j != 2; ++j) { 3773 unsigned Idx = WhichResult; 3774 for (unsigned i = 0; i != Half; ++i) { 3775 int MIdx = M[i + j * Half]; 3776 if (MIdx >= 0 && (unsigned) MIdx != Idx) 3777 return false; 3778 Idx += 2; 3779 } 3780 } 3781 3782 // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3783 if (VT.is64BitVector() && EltSz == 32) 3784 return false; 3785 3786 return true; 3787} 3788 3789static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT, 3790 unsigned &WhichResult) { 3791 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3792 if (EltSz == 64) 3793 return false; 3794 3795 unsigned NumElts = VT.getVectorNumElements(); 3796 WhichResult = (M[0] == 0 ? 0 : 1); 3797 unsigned Idx = WhichResult * NumElts / 2; 3798 for (unsigned i = 0; i != NumElts; i += 2) { 3799 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3800 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx + NumElts)) 3801 return false; 3802 Idx += 1; 3803 } 3804 3805 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 
3806 if (VT.is64BitVector() && EltSz == 32) 3807 return false; 3808 3809 return true; 3810} 3811 3812/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of 3813/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef". 3814/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>. 3815static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT, 3816 unsigned &WhichResult) { 3817 unsigned EltSz = VT.getVectorElementType().getSizeInBits(); 3818 if (EltSz == 64) 3819 return false; 3820 3821 unsigned NumElts = VT.getVectorNumElements(); 3822 WhichResult = (M[0] == 0 ? 0 : 1); 3823 unsigned Idx = WhichResult * NumElts / 2; 3824 for (unsigned i = 0; i != NumElts; i += 2) { 3825 if ((M[i] >= 0 && (unsigned) M[i] != Idx) || 3826 (M[i+1] >= 0 && (unsigned) M[i+1] != Idx)) 3827 return false; 3828 Idx += 1; 3829 } 3830 3831 // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32. 3832 if (VT.is64BitVector() && EltSz == 32) 3833 return false; 3834 3835 return true; 3836} 3837 3838// If N is an integer constant that can be moved into a register in one 3839// instruction, return an SDValue of such a constant (will become a MOV 3840// instruction). Otherwise return null. 3841static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, 3842 const ARMSubtarget *ST, DebugLoc dl) { 3843 uint64_t Val; 3844 if (!isa<ConstantSDNode>(N)) 3845 return SDValue(); 3846 Val = cast<ConstantSDNode>(N)->getZExtValue(); 3847 3848 if (ST->isThumb1Only()) { 3849 if (Val <= 255 || ~Val <= 255) 3850 return DAG.getConstant(Val, MVT::i32); 3851 } else { 3852 if (ARM_AM::getSOImmVal(Val) != -1 || ARM_AM::getSOImmVal(~Val) != -1) 3853 return DAG.getConstant(Val, MVT::i32); 3854 } 3855 return SDValue(); 3856} 3857 3858// If this is a case we can't handle, return null and let the default 3859// expansion code take care of it. 3860SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, 3861 const ARMSubtarget *ST) const { 3862 BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); 3863 DebugLoc dl = Op.getDebugLoc(); 3864 EVT VT = Op.getValueType(); 3865 3866 APInt SplatBits, SplatUndef; 3867 unsigned SplatBitSize; 3868 bool HasAnyUndefs; 3869 if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 3870 if (SplatBitSize <= 64) { 3871 // Check if an immediate VMOV works. 3872 EVT VmovVT; 3873 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 3874 SplatUndef.getZExtValue(), SplatBitSize, 3875 DAG, VmovVT, VT.is128BitVector(), 3876 VMOVModImm); 3877 if (Val.getNode()) { 3878 SDValue Vmov = DAG.getNode(ARMISD::VMOVIMM, dl, VmovVT, Val); 3879 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3880 } 3881 3882 // Try an immediate VMVN. 3883 uint64_t NegatedImm = (SplatBits.getZExtValue() ^ 3884 ((1LL << SplatBitSize) - 1)); 3885 Val = isNEONModifiedImm(NegatedImm, 3886 SplatUndef.getZExtValue(), SplatBitSize, 3887 DAG, VmovVT, VT.is128BitVector(), 3888 VMVNModImm); 3889 if (Val.getNode()) { 3890 SDValue Vmov = DAG.getNode(ARMISD::VMVNIMM, dl, VmovVT, Val); 3891 return DAG.getNode(ISD::BITCAST, dl, VT, Vmov); 3892 } 3893 } 3894 } 3895 3896 // Scan through the operands to see if only one value is used. 
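// The scan drives the non-immediate cases below: for example, a
// BUILD_VECTOR <x, x, x, x> of a non-constant x becomes a single VDUP,
// <x, undef, undef, undef> becomes SCALAR_TO_VECTOR, and an all-constant
// vector that did not match a VMOV/VMVN immediate above falls back to a
// constant-pool load.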
3897 unsigned NumElts = VT.getVectorNumElements(); 3898 bool isOnlyLowElement = true; 3899 bool usesOnlyOneValue = true; 3900 bool isConstant = true; 3901 SDValue Value; 3902 for (unsigned i = 0; i < NumElts; ++i) { 3903 SDValue V = Op.getOperand(i); 3904 if (V.getOpcode() == ISD::UNDEF) 3905 continue; 3906 if (i > 0) 3907 isOnlyLowElement = false; 3908 if (!isa<ConstantFPSDNode>(V) && !isa<ConstantSDNode>(V)) 3909 isConstant = false; 3910 3911 if (!Value.getNode()) 3912 Value = V; 3913 else if (V != Value) 3914 usesOnlyOneValue = false; 3915 } 3916 3917 if (!Value.getNode()) 3918 return DAG.getUNDEF(VT); 3919 3920 if (isOnlyLowElement) 3921 return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); 3922 3923 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 3924 3925 // Use VDUP for non-constant splats. For f32 constant splats, reduce to 3926 // i32 and try again. 3927 if (usesOnlyOneValue && EltSize <= 32) { 3928 if (!isConstant) 3929 return DAG.getNode(ARMISD::VDUP, dl, VT, Value); 3930 if (VT.getVectorElementType().isFloatingPoint()) { 3931 SmallVector<SDValue, 8> Ops; 3932 for (unsigned i = 0; i < NumElts; ++i) 3933 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, MVT::i32, 3934 Op.getOperand(i))); 3935 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); 3936 SDValue Val = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], NumElts); 3937 Val = LowerBUILD_VECTOR(Val, DAG, ST); 3938 if (Val.getNode()) 3939 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 3940 } 3941 SDValue Val = IsSingleInstrConstant(Value, DAG, ST, dl); 3942 if (Val.getNode()) 3943 return DAG.getNode(ARMISD::VDUP, dl, VT, Val); 3944 } 3945 3946 // If all elements are constants and the case above didn't get hit, fall back 3947 // to the default expansion, which will generate a load from the constant 3948 // pool. 3949 if (isConstant) 3950 return SDValue(); 3951 3952 // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 3953 if (NumElts >= 4) { 3954 SDValue shuffle = ReconstructShuffle(Op, DAG); 3955 if (shuffle != SDValue()) 3956 return shuffle; 3957 } 3958 3959 // Vectors with 32- or 64-bit elements can be built by directly assigning 3960 // the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands 3961 // will be legalized. 3962 if (EltSize >= 32) { 3963 // Do the expansion with floating-point types, since that is what the VFP 3964 // registers are defined to use, and since i64 is not legal. 3965 EVT EltVT = EVT::getFloatingPointVT(EltSize); 3966 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 3967 SmallVector<SDValue, 8> Ops; 3968 for (unsigned i = 0; i < NumElts; ++i) 3969 Ops.push_back(DAG.getNode(ISD::BITCAST, dl, EltVT, Op.getOperand(i))); 3970 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 3971 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 3972 } 3973 3974 return SDValue(); 3975} 3976 3977// Gather data to see if the operation can be modelled as a 3978// shuffle in combination with VEXTs. 
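// For example, a v4i16 BUILD_VECTOR whose operands extract elements 2..5 of
// a single v8i16 source is rebuilt as a VEXT of the two halves of that
// source with offset 2, followed by the identity mask <0, 1, 2, 3> (a sketch
// of the common case; the code below also handles a second source vector).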
3979SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
3980 SelectionDAG &DAG) const {
3981 DebugLoc dl = Op.getDebugLoc();
3982 EVT VT = Op.getValueType();
3983 unsigned NumElts = VT.getVectorNumElements();
3984
3985 SmallVector<SDValue, 2> SourceVecs;
3986 SmallVector<unsigned, 2> MinElts;
3987 SmallVector<unsigned, 2> MaxElts;
3988
3989 for (unsigned i = 0; i < NumElts; ++i) {
3990 SDValue V = Op.getOperand(i);
3991 if (V.getOpcode() == ISD::UNDEF)
3992 continue;
3993 else if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) {
3994 // A shuffle can only come from building a vector from various
3995 // elements of other vectors.
3996 return SDValue();
3997 }
3998
3999 // Record this extraction against the appropriate vector if possible...
4000 SDValue SourceVec = V.getOperand(0);
4001 unsigned EltNo = cast<ConstantSDNode>(V.getOperand(1))->getZExtValue();
4002 bool FoundSource = false;
4003 for (unsigned j = 0; j < SourceVecs.size(); ++j) {
4004 if (SourceVecs[j] == SourceVec) {
4005 if (MinElts[j] > EltNo)
4006 MinElts[j] = EltNo;
4007 if (MaxElts[j] < EltNo)
4008 MaxElts[j] = EltNo;
4009 FoundSource = true;
4010 break;
4011 }
4012 }
4013
4014 // Or record a new source if not...
4015 if (!FoundSource) {
4016 SourceVecs.push_back(SourceVec);
4017 MinElts.push_back(EltNo);
4018 MaxElts.push_back(EltNo);
4019 }
4020 }
4021
4022 // Currently only do something sane when at most two source vectors
4023 // are involved.
4024 if (SourceVecs.size() > 2)
4025 return SDValue();
4026
4027 SDValue ShuffleSrcs[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT) };
4028 int VEXTOffsets[2] = {0, 0};
4029
4030 // This loop extracts the usage patterns of the source vectors
4031 // and prepares appropriate SDValues for a shuffle if possible.
4032 for (unsigned i = 0; i < SourceVecs.size(); ++i) {
4033 if (SourceVecs[i].getValueType() == VT) {
4034 // No VEXT necessary
4035 ShuffleSrcs[i] = SourceVecs[i];
4036 VEXTOffsets[i] = 0;
4037 continue;
4038 } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) {
4039 // It probably isn't worth padding out a smaller vector just to
4040 // break it down again in a shuffle.
4041 return SDValue();
4042 }
4043
4044 // Since only 64-bit and 128-bit vectors are legal on ARM and
4045 // we've eliminated the other cases...
4046 assert(SourceVecs[i].getValueType().getVectorNumElements() == 2*NumElts && 4047 "unexpected vector sizes in ReconstructShuffle"); 4048 4049 if (MaxElts[i] - MinElts[i] >= NumElts) { 4050 // Span too large for a VEXT to cope 4051 return SDValue(); 4052 } 4053 4054 if (MinElts[i] >= NumElts) { 4055 // The extraction can just take the second half 4056 VEXTOffsets[i] = NumElts; 4057 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4058 SourceVecs[i], 4059 DAG.getIntPtrConstant(NumElts)); 4060 } else if (MaxElts[i] < NumElts) { 4061 // The extraction can just take the first half 4062 VEXTOffsets[i] = 0; 4063 ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4064 SourceVecs[i], 4065 DAG.getIntPtrConstant(0)); 4066 } else { 4067 // An actual VEXT is needed 4068 VEXTOffsets[i] = MinElts[i]; 4069 SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4070 SourceVecs[i], 4071 DAG.getIntPtrConstant(0)); 4072 SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, 4073 SourceVecs[i], 4074 DAG.getIntPtrConstant(NumElts)); 4075 ShuffleSrcs[i] = DAG.getNode(ARMISD::VEXT, dl, VT, VEXTSrc1, VEXTSrc2, 4076 DAG.getConstant(VEXTOffsets[i], MVT::i32)); 4077 } 4078 } 4079 4080 SmallVector<int, 8> Mask; 4081 4082 for (unsigned i = 0; i < NumElts; ++i) { 4083 SDValue Entry = Op.getOperand(i); 4084 if (Entry.getOpcode() == ISD::UNDEF) { 4085 Mask.push_back(-1); 4086 continue; 4087 } 4088 4089 SDValue ExtractVec = Entry.getOperand(0); 4090 int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i) 4091 .getOperand(1))->getSExtValue(); 4092 if (ExtractVec == SourceVecs[0]) { 4093 Mask.push_back(ExtractElt - VEXTOffsets[0]); 4094 } else { 4095 Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); 4096 } 4097 } 4098 4099 // Final check before we try to produce nonsense... 4100 if (isShuffleMaskLegal(Mask, VT)) 4101 return DAG.getVectorShuffle(VT, dl, ShuffleSrcs[0], ShuffleSrcs[1], 4102 &Mask[0]); 4103 4104 return SDValue(); 4105} 4106 4107/// isShuffleMaskLegal - Targets can use this to indicate that they only 4108/// support *some* VECTOR_SHUFFLE operations, those with specific masks. 4109/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values 4110/// are assumed to be legal. 4111bool 4112ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, 4113 EVT VT) const { 4114 if (VT.getVectorNumElements() == 4 && 4115 (VT.is128BitVector() || VT.is64BitVector())) { 4116 unsigned PFIndexes[4]; 4117 for (unsigned i = 0; i != 4; ++i) { 4118 if (M[i] < 0) 4119 PFIndexes[i] = 8; 4120 else 4121 PFIndexes[i] = M[i]; 4122 } 4123 4124 // Compute the index in the perfect shuffle table. 
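// The table is indexed by the four mask entries as base-9 digits, with 8
// standing for an undef lane. For example, the mask <0, 4, 1, 5> gives
// 0*729 + 4*81 + 1*9 + 5 = 338, and the top two bits of the table entry hold
// the cost of synthesizing that shuffle.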
4125 unsigned PFTableIndex = 4126 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4127 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4128 unsigned Cost = (PFEntry >> 30); 4129 4130 if (Cost <= 4) 4131 return true; 4132 } 4133 4134 bool ReverseVEXT; 4135 unsigned Imm, WhichResult; 4136 4137 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4138 return (EltSize >= 32 || 4139 ShuffleVectorSDNode::isSplatMask(&M[0], VT) || 4140 isVREVMask(M, VT, 64) || 4141 isVREVMask(M, VT, 32) || 4142 isVREVMask(M, VT, 16) || 4143 isVEXTMask(M, VT, ReverseVEXT, Imm) || 4144 isVTBLMask(M, VT) || 4145 isVTRNMask(M, VT, WhichResult) || 4146 isVUZPMask(M, VT, WhichResult) || 4147 isVZIPMask(M, VT, WhichResult) || 4148 isVTRN_v_undef_Mask(M, VT, WhichResult) || 4149 isVUZP_v_undef_Mask(M, VT, WhichResult) || 4150 isVZIP_v_undef_Mask(M, VT, WhichResult)); 4151} 4152 4153/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 4154/// the specified operations to build the shuffle. 4155static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 4156 SDValue RHS, SelectionDAG &DAG, 4157 DebugLoc dl) { 4158 unsigned OpNum = (PFEntry >> 26) & 0x0F; 4159 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 4160 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 4161 4162 enum { 4163 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 4164 OP_VREV, 4165 OP_VDUP0, 4166 OP_VDUP1, 4167 OP_VDUP2, 4168 OP_VDUP3, 4169 OP_VEXT1, 4170 OP_VEXT2, 4171 OP_VEXT3, 4172 OP_VUZPL, // VUZP, left result 4173 OP_VUZPR, // VUZP, right result 4174 OP_VZIPL, // VZIP, left result 4175 OP_VZIPR, // VZIP, right result 4176 OP_VTRNL, // VTRN, left result 4177 OP_VTRNR // VTRN, right result 4178 }; 4179 4180 if (OpNum == OP_COPY) { 4181 if (LHSID == (1*9+2)*9+3) return LHS; 4182 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 4183 return RHS; 4184 } 4185 4186 SDValue OpLHS, OpRHS; 4187 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 4188 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 4189 EVT VT = OpLHS.getValueType(); 4190 4191 switch (OpNum) { 4192 default: llvm_unreachable("Unknown shuffle opcode!"); 4193 case OP_VREV: 4194 return DAG.getNode(ARMISD::VREV64, dl, VT, OpLHS); 4195 case OP_VDUP0: 4196 case OP_VDUP1: 4197 case OP_VDUP2: 4198 case OP_VDUP3: 4199 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, 4200 OpLHS, DAG.getConstant(OpNum-OP_VDUP0, MVT::i32)); 4201 case OP_VEXT1: 4202 case OP_VEXT2: 4203 case OP_VEXT3: 4204 return DAG.getNode(ARMISD::VEXT, dl, VT, 4205 OpLHS, OpRHS, 4206 DAG.getConstant(OpNum-OP_VEXT1+1, MVT::i32)); 4207 case OP_VUZPL: 4208 case OP_VUZPR: 4209 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4210 OpLHS, OpRHS).getValue(OpNum-OP_VUZPL); 4211 case OP_VZIPL: 4212 case OP_VZIPR: 4213 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4214 OpLHS, OpRHS).getValue(OpNum-OP_VZIPL); 4215 case OP_VTRNL: 4216 case OP_VTRNR: 4217 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4218 OpLHS, OpRHS).getValue(OpNum-OP_VTRNL); 4219 } 4220} 4221 4222static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, 4223 SmallVectorImpl<int> &ShuffleMask, 4224 SelectionDAG &DAG) { 4225 // Check to see if we can use the VTBL instruction. 
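// VTBL1 indexes a single d-register table of 8 bytes and VTBL2 a pair of
// d-registers (16 bytes); lanes whose index is out of range read as zero, so
// the shuffle mask can be passed directly as the byte-index vector built
// below.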
4226 SDValue V1 = Op.getOperand(0);
4227 SDValue V2 = Op.getOperand(1);
4228 DebugLoc DL = Op.getDebugLoc();
4229
4230 SmallVector<SDValue, 8> VTBLMask;
4231 for (SmallVectorImpl<int>::iterator
4232 I = ShuffleMask.begin(), E = ShuffleMask.end(); I != E; ++I)
4233 VTBLMask.push_back(DAG.getConstant(*I, MVT::i32));
4234
4235 if (V2.getNode()->getOpcode() == ISD::UNDEF)
4236 return DAG.getNode(ARMISD::VTBL1, DL, MVT::v8i8, V1,
4237 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4238 &VTBLMask[0], 8));
4239
4240 return DAG.getNode(ARMISD::VTBL2, DL, MVT::v8i8, V1, V2,
4241 DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v8i8,
4242 &VTBLMask[0], 8));
4243}
4244
4245static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
4246 SDValue V1 = Op.getOperand(0);
4247 SDValue V2 = Op.getOperand(1);
4248 DebugLoc dl = Op.getDebugLoc();
4249 EVT VT = Op.getValueType();
4250 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
4251 SmallVector<int, 8> ShuffleMask;
4252
4253 // Convert shuffles that are directly supported on NEON to target-specific
4254 // DAG nodes, instead of keeping them as shuffles and matching them again
4255 // during code selection. This is more efficient and avoids the possibility
4256 // of inconsistencies between legalization and selection.
4257 // FIXME: floating-point vectors should be canonicalized to integer vectors
4258 // of the same type so that they get CSEd properly.
4259 SVN->getMask(ShuffleMask);
4260
4261 unsigned EltSize = VT.getVectorElementType().getSizeInBits();
4262 if (EltSize <= 32) {
4263 if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) {
4264 int Lane = SVN->getSplatIndex();
4265 // If this is an undef splat, generate it via "just" vdup, if possible.
4266 if (Lane == -1) Lane = 0;
4267
4268 if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) {
4269 return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0));
4270 }
4271 return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1,
4272 DAG.getConstant(Lane, MVT::i32));
4273 }
4274
4275 bool ReverseVEXT;
4276 unsigned Imm;
4277 if (isVEXTMask(ShuffleMask, VT, ReverseVEXT, Imm)) {
4278 if (ReverseVEXT)
4279 std::swap(V1, V2);
4280 return DAG.getNode(ARMISD::VEXT, dl, VT, V1, V2,
4281 DAG.getConstant(Imm, MVT::i32));
4282 }
4283
4284 if (isVREVMask(ShuffleMask, VT, 64))
4285 return DAG.getNode(ARMISD::VREV64, dl, VT, V1);
4286 if (isVREVMask(ShuffleMask, VT, 32))
4287 return DAG.getNode(ARMISD::VREV32, dl, VT, V1);
4288 if (isVREVMask(ShuffleMask, VT, 16))
4289 return DAG.getNode(ARMISD::VREV16, dl, VT, V1);
4290
4291 // Check for Neon shuffles that modify both input vectors in place.
4292 // If both results are used, i.e., if there are two shuffles with the same
4293 // source operands and with masks corresponding to both results of one of
4294 // these operations, DAG memoization will ensure that a single node is
4295 // used for both shuffles.
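// For example, on v4i32 the masks <0, 4, 2, 6> and <1, 5, 3, 7> are the two
// results of VTRN(V1, V2); each is lowered to the same two-result
// ARMISD::VTRN node, with WhichResult selecting getValue(0) or getValue(1),
// so a single VTRN serves both shuffles.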
4296 unsigned WhichResult; 4297 if (isVTRNMask(ShuffleMask, VT, WhichResult)) 4298 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4299 V1, V2).getValue(WhichResult); 4300 if (isVUZPMask(ShuffleMask, VT, WhichResult)) 4301 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4302 V1, V2).getValue(WhichResult); 4303 if (isVZIPMask(ShuffleMask, VT, WhichResult)) 4304 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4305 V1, V2).getValue(WhichResult); 4306 4307 if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4308 return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT), 4309 V1, V1).getValue(WhichResult); 4310 if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4311 return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT), 4312 V1, V1).getValue(WhichResult); 4313 if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) 4314 return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT), 4315 V1, V1).getValue(WhichResult); 4316 } 4317 4318 // If the shuffle is not directly supported and it has 4 elements, use 4319 // the PerfectShuffle-generated table to synthesize it from other shuffles. 4320 unsigned NumElts = VT.getVectorNumElements(); 4321 if (NumElts == 4) { 4322 unsigned PFIndexes[4]; 4323 for (unsigned i = 0; i != 4; ++i) { 4324 if (ShuffleMask[i] < 0) 4325 PFIndexes[i] = 8; 4326 else 4327 PFIndexes[i] = ShuffleMask[i]; 4328 } 4329 4330 // Compute the index in the perfect shuffle table. 4331 unsigned PFTableIndex = 4332 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 4333 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 4334 unsigned Cost = (PFEntry >> 30); 4335 4336 if (Cost <= 4) 4337 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 4338 } 4339 4340 // Implement shuffles with 32- or 64-bit elements as ARMISD::BUILD_VECTORs. 4341 if (EltSize >= 32) { 4342 // Do the expansion with floating-point types, since that is what the VFP 4343 // registers are defined to use, and since i64 is not legal. 4344 EVT EltVT = EVT::getFloatingPointVT(EltSize); 4345 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts); 4346 V1 = DAG.getNode(ISD::BITCAST, dl, VecVT, V1); 4347 V2 = DAG.getNode(ISD::BITCAST, dl, VecVT, V2); 4348 SmallVector<SDValue, 8> Ops; 4349 for (unsigned i = 0; i < NumElts; ++i) { 4350 if (ShuffleMask[i] < 0) 4351 Ops.push_back(DAG.getUNDEF(EltVT)); 4352 else 4353 Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, 4354 ShuffleMask[i] < (int)NumElts ? V1 : V2, 4355 DAG.getConstant(ShuffleMask[i] & (NumElts-1), 4356 MVT::i32))); 4357 } 4358 SDValue Val = DAG.getNode(ARMISD::BUILD_VECTOR, dl, VecVT, &Ops[0],NumElts); 4359 return DAG.getNode(ISD::BITCAST, dl, VT, Val); 4360 } 4361 4362 if (VT == MVT::v8i8) { 4363 SDValue NewOp = LowerVECTOR_SHUFFLEv8i8(Op, ShuffleMask, DAG); 4364 if (NewOp.getNode()) 4365 return NewOp; 4366 } 4367 4368 return SDValue(); 4369} 4370 4371static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { 4372 // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
4373 SDValue Lane = Op.getOperand(1); 4374 if (!isa<ConstantSDNode>(Lane)) 4375 return SDValue(); 4376 4377 SDValue Vec = Op.getOperand(0); 4378 if (Op.getValueType() == MVT::i32 && 4379 Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { 4380 DebugLoc dl = Op.getDebugLoc(); 4381 return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); 4382 } 4383 4384 return Op; 4385} 4386 4387static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { 4388 // The only time a CONCAT_VECTORS operation can have legal types is when 4389 // two 64-bit vectors are concatenated to a 128-bit vector. 4390 assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && 4391 "unexpected CONCAT_VECTORS"); 4392 DebugLoc dl = Op.getDebugLoc(); 4393 SDValue Val = DAG.getUNDEF(MVT::v2f64); 4394 SDValue Op0 = Op.getOperand(0); 4395 SDValue Op1 = Op.getOperand(1); 4396 if (Op0.getOpcode() != ISD::UNDEF) 4397 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4398 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op0), 4399 DAG.getIntPtrConstant(0)); 4400 if (Op1.getOpcode() != ISD::UNDEF) 4401 Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Val, 4402 DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op1), 4403 DAG.getIntPtrConstant(1)); 4404 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Val); 4405} 4406 4407/// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each 4408/// element has been zero/sign-extended, depending on the isSigned parameter, 4409/// from an integer type half its size. 4410static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG, 4411 bool isSigned) { 4412 // A v2i64 BUILD_VECTOR will have been legalized to a BITCAST from v4i32. 4413 EVT VT = N->getValueType(0); 4414 if (VT == MVT::v2i64 && N->getOpcode() == ISD::BITCAST) { 4415 SDNode *BVN = N->getOperand(0).getNode(); 4416 if (BVN->getValueType(0) != MVT::v4i32 || 4417 BVN->getOpcode() != ISD::BUILD_VECTOR) 4418 return false; 4419 unsigned LoElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4420 unsigned HiElt = 1 - LoElt; 4421 ConstantSDNode *Lo0 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt)); 4422 ConstantSDNode *Hi0 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt)); 4423 ConstantSDNode *Lo1 = dyn_cast<ConstantSDNode>(BVN->getOperand(LoElt+2)); 4424 ConstantSDNode *Hi1 = dyn_cast<ConstantSDNode>(BVN->getOperand(HiElt+2)); 4425 if (!Lo0 || !Hi0 || !Lo1 || !Hi1) 4426 return false; 4427 if (isSigned) { 4428 if (Hi0->getSExtValue() == Lo0->getSExtValue() >> 32 && 4429 Hi1->getSExtValue() == Lo1->getSExtValue() >> 32) 4430 return true; 4431 } else { 4432 if (Hi0->isNullValue() && Hi1->isNullValue()) 4433 return true; 4434 } 4435 return false; 4436 } 4437 4438 if (N->getOpcode() != ISD::BUILD_VECTOR) 4439 return false; 4440 4441 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 4442 SDNode *Elt = N->getOperand(i).getNode(); 4443 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) { 4444 unsigned EltSize = VT.getVectorElementType().getSizeInBits(); 4445 unsigned HalfSize = EltSize / 2; 4446 if (isSigned) { 4447 int64_t SExtVal = C->getSExtValue(); 4448 if ((SExtVal >> HalfSize) != (SExtVal >> EltSize)) 4449 return false; 4450 } else { 4451 if ((C->getZExtValue() >> HalfSize) != 0) 4452 return false; 4453 } 4454 continue; 4455 } 4456 return false; 4457 } 4458 4459 return true; 4460} 4461 4462/// isSignExtended - Check if a node is a vector value that is sign-extended 4463/// or a constant BUILD_VECTOR with sign-extended elements. 
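/// Nodes accepted here can later be narrowed with SkipExtension and fed
/// directly to a VMULL-based multiply (see LowerMUL below).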
4464static bool isSignExtended(SDNode *N, SelectionDAG &DAG) { 4465 if (N->getOpcode() == ISD::SIGN_EXTEND || ISD::isSEXTLoad(N)) 4466 return true; 4467 if (isExtendedBUILD_VECTOR(N, DAG, true)) 4468 return true; 4469 return false; 4470} 4471 4472/// isZeroExtended - Check if a node is a vector value that is zero-extended 4473/// or a constant BUILD_VECTOR with zero-extended elements. 4474static bool isZeroExtended(SDNode *N, SelectionDAG &DAG) { 4475 if (N->getOpcode() == ISD::ZERO_EXTEND || ISD::isZEXTLoad(N)) 4476 return true; 4477 if (isExtendedBUILD_VECTOR(N, DAG, false)) 4478 return true; 4479 return false; 4480} 4481 4482/// SkipExtension - For a node that is a SIGN_EXTEND, ZERO_EXTEND, extending 4483/// load, or BUILD_VECTOR with extended elements, return the unextended value. 4484static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { 4485 if (N->getOpcode() == ISD::SIGN_EXTEND || N->getOpcode() == ISD::ZERO_EXTEND) 4486 return N->getOperand(0); 4487 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) 4488 return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), 4489 LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), 4490 LD->isNonTemporal(), LD->getAlignment()); 4491 // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will 4492 // have been legalized as a BITCAST from v4i32. 4493 if (N->getOpcode() == ISD::BITCAST) { 4494 SDNode *BVN = N->getOperand(0).getNode(); 4495 assert(BVN->getOpcode() == ISD::BUILD_VECTOR && 4496 BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); 4497 unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; 4498 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, 4499 BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); 4500 } 4501 // Construct a new BUILD_VECTOR with elements truncated to half the size. 4502 assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR"); 4503 EVT VT = N->getValueType(0); 4504 unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2; 4505 unsigned NumElts = VT.getVectorNumElements(); 4506 MVT TruncVT = MVT::getIntegerVT(EltSize); 4507 SmallVector<SDValue, 8> Ops; 4508 for (unsigned i = 0; i != NumElts; ++i) { 4509 ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(i)); 4510 const APInt &CInt = C->getAPIntValue(); 4511 Ops.push_back(DAG.getConstant(CInt.trunc(EltSize), TruncVT)); 4512 } 4513 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 4514 MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); 4515} 4516 4517static bool isAddSubSExt(SDNode *N, SelectionDAG &DAG) { 4518 unsigned Opcode = N->getOpcode(); 4519 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4520 SDNode *N0 = N->getOperand(0).getNode(); 4521 SDNode *N1 = N->getOperand(1).getNode(); 4522 return N0->hasOneUse() && N1->hasOneUse() && 4523 isSignExtended(N0, DAG) && isSignExtended(N1, DAG); 4524 } 4525 return false; 4526} 4527 4528static bool isAddSubZExt(SDNode *N, SelectionDAG &DAG) { 4529 unsigned Opcode = N->getOpcode(); 4530 if (Opcode == ISD::ADD || Opcode == ISD::SUB) { 4531 SDNode *N0 = N->getOperand(0).getNode(); 4532 SDNode *N1 = N->getOperand(1).getNode(); 4533 return N0->hasOneUse() && N1->hasOneUse() && 4534 isZeroExtended(N0, DAG) && isZeroExtended(N1, DAG); 4535 } 4536 return false; 4537} 4538 4539static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { 4540 // Multiplications are only custom-lowered for 128-bit vectors so that 4541 // VMULL can be detected. Otherwise v2i64 multiplications are not legal. 
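 // Roughly: (mul (sext v4i16 a), (sext v4i16 b)) yielding v4i32 can be
 // selected as a single vmull.s16, instead of sign-extending both operands
 // and performing a full v4i32 multiply.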
4542 EVT VT = Op.getValueType(); 4543 assert(VT.is128BitVector() && "unexpected type for custom-lowering ISD::MUL"); 4544 SDNode *N0 = Op.getOperand(0).getNode(); 4545 SDNode *N1 = Op.getOperand(1).getNode(); 4546 unsigned NewOpc = 0; 4547 bool isMLA = false; 4548 bool isN0SExt = isSignExtended(N0, DAG); 4549 bool isN1SExt = isSignExtended(N1, DAG); 4550 if (isN0SExt && isN1SExt) 4551 NewOpc = ARMISD::VMULLs; 4552 else { 4553 bool isN0ZExt = isZeroExtended(N0, DAG); 4554 bool isN1ZExt = isZeroExtended(N1, DAG); 4555 if (isN0ZExt && isN1ZExt) 4556 NewOpc = ARMISD::VMULLu; 4557 else if (isN1SExt || isN1ZExt) { 4558 // Look for (s/zext A + s/zext B) * (s/zext C). We want to turn these 4559 // into (s/zext A * s/zext C) + (s/zext B * s/zext C) 4560 if (isN1SExt && isAddSubSExt(N0, DAG)) { 4561 NewOpc = ARMISD::VMULLs; 4562 isMLA = true; 4563 } else if (isN1ZExt && isAddSubZExt(N0, DAG)) { 4564 NewOpc = ARMISD::VMULLu; 4565 isMLA = true; 4566 } else if (isN0ZExt && isAddSubZExt(N1, DAG)) { 4567 std::swap(N0, N1); 4568 NewOpc = ARMISD::VMULLu; 4569 isMLA = true; 4570 } 4571 } 4572 4573 if (!NewOpc) { 4574 if (VT == MVT::v2i64) 4575 // Fall through to expand this. It is not legal. 4576 return SDValue(); 4577 else 4578 // Other vector multiplications are legal. 4579 return Op; 4580 } 4581 } 4582 4583 // Legalize to a VMULL instruction. 4584 DebugLoc DL = Op.getDebugLoc(); 4585 SDValue Op0; 4586 SDValue Op1 = SkipExtension(N1, DAG); 4587 if (!isMLA) { 4588 Op0 = SkipExtension(N0, DAG); 4589 assert(Op0.getValueType().is64BitVector() && 4590 Op1.getValueType().is64BitVector() && 4591 "unexpected types for extended operands to VMULL"); 4592 return DAG.getNode(NewOpc, DL, VT, Op0, Op1); 4593 } 4594 4595 // Optimizing (zext A + zext B) * C, to (VMULL A, C) + (VMULL B, C) during 4596 // isel lowering to take advantage of no-stall back to back vmul + vmla. 4597 // vmull q0, d4, d6 4598 // vmlal q0, d5, d6 4599 // is faster than 4600 // vaddl q0, d4, d5 4601 // vmovl q1, d6 4602 // vmul q0, q0, q1 4603 SDValue N00 = SkipExtension(N0->getOperand(0).getNode(), DAG); 4604 SDValue N01 = SkipExtension(N0->getOperand(1).getNode(), DAG); 4605 EVT Op1VT = Op1.getValueType(); 4606 return DAG.getNode(N0->getOpcode(), DL, VT, 4607 DAG.getNode(NewOpc, DL, VT, 4608 DAG.getNode(ISD::BITCAST, DL, Op1VT, N00), Op1), 4609 DAG.getNode(NewOpc, DL, VT, 4610 DAG.getNode(ISD::BITCAST, DL, Op1VT, N01), Op1)); 4611} 4612 4613static SDValue 4614LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { 4615 // Convert to float 4616 // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); 4617 // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); 4618 X = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, X); 4619 Y = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Y); 4620 X = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, X); 4621 Y = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, Y); 4622 // Get reciprocal estimate. 4623 // float4 recip = vrecpeq_f32(yf); 4624 Y = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4625 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), Y); 4626 // Because char has a smaller range than uchar, we can actually get away 4627 // without any newton steps. This requires that we use a weird bias 4628 // of 0xb000, however (again, this has been exhaustively tested). 
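 // (The bias is added to the raw float bits; the idea is to nudge each
 // quotient upward by a fixed number of ulps so that the truncating
 // conversion below still yields the exact quotient despite the
 // low-precision vrecpe estimate.)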
4629 // float4 result = as_float4(as_int4(xf*recip) + 0xb000); 4630 X = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, X, Y); 4631 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, X); 4632 Y = DAG.getConstant(0xb000, MVT::i32); 4633 Y = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Y, Y, Y, Y); 4634 X = DAG.getNode(ISD::ADD, dl, MVT::v4i32, X, Y); 4635 X = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, X); 4636 // Convert back to short. 4637 X = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, X); 4638 X = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, X); 4639 return X; 4640} 4641 4642static SDValue 4643LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { 4644 SDValue N2; 4645 // Convert to float. 4646 // float4 yf = vcvt_f32_s32(vmovl_s16(y)); 4647 // float4 xf = vcvt_f32_s32(vmovl_s16(x)); 4648 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N0); 4649 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, N1); 4650 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4651 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4652 4653 // Use reciprocal estimate and one refinement step. 4654 // float4 recip = vrecpeq_f32(yf); 4655 // recip *= vrecpsq_f32(yf, recip); 4656 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4657 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4658 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4659 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4660 N1, N2); 4661 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4662 // Because short has a smaller range than ushort, we can actually get away 4663 // with only a single newton step. This requires that we use a weird bias 4664 // of 89, however (again, this has been exhaustively tested). 4665 // float4 result = as_float4(as_int4(xf*recip) + 89); 4666 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4667 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4668 N1 = DAG.getConstant(89, MVT::i32); 4669 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4670 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4671 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4672 // Convert back to integer and return. 
4673 // return vmovn_s32(vcvt_s32_f32(result)); 4674 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4675 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4676 return N0; 4677} 4678 4679static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { 4680 EVT VT = Op.getValueType(); 4681 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4682 "unexpected type for custom-lowering ISD::SDIV"); 4683 4684 DebugLoc dl = Op.getDebugLoc(); 4685 SDValue N0 = Op.getOperand(0); 4686 SDValue N1 = Op.getOperand(1); 4687 SDValue N2, N3; 4688 4689 if (VT == MVT::v8i8) { 4690 N0 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N0); 4691 N1 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v8i16, N1); 4692 4693 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4694 DAG.getIntPtrConstant(4)); 4695 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4696 DAG.getIntPtrConstant(4)); 4697 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4698 DAG.getIntPtrConstant(0)); 4699 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4700 DAG.getIntPtrConstant(0)); 4701 4702 N0 = LowerSDIV_v4i8(N0, N1, dl, DAG); // v4i16 4703 N2 = LowerSDIV_v4i8(N2, N3, dl, DAG); // v4i16 4704 4705 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4706 N0 = LowerCONCAT_VECTORS(N0, DAG); 4707 4708 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v8i8, N0); 4709 return N0; 4710 } 4711 return LowerSDIV_v4i16(N0, N1, dl, DAG); 4712} 4713 4714static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { 4715 EVT VT = Op.getValueType(); 4716 assert((VT == MVT::v4i16 || VT == MVT::v8i8) && 4717 "unexpected type for custom-lowering ISD::UDIV"); 4718 4719 DebugLoc dl = Op.getDebugLoc(); 4720 SDValue N0 = Op.getOperand(0); 4721 SDValue N1 = Op.getOperand(1); 4722 SDValue N2, N3; 4723 4724 if (VT == MVT::v8i8) { 4725 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N0); 4726 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v8i16, N1); 4727 4728 N2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4729 DAG.getIntPtrConstant(4)); 4730 N3 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4731 DAG.getIntPtrConstant(4)); 4732 N0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N0, 4733 DAG.getIntPtrConstant(0)); 4734 N1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v4i16, N1, 4735 DAG.getIntPtrConstant(0)); 4736 4737 N0 = LowerSDIV_v4i16(N0, N1, dl, DAG); // v4i16 4738 N2 = LowerSDIV_v4i16(N2, N3, dl, DAG); // v4i16 4739 4740 N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, N0, N2); 4741 N0 = LowerCONCAT_VECTORS(N0, DAG); 4742 4743 N0 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v8i8, 4744 DAG.getConstant(Intrinsic::arm_neon_vqmovnsu, MVT::i32), 4745 N0); 4746 return N0; 4747 } 4748 4749 // v4i16 udiv ... Convert to float. 4750 // float4 yf = vcvt_f32_s32(vmovl_u16(y)); 4751 // float4 xf = vcvt_f32_s32(vmovl_u16(x)); 4752 N0 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N0); 4753 N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v4i32, N1); 4754 N0 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N0); 4755 N1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::v4f32, N1); 4756 4757 // Use reciprocal estimate and two refinement steps.
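 // (vrecps computes 2 - d*x, so "recip *= vrecps(d, recip)" is one
 // Newton-Raphson iteration for 1/d; two iterations recover roughly full
 // single-precision accuracy from the ~8-bit vrecpe estimate.)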
4758 // float4 recip = vrecpeq_f32(yf); 4759 // recip *= vrecpsq_f32(yf, recip); 4760 // recip *= vrecpsq_f32(yf, recip); 4761 N2 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4762 DAG.getConstant(Intrinsic::arm_neon_vrecpe, MVT::i32), N1); 4763 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4764 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4765 N1, N2); 4766 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4767 N1 = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v4f32, 4768 DAG.getConstant(Intrinsic::arm_neon_vrecps, MVT::i32), 4769 N1, N2); 4770 N2 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N1, N2); 4771 // Simply multiplying by the reciprocal estimate can leave us a few ulps 4772 // too low, so we add 2 ulps (exhaustive testing shows that this is enough, 4773 // and that it will never cause us to return an answer too large). 4774 // float4 result = as_float4(as_int4(xf*recip) + 2); 4775 N0 = DAG.getNode(ISD::FMUL, dl, MVT::v4f32, N0, N2); 4776 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, N0); 4777 N1 = DAG.getConstant(2, MVT::i32); 4778 N1 = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, N1, N1, N1, N1); 4779 N0 = DAG.getNode(ISD::ADD, dl, MVT::v4i32, N0, N1); 4780 N0 = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, N0); 4781 // Convert back to integer and return. 4782 // return vmovn_u32(vcvt_s32_f32(result)); 4783 N0 = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::v4i32, N0); 4784 N0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::v4i16, N0); 4785 return N0; 4786} 4787 4788SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 4789 switch (Op.getOpcode()) { 4790 default: llvm_unreachable("Don't know how to custom lower this!"); 4791 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 4792 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 4793 case ISD::GlobalAddress: 4794 return Subtarget->isTargetDarwin() ?
LowerGlobalAddressDarwin(Op, DAG) : 4795 LowerGlobalAddressELF(Op, DAG); 4796 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 4797 case ISD::SELECT: return LowerSELECT(Op, DAG); 4798 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 4799 case ISD::BR_CC: return LowerBR_CC(Op, DAG); 4800 case ISD::BR_JT: return LowerBR_JT(Op, DAG); 4801 case ISD::VASTART: return LowerVASTART(Op, DAG); 4802 case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget); 4803 case ISD::PREFETCH: return LowerPREFETCH(Op, DAG, Subtarget); 4804 case ISD::SINT_TO_FP: 4805 case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 4806 case ISD::FP_TO_SINT: 4807 case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); 4808 case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); 4809 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 4810 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 4811 case ISD::GLOBAL_OFFSET_TABLE: return LowerGLOBAL_OFFSET_TABLE(Op, DAG); 4812 case ISD::EH_SJLJ_SETJMP: return LowerEH_SJLJ_SETJMP(Op, DAG); 4813 case ISD::EH_SJLJ_LONGJMP: return LowerEH_SJLJ_LONGJMP(Op, DAG); 4814 case ISD::EH_SJLJ_DISPATCHSETUP: return LowerEH_SJLJ_DISPATCHSETUP(Op, DAG); 4815 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG, 4816 Subtarget); 4817 case ISD::BITCAST: return ExpandBITCAST(Op.getNode(), DAG); 4818 case ISD::SHL: 4819 case ISD::SRL: 4820 case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget); 4821 case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG); 4822 case ISD::SRL_PARTS: 4823 case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG); 4824 case ISD::CTTZ: return LowerCTTZ(Op.getNode(), DAG, Subtarget); 4825 case ISD::VSETCC: return LowerVSETCC(Op, DAG); 4826 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); 4827 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 4828 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); 4829 case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 4830 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 4831 case ISD::MUL: return LowerMUL(Op, DAG); 4832 case ISD::SDIV: return LowerSDIV(Op, DAG); 4833 case ISD::UDIV: return LowerUDIV(Op, DAG); 4834 } 4835 return SDValue(); 4836} 4837 4838/// ReplaceNodeResults - Replace the results of node with an illegal result 4839/// type with new values built out of custom code. 
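/// For ARM this currently covers i64 BITCASTs and 64-bit shift results
/// (see the switch below).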
4840void ARMTargetLowering::ReplaceNodeResults(SDNode *N, 4841 SmallVectorImpl<SDValue>&Results, 4842 SelectionDAG &DAG) const { 4843 SDValue Res; 4844 switch (N->getOpcode()) { 4845 default: 4846 llvm_unreachable("Don't know how to custom expand this!"); 4847 break; 4848 case ISD::BITCAST: 4849 Res = ExpandBITCAST(N, DAG); 4850 break; 4851 case ISD::SRL: 4852 case ISD::SRA: 4853 Res = Expand64BitShift(N, DAG, Subtarget); 4854 break; 4855 } 4856 if (Res.getNode()) 4857 Results.push_back(Res); 4858} 4859 4860//===----------------------------------------------------------------------===// 4861// ARM Scheduler Hooks 4862//===----------------------------------------------------------------------===// 4863 4864MachineBasicBlock * 4865ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, 4866 MachineBasicBlock *BB, 4867 unsigned Size) const { 4868 unsigned dest = MI->getOperand(0).getReg(); 4869 unsigned ptr = MI->getOperand(1).getReg(); 4870 unsigned oldval = MI->getOperand(2).getReg(); 4871 unsigned newval = MI->getOperand(3).getReg(); 4872 unsigned scratch = BB->getParent()->getRegInfo() 4873 .createVirtualRegister(ARM::GPRRegisterClass); 4874 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4875 DebugLoc dl = MI->getDebugLoc(); 4876 bool isThumb2 = Subtarget->isThumb2(); 4877 4878 unsigned ldrOpc, strOpc; 4879 switch (Size) { 4880 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 4881 case 1: 4882 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 4883 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 4884 break; 4885 case 2: 4886 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 4887 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 4888 break; 4889 case 4: 4890 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 4891 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 4892 break; 4893 } 4894 4895 MachineFunction *MF = BB->getParent(); 4896 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4897 MachineFunction::iterator It = BB; 4898 ++It; // insert the new blocks after the current block 4899 4900 MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB); 4901 MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB); 4902 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4903 MF->insert(It, loop1MBB); 4904 MF->insert(It, loop2MBB); 4905 MF->insert(It, exitMBB); 4906 4907 // Transfer the remainder of BB and its successor edges to exitMBB. 4908 exitMBB->splice(exitMBB->begin(), BB, 4909 llvm::next(MachineBasicBlock::iterator(MI)), 4910 BB->end()); 4911 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4912 4913 // thisMBB: 4914 // ... 4915 // fallthrough --> loop1MBB 4916 BB->addSuccessor(loop1MBB); 4917 4918 // loop1MBB: 4919 // ldrex dest, [ptr] 4920 // cmp dest, oldval 4921 // bne exitMBB 4922 BB = loop1MBB; 4923 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 4924 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 4925 .addReg(dest).addReg(oldval)); 4926 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4927 .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 4928 BB->addSuccessor(loop2MBB); 4929 BB->addSuccessor(exitMBB); 4930 4931 // loop2MBB: 4932 // strex scratch, newval, [ptr] 4933 // cmp scratch, #0 4934 // bne loop1MBB 4935 BB = loop2MBB; 4936 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval) 4937 .addReg(ptr)); 4938 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2CMPri : ARM::CMPri)) 4939 .addReg(scratch).addImm(0)); 4940 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 4941 .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 4942 BB->addSuccessor(loop1MBB); 4943 BB->addSuccessor(exitMBB); 4944 4945 // exitMBB: 4946 // ... 4947 BB = exitMBB; 4948 4949 MI->eraseFromParent(); // The instruction is gone now. 4950 4951 return BB; 4952} 4953 4954MachineBasicBlock * 4955ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 4956 unsigned Size, unsigned BinOpcode) const { 4957 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 4958 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 4959 4960 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 4961 MachineFunction *MF = BB->getParent(); 4962 MachineFunction::iterator It = BB; 4963 ++It; 4964 4965 unsigned dest = MI->getOperand(0).getReg(); 4966 unsigned ptr = MI->getOperand(1).getReg(); 4967 unsigned incr = MI->getOperand(2).getReg(); 4968 DebugLoc dl = MI->getDebugLoc(); 4969 4970 bool isThumb2 = Subtarget->isThumb2(); 4971 unsigned ldrOpc, strOpc; 4972 switch (Size) { 4973 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 4974 case 1: 4975 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 4976 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 4977 break; 4978 case 2: 4979 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 4980 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 4981 break; 4982 case 4: 4983 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 4984 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 4985 break; 4986 } 4987 4988 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4989 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 4990 MF->insert(It, loopMBB); 4991 MF->insert(It, exitMBB); 4992 4993 // Transfer the remainder of BB and its successor edges to exitMBB. 4994 exitMBB->splice(exitMBB->begin(), BB, 4995 llvm::next(MachineBasicBlock::iterator(MI)), 4996 BB->end()); 4997 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 4998 4999 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 5000 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 5001 unsigned scratch2 = (!BinOpcode) ? incr : 5002 RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 5003 5004 // thisMBB: 5005 // ... 5006 // fallthrough --> loopMBB 5007 BB->addSuccessor(loopMBB); 5008 5009 // loopMBB: 5010 // ldrex dest, ptr 5011 // <binop> scratch2, dest, incr 5012 // strex scratch, scratch2, ptr 5013 // cmp scratch, #0 5014 // bne- loopMBB 5015 // fallthrough --> exitMBB 5016 BB = loopMBB; 5017 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 5018 if (BinOpcode) { 5019 // operand order needs to go the other way for NAND 5020 if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr) 5021 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5022 addReg(incr).addReg(dest)).addReg(0); 5023 else 5024 AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2). 5025 addReg(dest).addReg(incr)).addReg(0); 5026 } 5027 5028 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 5029 .addReg(ptr)); 5030 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5031 .addReg(scratch).addImm(0)); 5032 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5033 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5034 5035 BB->addSuccessor(loopMBB); 5036 BB->addSuccessor(exitMBB); 5037 5038 // exitMBB: 5039 // ... 
5040 BB = exitMBB; 5041 5042 MI->eraseFromParent(); // The instruction is gone now. 5043 5044 return BB; 5045} 5046 5047MachineBasicBlock * 5048ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, 5049 MachineBasicBlock *BB, 5050 unsigned Size, 5051 bool signExtend, 5052 ARMCC::CondCodes Cond) const { 5053 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5054 5055 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5056 MachineFunction *MF = BB->getParent(); 5057 MachineFunction::iterator It = BB; 5058 ++It; 5059 5060 unsigned dest = MI->getOperand(0).getReg(); 5061 unsigned ptr = MI->getOperand(1).getReg(); 5062 unsigned incr = MI->getOperand(2).getReg(); 5063 unsigned oldval = dest; 5064 DebugLoc dl = MI->getDebugLoc(); 5065 5066 bool isThumb2 = Subtarget->isThumb2(); 5067 unsigned ldrOpc, strOpc, extendOpc; 5068 switch (Size) { 5069 default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); 5070 case 1: 5071 ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; 5072 strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; 5073 extendOpc = isThumb2 ? ARM::t2SXTBr : ARM::SXTBr; 5074 break; 5075 case 2: 5076 ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; 5077 strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; 5078 extendOpc = isThumb2 ? ARM::t2SXTHr : ARM::SXTHr; 5079 break; 5080 case 4: 5081 ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; 5082 strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; 5083 extendOpc = 0; 5084 break; 5085 } 5086 5087 MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5088 MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); 5089 MF->insert(It, loopMBB); 5090 MF->insert(It, exitMBB); 5091 5092 // Transfer the remainder of BB and its successor edges to exitMBB. 5093 exitMBB->splice(exitMBB->begin(), BB, 5094 llvm::next(MachineBasicBlock::iterator(MI)), 5095 BB->end()); 5096 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5097 5098 MachineRegisterInfo &RegInfo = MF->getRegInfo(); 5099 unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 5100 unsigned scratch2 = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 5101 5102 // thisMBB: 5103 // ... 5104 // fallthrough --> loopMBB 5105 BB->addSuccessor(loopMBB); 5106 5107 // loopMBB: 5108 // ldrex dest, ptr 5109 // (sign extend dest, if required) 5110 // cmp dest, incr 5111 // cmov.cond scratch2, dest, incr 5112 // strex scratch, scratch2, ptr 5113 // cmp scratch, #0 5114 // bne- loopMBB 5115 // fallthrough --> exitMBB 5116 BB = loopMBB; 5117 AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr)); 5118 5119 // Sign extend the value, if necessary. 5120 if (signExtend && extendOpc) { 5121 oldval = RegInfo.createVirtualRegister(ARM::GPRRegisterClass); 5122 AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval).addReg(dest)); 5123 } 5124 5125 // Build compare and cmov instructions. 5126 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5127 .addReg(oldval).addReg(incr)); 5128 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr), scratch2) 5129 .addReg(oldval).addReg(incr).addImm(Cond).addReg(ARM::CPSR); 5130 5131 AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2) 5132 .addReg(ptr)); 5133 AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5134 .addReg(scratch).addImm(0)); 5135 BuildMI(BB, dl, TII->get(isThumb2 ? 
ARM::t2Bcc : ARM::Bcc)) 5136 .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); 5137 5138 BB->addSuccessor(loopMBB); 5139 BB->addSuccessor(exitMBB); 5140 5141 // exitMBB: 5142 // ... 5143 BB = exitMBB; 5144 5145 MI->eraseFromParent(); // The instruction is gone now. 5146 5147 return BB; 5148} 5149 5150static 5151MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { 5152 for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(), 5153 E = MBB->succ_end(); I != E; ++I) 5154 if (*I != Succ) 5155 return *I; 5156 llvm_unreachable("Expecting a BB with two successors!"); 5157} 5158 5159// FIXME: This opcode table should obviously be expressed in the target 5160// description. We probably just need a "machine opcode" value in the pseudo 5161// instruction. But the ideal solution may be to simply remove the "S" version 5162// of the opcode altogether. 5163struct AddSubFlagsOpcodePair { 5164 unsigned PseudoOpc; 5165 unsigned MachineOpc; 5166}; 5167 5168static AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = { 5169 {ARM::ADCSri, ARM::ADCri}, 5170 {ARM::ADCSrr, ARM::ADCrr}, 5171 {ARM::ADCSrs, ARM::ADCrs}, 5172 {ARM::SBCSri, ARM::SBCri}, 5173 {ARM::SBCSrr, ARM::SBCrr}, 5174 {ARM::SBCSrs, ARM::SBCrs}, 5175 {ARM::RSBSri, ARM::RSBri}, 5176 {ARM::RSBSrr, ARM::RSBrr}, 5177 {ARM::RSBSrs, ARM::RSBrs}, 5178 {ARM::RSCSri, ARM::RSCri}, 5179 {ARM::RSCSrs, ARM::RSCrs}, 5180 {ARM::t2ADCSri, ARM::t2ADCri}, 5181 {ARM::t2ADCSrr, ARM::t2ADCrr}, 5182 {ARM::t2ADCSrs, ARM::t2ADCrs}, 5183 {ARM::t2SBCSri, ARM::t2SBCri}, 5184 {ARM::t2SBCSrr, ARM::t2SBCrr}, 5185 {ARM::t2SBCSrs, ARM::t2SBCrs}, 5186 {ARM::t2RSBSri, ARM::t2RSBri}, 5187 {ARM::t2RSBSrs, ARM::t2RSBrs}, 5188}; 5189 5190// Convert an Add or Subtract with Carry and Flags to a generic opcode with 5191// CPSR<def> operand. e.g. ADCS (...) -> ADC (... CPSR<def>). 5192// 5193// FIXME: Somewhere we should assert that CPSR<def> is in the correct 5194// position to be recognized by the target description as the 'S' bit. 5195bool ARMTargetLowering::RemapAddSubWithFlags(MachineInstr *MI, 5196 MachineBasicBlock *BB) const { 5197 unsigned OldOpc = MI->getOpcode(); 5198 unsigned NewOpc = 0; 5199 5200 // This is only called for instructions that need remapping, so iterating over 5201 // the tiny opcode table is not costly.
5202 static const int NPairs = 5203 sizeof(AddSubFlagsOpcodeMap) / sizeof(AddSubFlagsOpcodePair); 5204 for (AddSubFlagsOpcodePair *Pair = &AddSubFlagsOpcodeMap[0], 5205 *End = &AddSubFlagsOpcodeMap[NPairs]; Pair != End; ++Pair) { 5206 if (OldOpc == Pair->PseudoOpc) { 5207 NewOpc = Pair->MachineOpc; 5208 break; 5209 } 5210 } 5211 if (!NewOpc) 5212 return false; 5213 5214 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5215 DebugLoc dl = MI->getDebugLoc(); 5216 MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(NewOpc)); 5217 for (unsigned i = 0; i < MI->getNumOperands(); ++i) 5218 MIB.addOperand(MI->getOperand(i)); 5219 AddDefaultPred(MIB); 5220 MIB.addReg(ARM::CPSR, RegState::Define); // S bit 5221 MI->eraseFromParent(); 5222 return true; 5223} 5224 5225MachineBasicBlock * 5226ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 5227 MachineBasicBlock *BB) const { 5228 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5229 DebugLoc dl = MI->getDebugLoc(); 5230 bool isThumb2 = Subtarget->isThumb2(); 5231 switch (MI->getOpcode()) { 5232 default: { 5233 if (RemapAddSubWithFlags(MI, BB)) 5234 return BB; 5235 5236 MI->dump(); 5237 llvm_unreachable("Unexpected instr type to insert"); 5238 } 5239 case ARM::ATOMIC_LOAD_ADD_I8: 5240 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5241 case ARM::ATOMIC_LOAD_ADD_I16: 5242 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5243 case ARM::ATOMIC_LOAD_ADD_I32: 5244 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr); 5245 5246 case ARM::ATOMIC_LOAD_AND_I8: 5247 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5248 case ARM::ATOMIC_LOAD_AND_I16: 5249 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5250 case ARM::ATOMIC_LOAD_AND_I32: 5251 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); 5252 5253 case ARM::ATOMIC_LOAD_OR_I8: 5254 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5255 case ARM::ATOMIC_LOAD_OR_I16: 5256 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5257 case ARM::ATOMIC_LOAD_OR_I32: 5258 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); 5259 5260 case ARM::ATOMIC_LOAD_XOR_I8: 5261 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5262 case ARM::ATOMIC_LOAD_XOR_I16: 5263 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5264 case ARM::ATOMIC_LOAD_XOR_I32: 5265 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr); 5266 5267 case ARM::ATOMIC_LOAD_NAND_I8: 5268 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5269 case ARM::ATOMIC_LOAD_NAND_I16: 5270 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5271 case ARM::ATOMIC_LOAD_NAND_I32: 5272 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr); 5273 5274 case ARM::ATOMIC_LOAD_SUB_I8: 5275 return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5276 case ARM::ATOMIC_LOAD_SUB_I16: 5277 return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr); 5278 case ARM::ATOMIC_LOAD_SUB_I32: 5279 return EmitAtomicBinary(MI, BB, 4, isThumb2 ? 
ARM::t2SUBrr : ARM::SUBrr); 5280 5281 case ARM::ATOMIC_LOAD_MIN_I8: 5282 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::LT); 5283 case ARM::ATOMIC_LOAD_MIN_I16: 5284 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::LT); 5285 case ARM::ATOMIC_LOAD_MIN_I32: 5286 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::LT); 5287 5288 case ARM::ATOMIC_LOAD_MAX_I8: 5289 return EmitAtomicBinaryMinMax(MI, BB, 1, true, ARMCC::GT); 5290 case ARM::ATOMIC_LOAD_MAX_I16: 5291 return EmitAtomicBinaryMinMax(MI, BB, 2, true, ARMCC::GT); 5292 case ARM::ATOMIC_LOAD_MAX_I32: 5293 return EmitAtomicBinaryMinMax(MI, BB, 4, true, ARMCC::GT); 5294 5295 case ARM::ATOMIC_LOAD_UMIN_I8: 5296 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::LO); 5297 case ARM::ATOMIC_LOAD_UMIN_I16: 5298 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::LO); 5299 case ARM::ATOMIC_LOAD_UMIN_I32: 5300 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::LO); 5301 5302 case ARM::ATOMIC_LOAD_UMAX_I8: 5303 return EmitAtomicBinaryMinMax(MI, BB, 1, false, ARMCC::HI); 5304 case ARM::ATOMIC_LOAD_UMAX_I16: 5305 return EmitAtomicBinaryMinMax(MI, BB, 2, false, ARMCC::HI); 5306 case ARM::ATOMIC_LOAD_UMAX_I32: 5307 return EmitAtomicBinaryMinMax(MI, BB, 4, false, ARMCC::HI); 5308 5309 case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0); 5310 case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0); 5311 case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0); 5312 5313 case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1); 5314 case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); 5315 case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); 5316 5317 case ARM::tMOVCCr_pseudo: { 5318 // To "insert" a SELECT_CC instruction, we actually have to insert the 5319 // diamond control-flow pattern. The incoming instruction knows the 5320 // destination vreg to set, the condition code register to branch on, the 5321 // true/false values to select between, and a branch opcode to use. 5322 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5323 MachineFunction::iterator It = BB; 5324 ++It; 5325 5326 // thisMBB: 5327 // ... 5328 // TrueVal = ... 5329 // cmpTY ccX, r1, r2 5330 // bCC copy1MBB 5331 // fallthrough --> copy0MBB 5332 MachineBasicBlock *thisMBB = BB; 5333 MachineFunction *F = BB->getParent(); 5334 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 5335 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 5336 F->insert(It, copy0MBB); 5337 F->insert(It, sinkMBB); 5338 5339 // Transfer the remainder of BB and its successor edges to sinkMBB. 5340 sinkMBB->splice(sinkMBB->begin(), BB, 5341 llvm::next(MachineBasicBlock::iterator(MI)), 5342 BB->end()); 5343 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 5344 5345 BB->addSuccessor(copy0MBB); 5346 BB->addSuccessor(sinkMBB); 5347 5348 BuildMI(BB, dl, TII->get(ARM::tBcc)).addMBB(sinkMBB) 5349 .addImm(MI->getOperand(3).getImm()).addReg(MI->getOperand(4).getReg()); 5350 5351 // copy0MBB: 5352 // %FalseValue = ... 5353 // # fallthrough to sinkMBB 5354 BB = copy0MBB; 5355 5356 // Update machine-CFG edges 5357 BB->addSuccessor(sinkMBB); 5358 5359 // sinkMBB: 5360 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 5361 // ... 
5362 BB = sinkMBB; 5363 BuildMI(*BB, BB->begin(), dl, 5364 TII->get(ARM::PHI), MI->getOperand(0).getReg()) 5365 .addReg(MI->getOperand(1).getReg()).addMBB(copy0MBB) 5366 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 5367 5368 MI->eraseFromParent(); // The pseudo instruction is gone now. 5369 return BB; 5370 } 5371 5372 case ARM::BCCi64: 5373 case ARM::BCCZi64: { 5374 // If there is an unconditional branch to the other successor, remove it. 5375 BB->erase(llvm::next(MachineBasicBlock::iterator(MI)), BB->end()); 5376 5377 // Compare both parts that make up the double comparison separately for 5378 // equality. 5379 bool RHSisZero = MI->getOpcode() == ARM::BCCZi64; 5380 5381 unsigned LHS1 = MI->getOperand(1).getReg(); 5382 unsigned LHS2 = MI->getOperand(2).getReg(); 5383 if (RHSisZero) { 5384 AddDefaultPred(BuildMI(BB, dl, 5385 TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5386 .addReg(LHS1).addImm(0)); 5387 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri)) 5388 .addReg(LHS2).addImm(0) 5389 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5390 } else { 5391 unsigned RHS1 = MI->getOperand(3).getReg(); 5392 unsigned RHS2 = MI->getOperand(4).getReg(); 5393 AddDefaultPred(BuildMI(BB, dl, 5394 TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5395 .addReg(LHS1).addReg(RHS1)); 5396 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr)) 5397 .addReg(LHS2).addReg(RHS2) 5398 .addImm(ARMCC::EQ).addReg(ARM::CPSR); 5399 } 5400 5401 MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB(); 5402 MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB); 5403 if (MI->getOperand(0).getImm() == ARMCC::NE) 5404 std::swap(destMBB, exitMBB); 5405 5406 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) 5407 .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR); 5408 BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B)) 5409 .addMBB(exitMBB); 5410 5411 MI->eraseFromParent(); // The pseudo instruction is gone now. 5412 return BB; 5413 } 5414 } 5415} 5416 5417//===----------------------------------------------------------------------===// 5418// ARM Optimization Hooks 5419//===----------------------------------------------------------------------===// 5420 5421static 5422SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, 5423 TargetLowering::DAGCombinerInfo &DCI) { 5424 SelectionDAG &DAG = DCI.DAG; 5425 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 5426 EVT VT = N->getValueType(0); 5427 unsigned Opc = N->getOpcode(); 5428 bool isSlctCC = Slct.getOpcode() == ISD::SELECT_CC; 5429 SDValue LHS = isSlctCC ? Slct.getOperand(2) : Slct.getOperand(1); 5430 SDValue RHS = isSlctCC ? Slct.getOperand(3) : Slct.getOperand(2); 5431 ISD::CondCode CC = ISD::SETCC_INVALID; 5432 5433 if (isSlctCC) { 5434 CC = cast<CondCodeSDNode>(Slct.getOperand(4))->get(); 5435 } else { 5436 SDValue CCOp = Slct.getOperand(0); 5437 if (CCOp.getOpcode() == ISD::SETCC) 5438 CC = cast<CondCodeSDNode>(CCOp.getOperand(2))->get(); 5439 } 5440 5441 bool DoXform = false; 5442 bool InvCC = false; 5443 assert ((Opc == ISD::ADD || (Opc == ISD::SUB && Slct == N->getOperand(1))) && 5444 "Bad input!"); 5445 5446 if (LHS.getOpcode() == ISD::Constant && 5447 cast<ConstantSDNode>(LHS)->isNullValue()) { 5448 DoXform = true; 5449 } else if (CC != ISD::SETCC_INVALID && 5450 RHS.getOpcode() == ISD::Constant && 5451 cast<ConstantSDNode>(RHS)->isNullValue()) { 5452 std::swap(LHS, RHS); 5453 SDValue Op0 = Slct.getOperand(0); 5454 EVT OpVT = isSlctCC ? 
Op0.getValueType() : 5455 Op0.getOperand(0).getValueType(); 5456 bool isInt = OpVT.isInteger(); 5457 CC = ISD::getSetCCInverse(CC, isInt); 5458 5459 if (!TLI.isCondCodeLegal(CC, OpVT)) 5460 return SDValue(); // Inverse operator isn't legal. 5461 5462 DoXform = true; 5463 InvCC = true; 5464 } 5465 5466 if (DoXform) { 5467 SDValue Result = DAG.getNode(Opc, RHS.getDebugLoc(), VT, OtherOp, RHS); 5468 if (isSlctCC) 5469 return DAG.getSelectCC(N->getDebugLoc(), OtherOp, Result, 5470 Slct.getOperand(0), Slct.getOperand(1), CC); 5471 SDValue CCOp = Slct.getOperand(0); 5472 if (InvCC) 5473 CCOp = DAG.getSetCC(Slct.getDebugLoc(), CCOp.getValueType(), 5474 CCOp.getOperand(0), CCOp.getOperand(1), CC); 5475 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 5476 CCOp, OtherOp, Result); 5477 } 5478 return SDValue(); 5479} 5480 5481/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with 5482/// operands N0 and N1. This is a helper for PerformADDCombine that is 5483/// called with the default operands, and if that fails, with commuted 5484/// operands. 5485static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, 5486 TargetLowering::DAGCombinerInfo &DCI) { 5487 // fold (add (select cc, 0, c), x) -> (select cc, x, (add, x, c)) 5488 if (N0.getOpcode() == ISD::SELECT && N0.getNode()->hasOneUse()) { 5489 SDValue Result = combineSelectAndUse(N, N0, N1, DCI); 5490 if (Result.getNode()) return Result; 5491 } 5492 return SDValue(); 5493} 5494 5495/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. 5496/// 5497static SDValue PerformADDCombine(SDNode *N, 5498 TargetLowering::DAGCombinerInfo &DCI) { 5499 SDValue N0 = N->getOperand(0); 5500 SDValue N1 = N->getOperand(1); 5501 5502 // First try with the default operand order. 5503 SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI); 5504 if (Result.getNode()) 5505 return Result; 5506 5507 // If that didn't work, try again with the operands commuted. 5508 return PerformADDCombineWithOperands(N, N1, N0, DCI); 5509} 5510 5511/// PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB. 5512/// 5513static SDValue PerformSUBCombine(SDNode *N, 5514 TargetLowering::DAGCombinerInfo &DCI) { 5515 SDValue N0 = N->getOperand(0); 5516 SDValue N1 = N->getOperand(1); 5517 5518 // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c)) 5519 if (N1.getOpcode() == ISD::SELECT && N1.getNode()->hasOneUse()) { 5520 SDValue Result = combineSelectAndUse(N, N1, N0, DCI); 5521 if (Result.getNode()) return Result; 5522 } 5523 5524 return SDValue(); 5525} 5526 5527/// PerformVMULCombine 5528/// Distribute (A + B) * C to (A * C) + (B * C) to take advantage of the 5529/// special multiplier accumulator forwarding. 
5530 /// vmul d3, d0, d2 5531 /// vmla d3, d1, d2 5532 /// is faster than 5533 /// vadd d3, d0, d1 5534 /// vmul d3, d3, d2 5535static SDValue PerformVMULCombine(SDNode *N, 5536 TargetLowering::DAGCombinerInfo &DCI, 5537 const ARMSubtarget *Subtarget) { 5538 if (!Subtarget->hasVMLxForwarding()) 5539 return SDValue(); 5540 5541 SelectionDAG &DAG = DCI.DAG; 5542 SDValue N0 = N->getOperand(0); 5543 SDValue N1 = N->getOperand(1); 5544 unsigned Opcode = N0.getOpcode(); 5545 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 5546 Opcode != ISD::FADD && Opcode != ISD::FSUB) { 5547 Opcode = N1.getOpcode(); 5548 if (Opcode != ISD::ADD && Opcode != ISD::SUB && 5549 Opcode != ISD::FADD && Opcode != ISD::FSUB) 5550 return SDValue(); 5551 std::swap(N0, N1); 5552 } 5553 5554 EVT VT = N->getValueType(0); 5555 DebugLoc DL = N->getDebugLoc(); 5556 SDValue N00 = N0->getOperand(0); 5557 SDValue N01 = N0->getOperand(1); 5558 return DAG.getNode(Opcode, DL, VT, 5559 DAG.getNode(ISD::MUL, DL, VT, N00, N1), 5560 DAG.getNode(ISD::MUL, DL, VT, N01, N1)); 5561} 5562 5563static SDValue PerformMULCombine(SDNode *N, 5564 TargetLowering::DAGCombinerInfo &DCI, 5565 const ARMSubtarget *Subtarget) { 5566 SelectionDAG &DAG = DCI.DAG; 5567 5568 if (Subtarget->isThumb1Only()) 5569 return SDValue(); 5570 5571 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 5572 return SDValue(); 5573 5574 EVT VT = N->getValueType(0); 5575 if (VT.is64BitVector() || VT.is128BitVector()) 5576 return PerformVMULCombine(N, DCI, Subtarget); 5577 if (VT != MVT::i32) 5578 return SDValue(); 5579 5580 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 5581 if (!C) 5582 return SDValue(); 5583 5584 uint64_t MulAmt = C->getZExtValue(); 5585 unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); 5586 ShiftAmt = ShiftAmt & (32 - 1); 5587 SDValue V = N->getOperand(0); 5588 DebugLoc DL = N->getDebugLoc(); 5589 5590 SDValue Res; 5591 MulAmt >>= ShiftAmt; 5592 if (isPowerOf2_32(MulAmt - 1)) { 5593 // (mul x, 2^N + 1) => (add (shl x, N), x) 5594 Res = DAG.getNode(ISD::ADD, DL, VT, 5595 V, DAG.getNode(ISD::SHL, DL, VT, 5596 V, DAG.getConstant(Log2_32(MulAmt-1), 5597 MVT::i32))); 5598 } else if (isPowerOf2_32(MulAmt + 1)) { 5599 // (mul x, 2^N - 1) => (sub (shl x, N), x) 5600 Res = DAG.getNode(ISD::SUB, DL, VT, 5601 DAG.getNode(ISD::SHL, DL, VT, 5602 V, DAG.getConstant(Log2_32(MulAmt+1), 5603 MVT::i32)), 5604 V); 5605 } else 5606 return SDValue(); 5607 5608 if (ShiftAmt != 0) 5609 Res = DAG.getNode(ISD::SHL, DL, VT, Res, 5610 DAG.getConstant(ShiftAmt, MVT::i32)); 5611 5612 // Do not add new nodes to DAG combiner worklist.
5613 DCI.CombineTo(N, Res, false); 5614 return SDValue(); 5615} 5616 5617static SDValue PerformANDCombine(SDNode *N, 5618 TargetLowering::DAGCombinerInfo &DCI) { 5619 5620 // Attempt to use immediate-form VBIC 5621 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 5622 DebugLoc dl = N->getDebugLoc(); 5623 EVT VT = N->getValueType(0); 5624 SelectionDAG &DAG = DCI.DAG; 5625 5626 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 5627 return SDValue(); 5628 5629 APInt SplatBits, SplatUndef; 5630 unsigned SplatBitSize; 5631 bool HasAnyUndefs; 5632 if (BVN && 5633 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 5634 if (SplatBitSize <= 64) { 5635 EVT VbicVT; 5636 SDValue Val = isNEONModifiedImm((~SplatBits).getZExtValue(), 5637 SplatUndef.getZExtValue(), SplatBitSize, 5638 DAG, VbicVT, VT.is128BitVector(), 5639 OtherModImm); 5640 if (Val.getNode()) { 5641 SDValue Input = 5642 DAG.getNode(ISD::BITCAST, dl, VbicVT, N->getOperand(0)); 5643 SDValue Vbic = DAG.getNode(ARMISD::VBICIMM, dl, VbicVT, Input, Val); 5644 return DAG.getNode(ISD::BITCAST, dl, VT, Vbic); 5645 } 5646 } 5647 } 5648 5649 return SDValue(); 5650} 5651 5652/// PerformORCombine - Target-specific dag combine xforms for ISD::OR 5653static SDValue PerformORCombine(SDNode *N, 5654 TargetLowering::DAGCombinerInfo &DCI, 5655 const ARMSubtarget *Subtarget) { 5656 // Attempt to use immediate-form VORR 5657 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N->getOperand(1)); 5658 DebugLoc dl = N->getDebugLoc(); 5659 EVT VT = N->getValueType(0); 5660 SelectionDAG &DAG = DCI.DAG; 5661 5662 if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) 5663 return SDValue(); 5664 5665 APInt SplatBits, SplatUndef; 5666 unsigned SplatBitSize; 5667 bool HasAnyUndefs; 5668 if (BVN && Subtarget->hasNEON() && 5669 BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { 5670 if (SplatBitSize <= 64) { 5671 EVT VorrVT; 5672 SDValue Val = isNEONModifiedImm(SplatBits.getZExtValue(), 5673 SplatUndef.getZExtValue(), SplatBitSize, 5674 DAG, VorrVT, VT.is128BitVector(), 5675 OtherModImm); 5676 if (Val.getNode()) { 5677 SDValue Input = 5678 DAG.getNode(ISD::BITCAST, dl, VorrVT, N->getOperand(0)); 5679 SDValue Vorr = DAG.getNode(ARMISD::VORRIMM, dl, VorrVT, Input, Val); 5680 return DAG.getNode(ISD::BITCAST, dl, VT, Vorr); 5681 } 5682 } 5683 } 5684 5685 SDValue N0 = N->getOperand(0); 5686 if (N0.getOpcode() != ISD::AND) 5687 return SDValue(); 5688 SDValue N1 = N->getOperand(1); 5689 5690 // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. 5691 if (Subtarget->hasNEON() && N1.getOpcode() == ISD::AND && VT.isVector() && 5692 DAG.getTargetLoweringInfo().isTypeLegal(VT)) { 5693 APInt SplatUndef; 5694 unsigned SplatBitSize; 5695 bool HasAnyUndefs; 5696 5697 BuildVectorSDNode *BVN0 = dyn_cast<BuildVectorSDNode>(N0->getOperand(1)); 5698 APInt SplatBits0; 5699 if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, 5700 HasAnyUndefs) && !HasAnyUndefs) { 5701 BuildVectorSDNode *BVN1 = dyn_cast<BuildVectorSDNode>(N1->getOperand(1)); 5702 APInt SplatBits1; 5703 if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, 5704 HasAnyUndefs) && !HasAnyUndefs && 5705 SplatBits0 == ~SplatBits1) { 5706 // Canonicalize the vector type to make instruction selection simpler. 5707 EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v4i32 : MVT::v2i32; 5708 SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, 5709 N0->getOperand(1), N0->getOperand(0), 5710 N1->getOperand(0)); 5711 return DAG.getNode(ISD::BITCAST, dl, VT, Result); 5712 } 5713 } 5714 } 5715 5716 // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when 5717 // reasonable. 5718 5719 // BFI is only available on V6T2+ 5720 if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) 5721 return SDValue(); 5722 5723 DebugLoc DL = N->getDebugLoc(); 5724 // 1) or (and A, mask), val => ARMbfi A, val, mask 5725 // iff (val & mask) == val 5726 // 5727 // 2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 5728 // 2a) iff isBitFieldInvertedMask(mask) && isBitFieldInvertedMask(~mask2) 5729 // && mask == ~mask2 5730 // 2b) iff isBitFieldInvertedMask(~mask) && isBitFieldInvertedMask(mask2) 5731 // && ~mask == mask2 5732 // (i.e., copy a bitfield value into another bitfield of the same width) 5733 5734 if (VT != MVT::i32) 5735 return SDValue(); 5736 5737 SDValue N00 = N0.getOperand(0); 5738 5739 // The value and the mask need to be constants so we can verify this is 5740 // actually a bitfield set. If the mask is 0xffff, we can do better 5741 // via a movt instruction, so don't use BFI in that case. 5742 SDValue MaskOp = N0.getOperand(1); 5743 ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(MaskOp); 5744 if (!MaskC) 5745 return SDValue(); 5746 unsigned Mask = MaskC->getZExtValue(); 5747 if (Mask == 0xffff) 5748 return SDValue(); 5749 SDValue Res; 5750 // Case (1): or (and A, mask), val => ARMbfi A, val, mask 5751 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 5752 if (N1C) { 5753 unsigned Val = N1C->getZExtValue(); 5754 if ((Val & ~Mask) != Val) 5755 return SDValue(); 5756 5757 if (ARM::isBitFieldInvertedMask(Mask)) { 5758 Val >>= CountTrailingZeros_32(~Mask); 5759 5760 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, 5761 DAG.getConstant(Val, MVT::i32), 5762 DAG.getConstant(Mask, MVT::i32)); 5763 5764 // Do not add new nodes to DAG combiner worklist. 5765 DCI.CombineTo(N, Res, false); 5766 return SDValue(); 5767 } 5768 } else if (N1.getOpcode() == ISD::AND) { 5769 // case (2) or (and A, mask), (and B, mask2) => ARMbfi A, (lsr B, amt), mask 5770 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 5771 if (!N11C) 5772 return SDValue(); 5773 unsigned Mask2 = N11C->getZExtValue(); 5774 5775 // Mask and ~Mask2 (or reverse) must be equivalent for the BFI pattern 5776 // as is to match. 5777 if (ARM::isBitFieldInvertedMask(Mask) && 5778 (Mask == ~Mask2)) { 5779 // The pack halfword instruction works better for masks that fit it, 5780 // so use that when it's available. 5781 if (Subtarget->hasT2ExtractPack() && 5782 (Mask == 0xffff || Mask == 0xffff0000)) 5783 return SDValue(); 5784 // 2a 5785 unsigned amt = CountTrailingZeros_32(Mask2); 5786 Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), 5787 DAG.getConstant(amt, MVT::i32)); 5788 Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, 5789 DAG.getConstant(Mask, MVT::i32)); 5790 // Do not add new nodes to DAG combiner worklist. 5791 DCI.CombineTo(N, Res, false); 5792 return SDValue(); 5793 } else if (ARM::isBitFieldInvertedMask(~Mask) && 5794 (~Mask == Mask2)) { 5795 // The pack halfword instruction works better for masks that fit it, 5796 // so use that when it's available. 
5797 if (Subtarget->hasT2ExtractPack() && 5798 (Mask2 == 0xffff || Mask2 == 0xffff0000)) 5799 return SDValue(); 5800 // 2b 5801 unsigned lsb = CountTrailingZeros_32(Mask); 5802 Res = DAG.getNode(ISD::SRL, DL, VT, N00, 5803 DAG.getConstant(lsb, MVT::i32)); 5804 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, 5805 DAG.getConstant(Mask2, MVT::i32)); 5806 // Do not add new nodes to DAG combiner worklist. 5807 DCI.CombineTo(N, Res, false); 5808 return SDValue(); 5809 } 5810 } 5811 5812 if (DAG.MaskedValueIsZero(N1, MaskC->getAPIntValue()) && 5813 N00.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N00.getOperand(1)) && 5814 ARM::isBitFieldInvertedMask(~Mask)) { 5815 // Case (3): or (and (shl A, #shamt), mask), B => ARMbfi B, A, ~mask 5816 // where lsb(mask) == #shamt and masked bits of B are known zero. 5817 SDValue ShAmt = N00.getOperand(1); 5818 unsigned ShAmtC = cast<ConstantSDNode>(ShAmt)->getZExtValue(); 5819 unsigned LSB = CountTrailingZeros_32(Mask); 5820 if (ShAmtC != LSB) 5821 return SDValue(); 5822 5823 Res = DAG.getNode(ARMISD::BFI, DL, VT, N1, N00.getOperand(0), 5824 DAG.getConstant(~Mask, MVT::i32)); 5825 5826 // Do not add new nodes to DAG combiner worklist. 5827 DCI.CombineTo(N, Res, false); 5828 } 5829 5830 return SDValue(); 5831} 5832 5833/// PerformBFICombine - (bfi A, (and B, C1), C2) -> (bfi A, B, C2) iff 5834/// C1 & C2 == C1. 5835static SDValue PerformBFICombine(SDNode *N, 5836 TargetLowering::DAGCombinerInfo &DCI) { 5837 SDValue N1 = N->getOperand(1); 5838 if (N1.getOpcode() == ISD::AND) { 5839 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 5840 if (!N11C) 5841 return SDValue(); 5842 unsigned Mask = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 5843 unsigned Mask2 = N11C->getZExtValue(); 5844 if ((Mask & Mask2) == Mask2) 5845 return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), 5846 N->getOperand(0), N1.getOperand(0), 5847 N->getOperand(2)); 5848 } 5849 return SDValue(); 5850} 5851 5852/// PerformVMOVRRDCombine - Target-specific dag combine xforms for 5853/// ARMISD::VMOVRRD. 5854static SDValue PerformVMOVRRDCombine(SDNode *N, 5855 TargetLowering::DAGCombinerInfo &DCI) { 5856 // vmovrrd(vmovdrr x, y) -> x,y 5857 SDValue InDouble = N->getOperand(0); 5858 if (InDouble.getOpcode() == ARMISD::VMOVDRR) 5859 return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1)); 5860 5861 // vmovrrd(load f64) -> (load i32), (load i32) 5862 SDNode *InNode = InDouble.getNode(); 5863 if (ISD::isNormalLoad(InNode) && InNode->hasOneUse() && 5864 InNode->getValueType(0) == MVT::f64 && 5865 InNode->getOperand(1).getOpcode() == ISD::FrameIndex && 5866 !cast<LoadSDNode>(InNode)->isVolatile()) { 5867 // TODO: Should this be done for non-FrameIndex operands? 
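 // (Splitting the f64 load into two i32 loads lets both halves go straight
 // into core registers, avoiding a D-register load followed by a VMOVRRD
 // transfer out of the VFP register file.)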
5868 LoadSDNode *LD = cast<LoadSDNode>(InNode); 5869 5870 SelectionDAG &DAG = DCI.DAG; 5871 DebugLoc DL = LD->getDebugLoc(); 5872 SDValue BasePtr = LD->getBasePtr(); 5873 SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, 5874 LD->getPointerInfo(), LD->isVolatile(), 5875 LD->isNonTemporal(), LD->getAlignment()); 5876 5877 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 5878 DAG.getConstant(4, MVT::i32)); 5879 SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, 5880 LD->getPointerInfo(), LD->isVolatile(), 5881 LD->isNonTemporal(), 5882 std::min(4U, LD->getAlignment() / 2)); 5883 5884 DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); 5885 SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); 5886 DCI.RemoveFromWorklist(LD); 5887 DAG.DeleteNode(LD); 5888 return Result; 5889 } 5890 5891 return SDValue(); 5892} 5893 5894/// PerformVMOVDRRCombine - Target-specific dag combine xforms for 5895/// ARMISD::VMOVDRR. This is also used for BUILD_VECTORs with 2 operands. 5896static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { 5897 // N=vmovrrd(X); vmovdrr(N:0, N:1) -> bit_convert(X) 5898 SDValue Op0 = N->getOperand(0); 5899 SDValue Op1 = N->getOperand(1); 5900 if (Op0.getOpcode() == ISD::BITCAST) 5901 Op0 = Op0.getOperand(0); 5902 if (Op1.getOpcode() == ISD::BITCAST) 5903 Op1 = Op1.getOperand(0); 5904 if (Op0.getOpcode() == ARMISD::VMOVRRD && 5905 Op0.getNode() == Op1.getNode() && 5906 Op0.getResNo() == 0 && Op1.getResNo() == 1) 5907 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 5908 N->getValueType(0), Op0.getOperand(0)); 5909 return SDValue(); 5910} 5911 5912/// PerformSTORECombine - Target-specific dag combine xforms for 5913/// ISD::STORE. 5914static SDValue PerformSTORECombine(SDNode *N, 5915 TargetLowering::DAGCombinerInfo &DCI) { 5916 // Bitcast an i64 store extracted from a vector to f64. 5917 // Otherwise, the i64 value will be legalized to a pair of i32 values. 
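  // Roughly:  store (i64 extract_elt (v2i64 V), n), addr
  //       ->  store (bitcast (f64 extract_elt (bitcast V to v2f64), n)), addr
  // Once the combiner folds the bitcasts, the element is stored straight from
  // a VFP/NEON register rather than as two i32 halves.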
5918 StoreSDNode *St = cast<StoreSDNode>(N); 5919 SDValue StVal = St->getValue(); 5920 if (!ISD::isNormalStore(St) || St->isVolatile()) 5921 return SDValue(); 5922 5923 if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && 5924 StVal.getNode()->hasOneUse() && !St->isVolatile()) { 5925 SelectionDAG &DAG = DCI.DAG; 5926 DebugLoc DL = St->getDebugLoc(); 5927 SDValue BasePtr = St->getBasePtr(); 5928 SDValue NewST1 = DAG.getStore(St->getChain(), DL, 5929 StVal.getNode()->getOperand(0), BasePtr, 5930 St->getPointerInfo(), St->isVolatile(), 5931 St->isNonTemporal(), St->getAlignment()); 5932 5933 SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, 5934 DAG.getConstant(4, MVT::i32)); 5935 return DAG.getStore(NewST1.getValue(0), DL, StVal.getNode()->getOperand(1), 5936 OffsetPtr, St->getPointerInfo(), St->isVolatile(), 5937 St->isNonTemporal(), 5938 std::min(4U, St->getAlignment() / 2)); 5939 } 5940 5941 if (StVal.getValueType() != MVT::i64 || 5942 StVal.getNode()->getOpcode() != ISD::EXTRACT_VECTOR_ELT) 5943 return SDValue(); 5944 5945 SelectionDAG &DAG = DCI.DAG; 5946 DebugLoc dl = StVal.getDebugLoc(); 5947 SDValue IntVec = StVal.getOperand(0); 5948 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 5949 IntVec.getValueType().getVectorNumElements()); 5950 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); 5951 SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, 5952 Vec, StVal.getOperand(1)); 5953 dl = N->getDebugLoc(); 5954 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); 5955 // Make the DAGCombiner fold the bitcasts. 5956 DCI.AddToWorklist(Vec.getNode()); 5957 DCI.AddToWorklist(ExtElt.getNode()); 5958 DCI.AddToWorklist(V.getNode()); 5959 return DAG.getStore(St->getChain(), dl, V, St->getBasePtr(), 5960 St->getPointerInfo(), St->isVolatile(), 5961 St->isNonTemporal(), St->getAlignment(), 5962 St->getTBAAInfo()); 5963} 5964 5965/// hasNormalLoadOperand - Check if any of the operands of a BUILD_VECTOR node 5966/// are normal, non-volatile loads. If so, it is profitable to bitcast an 5967/// i64 vector to have f64 elements, since the value can then be loaded 5968/// directly into a VFP register. 5969static bool hasNormalLoadOperand(SDNode *N) { 5970 unsigned NumElts = N->getValueType(0).getVectorNumElements(); 5971 for (unsigned i = 0; i < NumElts; ++i) { 5972 SDNode *Elt = N->getOperand(i).getNode(); 5973 if (ISD::isNormalLoad(Elt) && !cast<LoadSDNode>(Elt)->isVolatile()) 5974 return true; 5975 } 5976 return false; 5977} 5978 5979/// PerformBUILD_VECTORCombine - Target-specific dag combine xforms for 5980/// ISD::BUILD_VECTOR. 5981static SDValue PerformBUILD_VECTORCombine(SDNode *N, 5982 TargetLowering::DAGCombinerInfo &DCI){ 5983 // build_vector(N=ARMISD::VMOVRRD(X), N:1) -> bit_convert(X): 5984 // VMOVRRD is introduced when legalizing i64 types. It forces the i64 value 5985 // into a pair of GPRs, which is fine when the value is used as a scalar, 5986 // but if the i64 value is converted to a vector, we need to undo the VMOVRRD. 5987 SelectionDAG &DAG = DCI.DAG; 5988 if (N->getNumOperands() == 2) { 5989 SDValue RV = PerformVMOVDRRCombine(N, DAG); 5990 if (RV.getNode()) 5991 return RV; 5992 } 5993 5994 // Load i64 elements as f64 values so that type legalization does not split 5995 // them up into i32 values. 
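  // Each i64 operand is wrapped in a bitcast to f64 below; those bitcasts are
  // put on the combiner worklist so that bitcast(i64 load) can fold into a
  // plain f64 load, which goes straight into a D register.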
5996 EVT VT = N->getValueType(0); 5997 if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) 5998 return SDValue(); 5999 DebugLoc dl = N->getDebugLoc(); 6000 SmallVector<SDValue, 8> Ops; 6001 unsigned NumElts = VT.getVectorNumElements(); 6002 for (unsigned i = 0; i < NumElts; ++i) { 6003 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(i)); 6004 Ops.push_back(V); 6005 // Make the DAGCombiner fold the bitcast. 6006 DCI.AddToWorklist(V.getNode()); 6007 } 6008 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, NumElts); 6009 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, FloatVT, Ops.data(), NumElts); 6010 return DAG.getNode(ISD::BITCAST, dl, VT, BV); 6011} 6012 6013/// PerformInsertEltCombine - Target-specific dag combine xforms for 6014/// ISD::INSERT_VECTOR_ELT. 6015static SDValue PerformInsertEltCombine(SDNode *N, 6016 TargetLowering::DAGCombinerInfo &DCI) { 6017 // Bitcast an i64 load inserted into a vector to f64. 6018 // Otherwise, the i64 value will be legalized to a pair of i32 values. 6019 EVT VT = N->getValueType(0); 6020 SDNode *Elt = N->getOperand(1).getNode(); 6021 if (VT.getVectorElementType() != MVT::i64 || 6022 !ISD::isNormalLoad(Elt) || cast<LoadSDNode>(Elt)->isVolatile()) 6023 return SDValue(); 6024 6025 SelectionDAG &DAG = DCI.DAG; 6026 DebugLoc dl = N->getDebugLoc(); 6027 EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, 6028 VT.getVectorNumElements()); 6029 SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); 6030 SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::f64, N->getOperand(1)); 6031 // Make the DAGCombiner fold the bitcasts. 6032 DCI.AddToWorklist(Vec.getNode()); 6033 DCI.AddToWorklist(V.getNode()); 6034 SDValue InsElt = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, FloatVT, 6035 Vec, V, N->getOperand(2)); 6036 return DAG.getNode(ISD::BITCAST, dl, VT, InsElt); 6037} 6038 6039/// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for 6040/// ISD::VECTOR_SHUFFLE. 6041static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { 6042 // The LLVM shufflevector instruction does not require the shuffle mask 6043 // length to match the operand vector length, but ISD::VECTOR_SHUFFLE does 6044 // have that requirement. When translating to ISD::VECTOR_SHUFFLE, if the 6045 // operands do not match the mask length, they are extended by concatenating 6046 // them with undef vectors. That is probably the right thing for other 6047 // targets, but for NEON it is better to concatenate two double-register 6048 // size vector operands into a single quad-register size vector. Do that 6049 // transformation here: 6050 // shuffle(concat(v1, undef), concat(v2, undef)) -> 6051 // shuffle(concat(v1, v2), undef) 6052 SDValue Op0 = N->getOperand(0); 6053 SDValue Op1 = N->getOperand(1); 6054 if (Op0.getOpcode() != ISD::CONCAT_VECTORS || 6055 Op1.getOpcode() != ISD::CONCAT_VECTORS || 6056 Op0.getNumOperands() != 2 || 6057 Op1.getNumOperands() != 2) 6058 return SDValue(); 6059 SDValue Concat0Op1 = Op0.getOperand(1); 6060 SDValue Concat1Op1 = Op1.getOperand(1); 6061 if (Concat0Op1.getOpcode() != ISD::UNDEF || 6062 Concat1Op1.getOpcode() != ISD::UNDEF) 6063 return SDValue(); 6064 // Skip the transformation if any of the types are illegal. 
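  // (For reference, the mask remapping further down turns e.g. a shuffle with
  // mask <0,4,1,5> of (concat v1, undef), (concat v2, undef), where v1 and v2
  // are v2i16, into a shuffle with mask <0,2,1,3> of (concat v1, v2), undef.)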
6065 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6066 EVT VT = N->getValueType(0); 6067 if (!TLI.isTypeLegal(VT) || 6068 !TLI.isTypeLegal(Concat0Op1.getValueType()) || 6069 !TLI.isTypeLegal(Concat1Op1.getValueType())) 6070 return SDValue(); 6071 6072 SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 6073 Op0.getOperand(0), Op1.getOperand(0)); 6074 // Translate the shuffle mask. 6075 SmallVector<int, 16> NewMask; 6076 unsigned NumElts = VT.getVectorNumElements(); 6077 unsigned HalfElts = NumElts/2; 6078 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 6079 for (unsigned n = 0; n < NumElts; ++n) { 6080 int MaskElt = SVN->getMaskElt(n); 6081 int NewElt = -1; 6082 if (MaskElt < (int)HalfElts) 6083 NewElt = MaskElt; 6084 else if (MaskElt >= (int)NumElts && MaskElt < (int)(NumElts + HalfElts)) 6085 NewElt = HalfElts + MaskElt - NumElts; 6086 NewMask.push_back(NewElt); 6087 } 6088 return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, 6089 DAG.getUNDEF(VT), NewMask.data()); 6090} 6091 6092/// CombineBaseUpdate - Target-specific DAG combine function for VLDDUP and 6093/// NEON load/store intrinsics to merge base address updates. 6094static SDValue CombineBaseUpdate(SDNode *N, 6095 TargetLowering::DAGCombinerInfo &DCI) { 6096 if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) 6097 return SDValue(); 6098 6099 SelectionDAG &DAG = DCI.DAG; 6100 bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || 6101 N->getOpcode() == ISD::INTRINSIC_W_CHAIN); 6102 unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); 6103 SDValue Addr = N->getOperand(AddrOpIdx); 6104 6105 // Search for a use of the address operand that is an increment. 6106 for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), 6107 UE = Addr.getNode()->use_end(); UI != UE; ++UI) { 6108 SDNode *User = *UI; 6109 if (User->getOpcode() != ISD::ADD || 6110 UI.getUse().getResNo() != Addr.getResNo()) 6111 continue; 6112 6113 // Check that the add is independent of the load/store. Otherwise, folding 6114 // it would create a cycle. 6115 if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) 6116 continue; 6117 6118 // Find the new opcode for the updating load/store. 
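    // The rewrite being attempted is, e.g.:
    //   (vld1 chain, addr, align)  followed by  (add addr, #NumBytes)
    //     -> (VLD1_UPD chain, addr, #NumBytes, align)
    // where the _UPD node also produces the incremented address as an extra
    // result, so the separate ADD becomes dead.  First classify the operation: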
6119 bool isLoad = true; 6120 bool isLaneOp = false; 6121 unsigned NewOpc = 0; 6122 unsigned NumVecs = 0; 6123 if (isIntrinsic) { 6124 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 6125 switch (IntNo) { 6126 default: assert(0 && "unexpected intrinsic for Neon base update"); 6127 case Intrinsic::arm_neon_vld1: NewOpc = ARMISD::VLD1_UPD; 6128 NumVecs = 1; break; 6129 case Intrinsic::arm_neon_vld2: NewOpc = ARMISD::VLD2_UPD; 6130 NumVecs = 2; break; 6131 case Intrinsic::arm_neon_vld3: NewOpc = ARMISD::VLD3_UPD; 6132 NumVecs = 3; break; 6133 case Intrinsic::arm_neon_vld4: NewOpc = ARMISD::VLD4_UPD; 6134 NumVecs = 4; break; 6135 case Intrinsic::arm_neon_vld2lane: NewOpc = ARMISD::VLD2LN_UPD; 6136 NumVecs = 2; isLaneOp = true; break; 6137 case Intrinsic::arm_neon_vld3lane: NewOpc = ARMISD::VLD3LN_UPD; 6138 NumVecs = 3; isLaneOp = true; break; 6139 case Intrinsic::arm_neon_vld4lane: NewOpc = ARMISD::VLD4LN_UPD; 6140 NumVecs = 4; isLaneOp = true; break; 6141 case Intrinsic::arm_neon_vst1: NewOpc = ARMISD::VST1_UPD; 6142 NumVecs = 1; isLoad = false; break; 6143 case Intrinsic::arm_neon_vst2: NewOpc = ARMISD::VST2_UPD; 6144 NumVecs = 2; isLoad = false; break; 6145 case Intrinsic::arm_neon_vst3: NewOpc = ARMISD::VST3_UPD; 6146 NumVecs = 3; isLoad = false; break; 6147 case Intrinsic::arm_neon_vst4: NewOpc = ARMISD::VST4_UPD; 6148 NumVecs = 4; isLoad = false; break; 6149 case Intrinsic::arm_neon_vst2lane: NewOpc = ARMISD::VST2LN_UPD; 6150 NumVecs = 2; isLoad = false; isLaneOp = true; break; 6151 case Intrinsic::arm_neon_vst3lane: NewOpc = ARMISD::VST3LN_UPD; 6152 NumVecs = 3; isLoad = false; isLaneOp = true; break; 6153 case Intrinsic::arm_neon_vst4lane: NewOpc = ARMISD::VST4LN_UPD; 6154 NumVecs = 4; isLoad = false; isLaneOp = true; break; 6155 } 6156 } else { 6157 isLaneOp = true; 6158 switch (N->getOpcode()) { 6159 default: assert(0 && "unexpected opcode for Neon base update"); 6160 case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break; 6161 case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break; 6162 case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break; 6163 } 6164 } 6165 6166 // Find the size of memory referenced by the load/store. 6167 EVT VecTy; 6168 if (isLoad) 6169 VecTy = N->getValueType(0); 6170 else 6171 VecTy = N->getOperand(AddrOpIdx+1).getValueType(); 6172 unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; 6173 if (isLaneOp) 6174 NumBytes /= VecTy.getVectorNumElements(); 6175 6176 // If the increment is a constant, it must match the memory ref size. 6177 SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); 6178 if (ConstantSDNode *CInc = dyn_cast<ConstantSDNode>(Inc.getNode())) { 6179 uint64_t IncVal = CInc->getZExtValue(); 6180 if (IncVal != NumBytes) 6181 continue; 6182 } else if (NumBytes >= 3 * 16) { 6183 // VLD3/4 and VST3/4 for 128-bit vectors are implemented with two 6184 // separate instructions that make it harder to use a non-constant update. 6185 continue; 6186 } 6187 6188 // Create the new updating load/store node. 6189 EVT Tys[6]; 6190 unsigned NumResultVecs = (isLoad ? 
NumVecs : 0); 6191 unsigned n; 6192 for (n = 0; n < NumResultVecs; ++n) 6193 Tys[n] = VecTy; 6194 Tys[n++] = MVT::i32; 6195 Tys[n] = MVT::Other; 6196 SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs+2); 6197 SmallVector<SDValue, 8> Ops; 6198 Ops.push_back(N->getOperand(0)); // incoming chain 6199 Ops.push_back(N->getOperand(AddrOpIdx)); 6200 Ops.push_back(Inc); 6201 for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) { 6202 Ops.push_back(N->getOperand(i)); 6203 } 6204 MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N); 6205 SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, 6206 Ops.data(), Ops.size(), 6207 MemInt->getMemoryVT(), 6208 MemInt->getMemOperand()); 6209 6210 // Update the uses. 6211 std::vector<SDValue> NewResults; 6212 for (unsigned i = 0; i < NumResultVecs; ++i) { 6213 NewResults.push_back(SDValue(UpdN.getNode(), i)); 6214 } 6215 NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs+1)); // chain 6216 DCI.CombineTo(N, NewResults); 6217 DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs)); 6218 6219 break; 6220 } 6221 return SDValue(); 6222} 6223 6224/// CombineVLDDUP - For a VDUPLANE node N, check if its source operand is a 6225/// vldN-lane (N > 1) intrinsic, and if all the other uses of that intrinsic 6226/// are also VDUPLANEs. If so, combine them to a vldN-dup operation and 6227/// return true. 6228static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { 6229 SelectionDAG &DAG = DCI.DAG; 6230 EVT VT = N->getValueType(0); 6231 // vldN-dup instructions only support 64-bit vectors for N > 1. 6232 if (!VT.is64BitVector()) 6233 return false; 6234 6235 // Check if the VDUPLANE operand is a vldN-dup intrinsic. 6236 SDNode *VLD = N->getOperand(0).getNode(); 6237 if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN) 6238 return false; 6239 unsigned NumVecs = 0; 6240 unsigned NewOpc = 0; 6241 unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue(); 6242 if (IntNo == Intrinsic::arm_neon_vld2lane) { 6243 NumVecs = 2; 6244 NewOpc = ARMISD::VLD2DUP; 6245 } else if (IntNo == Intrinsic::arm_neon_vld3lane) { 6246 NumVecs = 3; 6247 NewOpc = ARMISD::VLD3DUP; 6248 } else if (IntNo == Intrinsic::arm_neon_vld4lane) { 6249 NumVecs = 4; 6250 NewOpc = ARMISD::VLD4DUP; 6251 } else { 6252 return false; 6253 } 6254 6255 // First check that all the vldN-lane uses are VDUPLANEs and that the lane 6256 // numbers match the load. 6257 unsigned VLDLaneNo = 6258 cast<ConstantSDNode>(VLD->getOperand(NumVecs+3))->getZExtValue(); 6259 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 6260 UI != UE; ++UI) { 6261 // Ignore uses of the chain result. 6262 if (UI.getUse().getResNo() == NumVecs) 6263 continue; 6264 SDNode *User = *UI; 6265 if (User->getOpcode() != ARMISD::VDUPLANE || 6266 VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue()) 6267 return false; 6268 } 6269 6270 // Create the vldN-dup node. 6271 EVT Tys[5]; 6272 unsigned n; 6273 for (n = 0; n < NumVecs; ++n) 6274 Tys[n] = VT; 6275 Tys[n] = MVT::Other; 6276 SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); 6277 SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; 6278 MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD); 6279 SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, 6280 Ops, 2, VLDMemInt->getMemoryVT(), 6281 VLDMemInt->getMemOperand()); 6282 6283 // Update the uses. 
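  // Each VDUPLANE user is redirected to the corresponding result of the new
  // vldN-dup node; the lane index is no longer needed because the dup load
  // already replicates the loaded element across all lanes.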
6284 for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end(); 6285 UI != UE; ++UI) { 6286 unsigned ResNo = UI.getUse().getResNo(); 6287 // Ignore uses of the chain result. 6288 if (ResNo == NumVecs) 6289 continue; 6290 SDNode *User = *UI; 6291 DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo)); 6292 } 6293 6294 // Now the vldN-lane intrinsic is dead except for its chain result. 6295 // Update uses of the chain. 6296 std::vector<SDValue> VLDDupResults; 6297 for (unsigned n = 0; n < NumVecs; ++n) 6298 VLDDupResults.push_back(SDValue(VLDDup.getNode(), n)); 6299 VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs)); 6300 DCI.CombineTo(VLD, VLDDupResults); 6301 6302 return true; 6303} 6304 6305/// PerformVDUPLANECombine - Target-specific dag combine xforms for 6306/// ARMISD::VDUPLANE. 6307static SDValue PerformVDUPLANECombine(SDNode *N, 6308 TargetLowering::DAGCombinerInfo &DCI) { 6309 SDValue Op = N->getOperand(0); 6310 6311 // If the source is a vldN-lane (N > 1) intrinsic, and all the other uses 6312 // of that intrinsic are also VDUPLANEs, combine them to a vldN-dup operation. 6313 if (CombineVLDDUP(N, DCI)) 6314 return SDValue(N, 0); 6315 6316 // If the source is already a VMOVIMM or VMVNIMM splat, the VDUPLANE is 6317 // redundant. Ignore bit_converts for now; element sizes are checked below. 6318 while (Op.getOpcode() == ISD::BITCAST) 6319 Op = Op.getOperand(0); 6320 if (Op.getOpcode() != ARMISD::VMOVIMM && Op.getOpcode() != ARMISD::VMVNIMM) 6321 return SDValue(); 6322 6323 // Make sure the VMOV element size is not bigger than the VDUPLANE elements. 6324 unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits(); 6325 // The canonical VMOV for a zero vector uses a 32-bit element size. 6326 unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 6327 unsigned EltBits; 6328 if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0) 6329 EltSize = 8; 6330 EVT VT = N->getValueType(0); 6331 if (EltSize > VT.getVectorElementType().getSizeInBits()) 6332 return SDValue(); 6333 6334 return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); 6335} 6336 6337/// getVShiftImm - Check if this is a valid build_vector for the immediate 6338/// operand of a vector shift operation, where all the elements of the 6339/// build_vector must have the same constant integer value. 6340static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { 6341 // Ignore bit_converts. 6342 while (Op.getOpcode() == ISD::BITCAST) 6343 Op = Op.getOperand(0); 6344 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 6345 APInt SplatBits, SplatUndef; 6346 unsigned SplatBitSize; 6347 bool HasAnyUndefs; 6348 if (! BVN || ! BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, 6349 HasAnyUndefs, ElementBits) || 6350 SplatBitSize > ElementBits) 6351 return false; 6352 Cnt = SplatBits.getSExtValue(); 6353 return true; 6354} 6355 6356/// isVShiftLImm - Check if this is a valid build_vector for the immediate 6357/// operand of a vector shift left operation. That value must be in the range: 6358/// 0 <= Value < ElementBits for a left shift; or 6359/// 0 <= Value <= ElementBits for a long left shift. 6360static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) { 6361 assert(VT.isVector() && "vector shift count is not a vector type"); 6362 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 6363 if (! getVShiftImm(Op, ElementBits, Cnt)) 6364 return false; 6365 return (Cnt >= 0 && (isLong ? 
Cnt-1 : Cnt) < ElementBits); 6366} 6367 6368/// isVShiftRImm - Check if this is a valid build_vector for the immediate 6369/// operand of a vector shift right operation. For a shift opcode, the value 6370/// is positive, but for an intrinsic the value count must be negative. The 6371/// absolute value must be in the range: 6372/// 1 <= |Value| <= ElementBits for a right shift; or 6373/// 1 <= |Value| <= ElementBits/2 for a narrow right shift. 6374static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic, 6375 int64_t &Cnt) { 6376 assert(VT.isVector() && "vector shift count is not a vector type"); 6377 unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); 6378 if (! getVShiftImm(Op, ElementBits, Cnt)) 6379 return false; 6380 if (isIntrinsic) 6381 Cnt = -Cnt; 6382 return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits/2 : ElementBits)); 6383} 6384 6385/// PerformIntrinsicCombine - ARM-specific DAG combining for intrinsics. 6386static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { 6387 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 6388 switch (IntNo) { 6389 default: 6390 // Don't do anything for most intrinsics. 6391 break; 6392 6393 // Vector shifts: check for immediate versions and lower them. 6394 // Note: This is done during DAG combining instead of DAG legalizing because 6395 // the build_vectors for 64-bit vector element shift counts are generally 6396 // not legal, and it is hard to see their values after they get legalized to 6397 // loads from a constant pool. 6398 case Intrinsic::arm_neon_vshifts: 6399 case Intrinsic::arm_neon_vshiftu: 6400 case Intrinsic::arm_neon_vshiftls: 6401 case Intrinsic::arm_neon_vshiftlu: 6402 case Intrinsic::arm_neon_vshiftn: 6403 case Intrinsic::arm_neon_vrshifts: 6404 case Intrinsic::arm_neon_vrshiftu: 6405 case Intrinsic::arm_neon_vrshiftn: 6406 case Intrinsic::arm_neon_vqshifts: 6407 case Intrinsic::arm_neon_vqshiftu: 6408 case Intrinsic::arm_neon_vqshiftsu: 6409 case Intrinsic::arm_neon_vqshiftns: 6410 case Intrinsic::arm_neon_vqshiftnu: 6411 case Intrinsic::arm_neon_vqshiftnsu: 6412 case Intrinsic::arm_neon_vqrshiftns: 6413 case Intrinsic::arm_neon_vqrshiftnu: 6414 case Intrinsic::arm_neon_vqrshiftnsu: { 6415 EVT VT = N->getOperand(1).getValueType(); 6416 int64_t Cnt; 6417 unsigned VShiftOpc = 0; 6418 6419 switch (IntNo) { 6420 case Intrinsic::arm_neon_vshifts: 6421 case Intrinsic::arm_neon_vshiftu: 6422 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) { 6423 VShiftOpc = ARMISD::VSHL; 6424 break; 6425 } 6426 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) { 6427 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshifts ? 
6428 ARMISD::VSHRs : ARMISD::VSHRu); 6429 break; 6430 } 6431 return SDValue(); 6432 6433 case Intrinsic::arm_neon_vshiftls: 6434 case Intrinsic::arm_neon_vshiftlu: 6435 if (isVShiftLImm(N->getOperand(2), VT, true, Cnt)) 6436 break; 6437 llvm_unreachable("invalid shift count for vshll intrinsic"); 6438 6439 case Intrinsic::arm_neon_vrshifts: 6440 case Intrinsic::arm_neon_vrshiftu: 6441 if (isVShiftRImm(N->getOperand(2), VT, false, true, Cnt)) 6442 break; 6443 return SDValue(); 6444 6445 case Intrinsic::arm_neon_vqshifts: 6446 case Intrinsic::arm_neon_vqshiftu: 6447 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 6448 break; 6449 return SDValue(); 6450 6451 case Intrinsic::arm_neon_vqshiftsu: 6452 if (isVShiftLImm(N->getOperand(2), VT, false, Cnt)) 6453 break; 6454 llvm_unreachable("invalid shift count for vqshlu intrinsic"); 6455 6456 case Intrinsic::arm_neon_vshiftn: 6457 case Intrinsic::arm_neon_vrshiftn: 6458 case Intrinsic::arm_neon_vqshiftns: 6459 case Intrinsic::arm_neon_vqshiftnu: 6460 case Intrinsic::arm_neon_vqshiftnsu: 6461 case Intrinsic::arm_neon_vqrshiftns: 6462 case Intrinsic::arm_neon_vqrshiftnu: 6463 case Intrinsic::arm_neon_vqrshiftnsu: 6464 // Narrowing shifts require an immediate right shift. 6465 if (isVShiftRImm(N->getOperand(2), VT, true, true, Cnt)) 6466 break; 6467 llvm_unreachable("invalid shift count for narrowing vector shift " 6468 "intrinsic"); 6469 6470 default: 6471 llvm_unreachable("unhandled vector shift"); 6472 } 6473 6474 switch (IntNo) { 6475 case Intrinsic::arm_neon_vshifts: 6476 case Intrinsic::arm_neon_vshiftu: 6477 // Opcode already set above. 6478 break; 6479 case Intrinsic::arm_neon_vshiftls: 6480 case Intrinsic::arm_neon_vshiftlu: 6481 if (Cnt == VT.getVectorElementType().getSizeInBits()) 6482 VShiftOpc = ARMISD::VSHLLi; 6483 else 6484 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ? 
6485 ARMISD::VSHLLs : ARMISD::VSHLLu); 6486 break; 6487 case Intrinsic::arm_neon_vshiftn: 6488 VShiftOpc = ARMISD::VSHRN; break; 6489 case Intrinsic::arm_neon_vrshifts: 6490 VShiftOpc = ARMISD::VRSHRs; break; 6491 case Intrinsic::arm_neon_vrshiftu: 6492 VShiftOpc = ARMISD::VRSHRu; break; 6493 case Intrinsic::arm_neon_vrshiftn: 6494 VShiftOpc = ARMISD::VRSHRN; break; 6495 case Intrinsic::arm_neon_vqshifts: 6496 VShiftOpc = ARMISD::VQSHLs; break; 6497 case Intrinsic::arm_neon_vqshiftu: 6498 VShiftOpc = ARMISD::VQSHLu; break; 6499 case Intrinsic::arm_neon_vqshiftsu: 6500 VShiftOpc = ARMISD::VQSHLsu; break; 6501 case Intrinsic::arm_neon_vqshiftns: 6502 VShiftOpc = ARMISD::VQSHRNs; break; 6503 case Intrinsic::arm_neon_vqshiftnu: 6504 VShiftOpc = ARMISD::VQSHRNu; break; 6505 case Intrinsic::arm_neon_vqshiftnsu: 6506 VShiftOpc = ARMISD::VQSHRNsu; break; 6507 case Intrinsic::arm_neon_vqrshiftns: 6508 VShiftOpc = ARMISD::VQRSHRNs; break; 6509 case Intrinsic::arm_neon_vqrshiftnu: 6510 VShiftOpc = ARMISD::VQRSHRNu; break; 6511 case Intrinsic::arm_neon_vqrshiftnsu: 6512 VShiftOpc = ARMISD::VQRSHRNsu; break; 6513 } 6514 6515 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 6516 N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); 6517 } 6518 6519 case Intrinsic::arm_neon_vshiftins: { 6520 EVT VT = N->getOperand(1).getValueType(); 6521 int64_t Cnt; 6522 unsigned VShiftOpc = 0; 6523 6524 if (isVShiftLImm(N->getOperand(3), VT, false, Cnt)) 6525 VShiftOpc = ARMISD::VSLI; 6526 else if (isVShiftRImm(N->getOperand(3), VT, false, true, Cnt)) 6527 VShiftOpc = ARMISD::VSRI; 6528 else { 6529 llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); 6530 } 6531 6532 return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), 6533 N->getOperand(1), N->getOperand(2), 6534 DAG.getConstant(Cnt, MVT::i32)); 6535 } 6536 6537 case Intrinsic::arm_neon_vqrshifts: 6538 case Intrinsic::arm_neon_vqrshiftu: 6539 // No immediate versions of these to check for. 6540 break; 6541 } 6542 6543 return SDValue(); 6544} 6545 6546/// PerformShiftCombine - Checks for immediate versions of vector shifts and 6547/// lowers them. As with the vector shift intrinsics, this is done during DAG 6548/// combining instead of DAG legalizing because the build_vectors for 64-bit 6549/// vector element shift counts are generally not legal, and it is hard to see 6550/// their values after they get legalized to loads from a constant pool. 6551static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, 6552 const ARMSubtarget *ST) { 6553 EVT VT = N->getValueType(0); 6554 6555 // Nothing to be done for scalar shifts. 6556 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6557 if (!VT.isVector() || !TLI.isTypeLegal(VT)) 6558 return SDValue(); 6559 6560 assert(ST->hasNEON() && "unexpected vector shift"); 6561 int64_t Cnt; 6562 6563 switch (N->getOpcode()) { 6564 default: llvm_unreachable("unexpected shift opcode"); 6565 6566 case ISD::SHL: 6567 if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) 6568 return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), 6569 DAG.getConstant(Cnt, MVT::i32)); 6570 break; 6571 6572 case ISD::SRA: 6573 case ISD::SRL: 6574 if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { 6575 unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? 
6576 ARMISD::VSHRs : ARMISD::VSHRu); 6577 return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), 6578 DAG.getConstant(Cnt, MVT::i32)); 6579 } 6580 } 6581 return SDValue(); 6582} 6583 6584/// PerformExtendCombine - Target-specific DAG combining for ISD::SIGN_EXTEND, 6585/// ISD::ZERO_EXTEND, and ISD::ANY_EXTEND. 6586static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, 6587 const ARMSubtarget *ST) { 6588 SDValue N0 = N->getOperand(0); 6589 6590 // Check for sign- and zero-extensions of vector extract operations of 8- 6591 // and 16-bit vector elements. NEON supports these directly. They are 6592 // handled during DAG combining because type legalization will promote them 6593 // to 32-bit types and it is messy to recognize the operations after that. 6594 if (ST->hasNEON() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { 6595 SDValue Vec = N0.getOperand(0); 6596 SDValue Lane = N0.getOperand(1); 6597 EVT VT = N->getValueType(0); 6598 EVT EltVT = N0.getValueType(); 6599 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6600 6601 if (VT == MVT::i32 && 6602 (EltVT == MVT::i8 || EltVT == MVT::i16) && 6603 TLI.isTypeLegal(Vec.getValueType()) && 6604 isa<ConstantSDNode>(Lane)) { 6605 6606 unsigned Opc = 0; 6607 switch (N->getOpcode()) { 6608 default: llvm_unreachable("unexpected opcode"); 6609 case ISD::SIGN_EXTEND: 6610 Opc = ARMISD::VGETLANEs; 6611 break; 6612 case ISD::ZERO_EXTEND: 6613 case ISD::ANY_EXTEND: 6614 Opc = ARMISD::VGETLANEu; 6615 break; 6616 } 6617 return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); 6618 } 6619 } 6620 6621 return SDValue(); 6622} 6623 6624/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC 6625/// to match f32 max/min patterns to use NEON vmax/vmin instructions. 6626static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, 6627 const ARMSubtarget *ST) { 6628 // If the target supports NEON, try to use vmax/vmin instructions for f32 6629 // selects like "x < y ? x : y". Unless the NoNaNsFPMath option is set, 6630 // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is 6631 // a NaN; only do the transformation when it matches that behavior. 6632 6633 // For now only do this when using NEON for FP operations; if using VFP, it 6634 // is not obvious that the benefit outweighs the cost of switching to the 6635 // NEON pipeline. 6636 if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || 6637 N->getValueType(0) != MVT::f32) 6638 return SDValue(); 6639 6640 SDValue CondLHS = N->getOperand(0); 6641 SDValue CondRHS = N->getOperand(1); 6642 SDValue LHS = N->getOperand(2); 6643 SDValue RHS = N->getOperand(3); 6644 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); 6645 6646 unsigned Opcode = 0; 6647 bool IsReversed; 6648 if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) { 6649 IsReversed = false; // x CC y ? x : y 6650 } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) { 6651 IsReversed = true ; // x CC y ? y : x 6652 } else { 6653 return SDValue(); 6654 } 6655 6656 bool IsUnordered; 6657 switch (CC) { 6658 default: break; 6659 case ISD::SETOLT: 6660 case ISD::SETOLE: 6661 case ISD::SETLT: 6662 case ISD::SETLE: 6663 case ISD::SETULT: 6664 case ISD::SETULE: 6665 // If LHS is NaN, an ordered comparison will be false and the result will 6666 // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS 6667 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 
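    // For example, with LHS = NaN and CC = SETOLT the select yields RHS,
    // whereas VMIN.F32 would yield NaN, so the fold would change the result.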
6668 IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE); 6669 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 6670 break; 6671 // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin 6672 // will return -0, so vmin can only be used for unsafe math or if one of 6673 // the operands is known to be nonzero. 6674 if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) && 6675 !UnsafeFPMath && 6676 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 6677 break; 6678 Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; 6679 break; 6680 6681 case ISD::SETOGT: 6682 case ISD::SETOGE: 6683 case ISD::SETGT: 6684 case ISD::SETGE: 6685 case ISD::SETUGT: 6686 case ISD::SETUGE: 6687 // If LHS is NaN, an ordered comparison will be false and the result will 6688 // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS 6689 // != NaN. Likewise, for unordered comparisons, check for RHS != NaN. 6690 IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE); 6691 if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS)) 6692 break; 6693 // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax 6694 // will return +0, so vmax can only be used for unsafe math or if one of 6695 // the operands is known to be nonzero. 6696 if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) && 6697 !UnsafeFPMath && 6698 !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) 6699 break; 6700 Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; 6701 break; 6702 } 6703 6704 if (!Opcode) 6705 return SDValue(); 6706 return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); 6707} 6708 6709SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, 6710 DAGCombinerInfo &DCI) const { 6711 switch (N->getOpcode()) { 6712 default: break; 6713 case ISD::ADD: return PerformADDCombine(N, DCI); 6714 case ISD::SUB: return PerformSUBCombine(N, DCI); 6715 case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget); 6716 case ISD::OR: return PerformORCombine(N, DCI, Subtarget); 6717 case ISD::AND: return PerformANDCombine(N, DCI); 6718 case ARMISD::BFI: return PerformBFICombine(N, DCI); 6719 case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); 6720 case ARMISD::VMOVDRR: return PerformVMOVDRRCombine(N, DCI.DAG); 6721 case ISD::STORE: return PerformSTORECombine(N, DCI); 6722 case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DCI); 6723 case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); 6724 case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); 6725 case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI); 6726 case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); 6727 case ISD::SHL: 6728 case ISD::SRA: 6729 case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); 6730 case ISD::SIGN_EXTEND: 6731 case ISD::ZERO_EXTEND: 6732 case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); 6733 case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); 6734 case ARMISD::VLD2DUP: 6735 case ARMISD::VLD3DUP: 6736 case ARMISD::VLD4DUP: 6737 return CombineBaseUpdate(N, DCI); 6738 case ISD::INTRINSIC_VOID: 6739 case ISD::INTRINSIC_W_CHAIN: 6740 switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) { 6741 case Intrinsic::arm_neon_vld1: 6742 case Intrinsic::arm_neon_vld2: 6743 case Intrinsic::arm_neon_vld3: 6744 case Intrinsic::arm_neon_vld4: 6745 case Intrinsic::arm_neon_vld2lane: 6746 case Intrinsic::arm_neon_vld3lane: 6747 case 
Intrinsic::arm_neon_vld4lane: 6748 case Intrinsic::arm_neon_vst1: 6749 case Intrinsic::arm_neon_vst2: 6750 case Intrinsic::arm_neon_vst3: 6751 case Intrinsic::arm_neon_vst4: 6752 case Intrinsic::arm_neon_vst2lane: 6753 case Intrinsic::arm_neon_vst3lane: 6754 case Intrinsic::arm_neon_vst4lane: 6755 return CombineBaseUpdate(N, DCI); 6756 default: break; 6757 } 6758 break; 6759 } 6760 return SDValue(); 6761} 6762 6763bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc, 6764 EVT VT) const { 6765 return (VT == MVT::f32) && (Opc == ISD::LOAD || Opc == ISD::STORE); 6766} 6767 6768bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { 6769 if (!Subtarget->allowsUnalignedMem()) 6770 return false; 6771 6772 switch (VT.getSimpleVT().SimpleTy) { 6773 default: 6774 return false; 6775 case MVT::i8: 6776 case MVT::i16: 6777 case MVT::i32: 6778 return true; 6779 // FIXME: VLD1 etc with standard alignment is legal. 6780 } 6781} 6782 6783static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { 6784 if (V < 0) 6785 return false; 6786 6787 unsigned Scale = 1; 6788 switch (VT.getSimpleVT().SimpleTy) { 6789 default: return false; 6790 case MVT::i1: 6791 case MVT::i8: 6792 // Scale == 1; 6793 break; 6794 case MVT::i16: 6795 // Scale == 2; 6796 Scale = 2; 6797 break; 6798 case MVT::i32: 6799 // Scale == 4; 6800 Scale = 4; 6801 break; 6802 } 6803 6804 if ((V & (Scale - 1)) != 0) 6805 return false; 6806 V /= Scale; 6807 return V == (V & ((1LL << 5) - 1)); 6808} 6809 6810static bool isLegalT2AddressImmediate(int64_t V, EVT VT, 6811 const ARMSubtarget *Subtarget) { 6812 bool isNeg = false; 6813 if (V < 0) { 6814 isNeg = true; 6815 V = - V; 6816 } 6817 6818 switch (VT.getSimpleVT().SimpleTy) { 6819 default: return false; 6820 case MVT::i1: 6821 case MVT::i8: 6822 case MVT::i16: 6823 case MVT::i32: 6824 // + imm12 or - imm8 6825 if (isNeg) 6826 return V == (V & ((1LL << 8) - 1)); 6827 return V == (V & ((1LL << 12) - 1)); 6828 case MVT::f32: 6829 case MVT::f64: 6830 // Same as ARM mode. FIXME: NEON? 6831 if (!Subtarget->hasVFP2()) 6832 return false; 6833 if ((V & 3) != 0) 6834 return false; 6835 V >>= 2; 6836 return V == (V & ((1LL << 8) - 1)); 6837 } 6838} 6839 6840/// isLegalAddressImmediate - Return true if the integer value can be used 6841/// as the offset of the target addressing mode for load / store of the 6842/// given type. 6843static bool isLegalAddressImmediate(int64_t V, EVT VT, 6844 const ARMSubtarget *Subtarget) { 6845 if (V == 0) 6846 return true; 6847 6848 if (!VT.isSimple()) 6849 return false; 6850 6851 if (Subtarget->isThumb1Only()) 6852 return isLegalT1AddressImmediate(V, VT); 6853 else if (Subtarget->isThumb2()) 6854 return isLegalT2AddressImmediate(V, VT, Subtarget); 6855 6856 // ARM mode. 6857 if (V < 0) 6858 V = - V; 6859 switch (VT.getSimpleVT().SimpleTy) { 6860 default: return false; 6861 case MVT::i1: 6862 case MVT::i8: 6863 case MVT::i32: 6864 // +- imm12 6865 return V == (V & ((1LL << 12) - 1)); 6866 case MVT::i16: 6867 // +- imm8 6868 return V == (V & ((1LL << 8) - 1)); 6869 case MVT::f32: 6870 case MVT::f64: 6871 if (!Subtarget->hasVFP2()) // FIXME: NEON? 
6872 return false; 6873 if ((V & 3) != 0) 6874 return false; 6875 V >>= 2; 6876 return V == (V & ((1LL << 8) - 1)); 6877 } 6878} 6879 6880bool ARMTargetLowering::isLegalT2ScaledAddressingMode(const AddrMode &AM, 6881 EVT VT) const { 6882 int Scale = AM.Scale; 6883 if (Scale < 0) 6884 return false; 6885 6886 switch (VT.getSimpleVT().SimpleTy) { 6887 default: return false; 6888 case MVT::i1: 6889 case MVT::i8: 6890 case MVT::i16: 6891 case MVT::i32: 6892 if (Scale == 1) 6893 return true; 6894 // r + r << imm 6895 Scale = Scale & ~1; 6896 return Scale == 2 || Scale == 4 || Scale == 8; 6897 case MVT::i64: 6898 // r + r 6899 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 6900 return true; 6901 return false; 6902 case MVT::isVoid: 6903 // Note, we allow "void" uses (basically, uses that aren't loads or 6904 // stores), because arm allows folding a scale into many arithmetic 6905 // operations. This should be made more precise and revisited later. 6906 6907 // Allow r << imm, but the imm has to be a multiple of two. 6908 if (Scale & 1) return false; 6909 return isPowerOf2_32(Scale); 6910 } 6911} 6912 6913/// isLegalAddressingMode - Return true if the addressing mode represented 6914/// by AM is legal for this target, for a load/store of the specified type. 6915bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM, 6916 const Type *Ty) const { 6917 EVT VT = getValueType(Ty, true); 6918 if (!isLegalAddressImmediate(AM.BaseOffs, VT, Subtarget)) 6919 return false; 6920 6921 // Can never fold addr of global into load/store. 6922 if (AM.BaseGV) 6923 return false; 6924 6925 switch (AM.Scale) { 6926 case 0: // no scale reg, must be "r+i" or "r", or "i". 6927 break; 6928 case 1: 6929 if (Subtarget->isThumb1Only()) 6930 return false; 6931 // FALL THROUGH. 6932 default: 6933 // ARM doesn't support any R+R*scale+imm addr modes. 6934 if (AM.BaseOffs) 6935 return false; 6936 6937 if (!VT.isSimple()) 6938 return false; 6939 6940 if (Subtarget->isThumb2()) 6941 return isLegalT2ScaledAddressingMode(AM, VT); 6942 6943 int Scale = AM.Scale; 6944 switch (VT.getSimpleVT().SimpleTy) { 6945 default: return false; 6946 case MVT::i1: 6947 case MVT::i8: 6948 case MVT::i32: 6949 if (Scale < 0) Scale = -Scale; 6950 if (Scale == 1) 6951 return true; 6952 // r + r << imm 6953 return isPowerOf2_32(Scale & ~1); 6954 case MVT::i16: 6955 case MVT::i64: 6956 // r + r 6957 if (((unsigned)AM.HasBaseReg + Scale) <= 2) 6958 return true; 6959 return false; 6960 6961 case MVT::isVoid: 6962 // Note, we allow "void" uses (basically, uses that aren't loads or 6963 // stores), because arm allows folding a scale into many arithmetic 6964 // operations. This should be made more precise and revisited later. 6965 6966 // Allow r << imm, but the imm has to be a multiple of two. 6967 if (Scale & 1) return false; 6968 return isPowerOf2_32(Scale); 6969 } 6970 break; 6971 } 6972 return true; 6973} 6974 6975/// isLegalICmpImmediate - Return true if the specified immediate is legal 6976/// icmp immediate, that is the target has icmp instructions which can compare 6977/// a register against the immediate without having to materialize the 6978/// immediate into a register. 
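/// For ARM mode this means an so_imm (an 8-bit value rotated right by an even
/// amount), for Thumb2 a t2_so_imm, and for Thumb1 an unsigned 8-bit
/// immediate (0-255), mirroring the checks in the body below.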
6979bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const { 6980 if (!Subtarget->isThumb()) 6981 return ARM_AM::getSOImmVal(Imm) != -1; 6982 if (Subtarget->isThumb2()) 6983 return ARM_AM::getT2SOImmVal(Imm) != -1; 6984 return Imm >= 0 && Imm <= 255; 6985} 6986 6987/// isLegalAddImmediate - Return true if the specified immediate is legal 6988/// add immediate, that is the target has add instructions which can add 6989/// a register with the immediate without having to materialize the 6990/// immediate into a register. 6991bool ARMTargetLowering::isLegalAddImmediate(int64_t Imm) const { 6992 return ARM_AM::getSOImmVal(Imm) != -1; 6993} 6994 6995static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT, 6996 bool isSEXTLoad, SDValue &Base, 6997 SDValue &Offset, bool &isInc, 6998 SelectionDAG &DAG) { 6999 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 7000 return false; 7001 7002 if (VT == MVT::i16 || ((VT == MVT::i8 || VT == MVT::i1) && isSEXTLoad)) { 7003 // AddressingMode 3 7004 Base = Ptr->getOperand(0); 7005 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7006 int RHSC = (int)RHS->getZExtValue(); 7007 if (RHSC < 0 && RHSC > -256) { 7008 assert(Ptr->getOpcode() == ISD::ADD); 7009 isInc = false; 7010 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7011 return true; 7012 } 7013 } 7014 isInc = (Ptr->getOpcode() == ISD::ADD); 7015 Offset = Ptr->getOperand(1); 7016 return true; 7017 } else if (VT == MVT::i32 || VT == MVT::i8 || VT == MVT::i1) { 7018 // AddressingMode 2 7019 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7020 int RHSC = (int)RHS->getZExtValue(); 7021 if (RHSC < 0 && RHSC > -0x1000) { 7022 assert(Ptr->getOpcode() == ISD::ADD); 7023 isInc = false; 7024 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7025 Base = Ptr->getOperand(0); 7026 return true; 7027 } 7028 } 7029 7030 if (Ptr->getOpcode() == ISD::ADD) { 7031 isInc = true; 7032 ARM_AM::ShiftOpc ShOpcVal= ARM_AM::getShiftOpcForNode(Ptr->getOperand(0)); 7033 if (ShOpcVal != ARM_AM::no_shift) { 7034 Base = Ptr->getOperand(1); 7035 Offset = Ptr->getOperand(0); 7036 } else { 7037 Base = Ptr->getOperand(0); 7038 Offset = Ptr->getOperand(1); 7039 } 7040 return true; 7041 } 7042 7043 isInc = (Ptr->getOpcode() == ISD::ADD); 7044 Base = Ptr->getOperand(0); 7045 Offset = Ptr->getOperand(1); 7046 return true; 7047 } 7048 7049 // FIXME: Use VLDM / VSTM to emulate indexed FP load / store. 7050 return false; 7051} 7052 7053static bool getT2IndexedAddressParts(SDNode *Ptr, EVT VT, 7054 bool isSEXTLoad, SDValue &Base, 7055 SDValue &Offset, bool &isInc, 7056 SelectionDAG &DAG) { 7057 if (Ptr->getOpcode() != ISD::ADD && Ptr->getOpcode() != ISD::SUB) 7058 return false; 7059 7060 Base = Ptr->getOperand(0); 7061 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Ptr->getOperand(1))) { 7062 int RHSC = (int)RHS->getZExtValue(); 7063 if (RHSC < 0 && RHSC > -0x100) { // 8 bits. 7064 assert(Ptr->getOpcode() == ISD::ADD); 7065 isInc = false; 7066 Offset = DAG.getConstant(-RHSC, RHS->getValueType(0)); 7067 return true; 7068 } else if (RHSC > 0 && RHSC < 0x100) { // 8 bit, no zero. 
7069 isInc = Ptr->getOpcode() == ISD::ADD; 7070 Offset = DAG.getConstant(RHSC, RHS->getValueType(0)); 7071 return true; 7072 } 7073 } 7074 7075 return false; 7076} 7077 7078/// getPreIndexedAddressParts - returns true by value, base pointer and 7079/// offset pointer and addressing mode by reference if the node's address 7080/// can be legally represented as pre-indexed load / store address. 7081bool 7082ARMTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base, 7083 SDValue &Offset, 7084 ISD::MemIndexedMode &AM, 7085 SelectionDAG &DAG) const { 7086 if (Subtarget->isThumb1Only()) 7087 return false; 7088 7089 EVT VT; 7090 SDValue Ptr; 7091 bool isSEXTLoad = false; 7092 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 7093 Ptr = LD->getBasePtr(); 7094 VT = LD->getMemoryVT(); 7095 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 7096 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 7097 Ptr = ST->getBasePtr(); 7098 VT = ST->getMemoryVT(); 7099 } else 7100 return false; 7101 7102 bool isInc; 7103 bool isLegal = false; 7104 if (Subtarget->isThumb2()) 7105 isLegal = getT2IndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 7106 Offset, isInc, DAG); 7107 else 7108 isLegal = getARMIndexedAddressParts(Ptr.getNode(), VT, isSEXTLoad, Base, 7109 Offset, isInc, DAG); 7110 if (!isLegal) 7111 return false; 7112 7113 AM = isInc ? ISD::PRE_INC : ISD::PRE_DEC; 7114 return true; 7115} 7116 7117/// getPostIndexedAddressParts - returns true by value, base pointer and 7118/// offset pointer and addressing mode by reference if this node can be 7119/// combined with a load / store to form a post-indexed load / store. 7120bool ARMTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op, 7121 SDValue &Base, 7122 SDValue &Offset, 7123 ISD::MemIndexedMode &AM, 7124 SelectionDAG &DAG) const { 7125 if (Subtarget->isThumb1Only()) 7126 return false; 7127 7128 EVT VT; 7129 SDValue Ptr; 7130 bool isSEXTLoad = false; 7131 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 7132 VT = LD->getMemoryVT(); 7133 Ptr = LD->getBasePtr(); 7134 isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD; 7135 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 7136 VT = ST->getMemoryVT(); 7137 Ptr = ST->getBasePtr(); 7138 } else 7139 return false; 7140 7141 bool isInc; 7142 bool isLegal = false; 7143 if (Subtarget->isThumb2()) 7144 isLegal = getT2IndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 7145 isInc, DAG); 7146 else 7147 isLegal = getARMIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset, 7148 isInc, DAG); 7149 if (!isLegal) 7150 return false; 7151 7152 if (Ptr != Base) { 7153 // Swap base ptr and offset to catch more post-index load / store when 7154 // it's legal. In Thumb2 mode, offset must be an immediate. 7155 if (Ptr == Offset && Op->getOpcode() == ISD::ADD && 7156 !Subtarget->isThumb2()) 7157 std::swap(Base, Offset); 7158 7159 // Post-indexed load / store update the base pointer. 7160 if (Ptr != Base) 7161 return false; 7162 } 7163 7164 AM = isInc ? ISD::POST_INC : ISD::POST_DEC; 7165 return true; 7166} 7167 7168void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 7169 const APInt &Mask, 7170 APInt &KnownZero, 7171 APInt &KnownOne, 7172 const SelectionDAG &DAG, 7173 unsigned Depth) const { 7174 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 7175 switch (Op.getOpcode()) { 7176 default: break; 7177 case ARMISD::CMOV: { 7178 // Bits are known zero/one if known on the LHS and RHS. 
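    // Only bits that are known the same way on both operands can be reported,
    // since either operand may be selected at run time; hence the
    // intersection (&=) of the two known-bit sets below.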
7179 DAG.ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 7180 if (KnownZero == 0 && KnownOne == 0) return; 7181 7182 APInt KnownZeroRHS, KnownOneRHS; 7183 DAG.ComputeMaskedBits(Op.getOperand(1), Mask, 7184 KnownZeroRHS, KnownOneRHS, Depth+1); 7185 KnownZero &= KnownZeroRHS; 7186 KnownOne &= KnownOneRHS; 7187 return; 7188 } 7189 } 7190} 7191 7192//===----------------------------------------------------------------------===// 7193// ARM Inline Assembly Support 7194//===----------------------------------------------------------------------===// 7195 7196bool ARMTargetLowering::ExpandInlineAsm(CallInst *CI) const { 7197 // Looking for "rev" which is V6+. 7198 if (!Subtarget->hasV6Ops()) 7199 return false; 7200 7201 InlineAsm *IA = cast<InlineAsm>(CI->getCalledValue()); 7202 std::string AsmStr = IA->getAsmString(); 7203 SmallVector<StringRef, 4> AsmPieces; 7204 SplitString(AsmStr, AsmPieces, ";\n"); 7205 7206 switch (AsmPieces.size()) { 7207 default: return false; 7208 case 1: 7209 AsmStr = AsmPieces[0]; 7210 AsmPieces.clear(); 7211 SplitString(AsmStr, AsmPieces, " \t,"); 7212 7213 // rev $0, $1 7214 if (AsmPieces.size() == 3 && 7215 AsmPieces[0] == "rev" && AsmPieces[1] == "$0" && AsmPieces[2] == "$1" && 7216 IA->getConstraintString().compare(0, 4, "=l,l") == 0) { 7217 const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); 7218 if (Ty && Ty->getBitWidth() == 32) 7219 return IntrinsicLowering::LowerToByteSwap(CI); 7220 } 7221 break; 7222 } 7223 7224 return false; 7225} 7226 7227/// getConstraintType - Given a constraint letter, return the type of 7228/// constraint it is for this target. 7229ARMTargetLowering::ConstraintType 7230ARMTargetLowering::getConstraintType(const std::string &Constraint) const { 7231 if (Constraint.size() == 1) { 7232 switch (Constraint[0]) { 7233 default: break; 7234 case 'l': return C_RegisterClass; 7235 case 'w': return C_RegisterClass; 7236 } 7237 } 7238 return TargetLowering::getConstraintType(Constraint); 7239} 7240 7241/// Examine constraint type and operand type and determine a weight value. 7242/// This object must already have been set up with the operand type 7243/// and the current alternative constraint selected. 7244TargetLowering::ConstraintWeight 7245ARMTargetLowering::getSingleConstraintMatchWeight( 7246 AsmOperandInfo &info, const char *constraint) const { 7247 ConstraintWeight weight = CW_Invalid; 7248 Value *CallOperandVal = info.CallOperandVal; 7249 // If we don't have a value, we can't do a match, 7250 // but allow it at the lowest weight. 7251 if (CallOperandVal == NULL) 7252 return CW_Default; 7253 const Type *type = CallOperandVal->getType(); 7254 // Look at the constraint type. 
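  // 'l' names the low GPRs (R0-R7); on Thumb targets, where that actually
  // restricts allocation, it is treated as a specific-register match, while
  // in ARM mode it behaves like 'r'.  'w' names the VFP/NEON registers and is
  // only meaningful for floating-point operands.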
7255 switch (*constraint) { 7256 default: 7257 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 7258 break; 7259 case 'l': 7260 if (type->isIntegerTy()) { 7261 if (Subtarget->isThumb()) 7262 weight = CW_SpecificReg; 7263 else 7264 weight = CW_Register; 7265 } 7266 break; 7267 case 'w': 7268 if (type->isFloatingPointTy()) 7269 weight = CW_Register; 7270 break; 7271 } 7272 return weight; 7273} 7274 7275std::pair<unsigned, const TargetRegisterClass*> 7276ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 7277 EVT VT) const { 7278 if (Constraint.size() == 1) { 7279 // GCC ARM Constraint Letters 7280 switch (Constraint[0]) { 7281 case 'l': 7282 if (Subtarget->isThumb()) 7283 return std::make_pair(0U, ARM::tGPRRegisterClass); 7284 else 7285 return std::make_pair(0U, ARM::GPRRegisterClass); 7286 case 'r': 7287 return std::make_pair(0U, ARM::GPRRegisterClass); 7288 case 'w': 7289 if (VT == MVT::f32) 7290 return std::make_pair(0U, ARM::SPRRegisterClass); 7291 if (VT.getSizeInBits() == 64) 7292 return std::make_pair(0U, ARM::DPRRegisterClass); 7293 if (VT.getSizeInBits() == 128) 7294 return std::make_pair(0U, ARM::QPRRegisterClass); 7295 break; 7296 } 7297 } 7298 if (StringRef("{cc}").equals_lower(Constraint)) 7299 return std::make_pair(unsigned(ARM::CPSR), ARM::CCRRegisterClass); 7300 7301 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 7302} 7303 7304std::vector<unsigned> ARMTargetLowering:: 7305getRegClassForInlineAsmConstraint(const std::string &Constraint, 7306 EVT VT) const { 7307 if (Constraint.size() != 1) 7308 return std::vector<unsigned>(); 7309 7310 switch (Constraint[0]) { // GCC ARM Constraint Letters 7311 default: break; 7312 case 'l': 7313 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 7314 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 7315 0); 7316 case 'r': 7317 return make_vector<unsigned>(ARM::R0, ARM::R1, ARM::R2, ARM::R3, 7318 ARM::R4, ARM::R5, ARM::R6, ARM::R7, 7319 ARM::R8, ARM::R9, ARM::R10, ARM::R11, 7320 ARM::R12, ARM::LR, 0); 7321 case 'w': 7322 if (VT == MVT::f32) 7323 return make_vector<unsigned>(ARM::S0, ARM::S1, ARM::S2, ARM::S3, 7324 ARM::S4, ARM::S5, ARM::S6, ARM::S7, 7325 ARM::S8, ARM::S9, ARM::S10, ARM::S11, 7326 ARM::S12,ARM::S13,ARM::S14,ARM::S15, 7327 ARM::S16,ARM::S17,ARM::S18,ARM::S19, 7328 ARM::S20,ARM::S21,ARM::S22,ARM::S23, 7329 ARM::S24,ARM::S25,ARM::S26,ARM::S27, 7330 ARM::S28,ARM::S29,ARM::S30,ARM::S31, 0); 7331 if (VT.getSizeInBits() == 64) 7332 return make_vector<unsigned>(ARM::D0, ARM::D1, ARM::D2, ARM::D3, 7333 ARM::D4, ARM::D5, ARM::D6, ARM::D7, 7334 ARM::D8, ARM::D9, ARM::D10,ARM::D11, 7335 ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0); 7336 if (VT.getSizeInBits() == 128) 7337 return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3, 7338 ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0); 7339 break; 7340 } 7341 7342 return std::vector<unsigned>(); 7343} 7344 7345/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 7346/// vector. If it is invalid, don't add anything to Ops. 
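/// Only the GCC-style immediate-range constraints 'I' through 'O' get
/// target-specific handling here; their accepted ranges differ between ARM,
/// Thumb1 and Thumb2, as the per-letter checks below spell out.  Everything
/// else is deferred to the generic TargetLowering implementation.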
7347void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 7348 char Constraint, 7349 std::vector<SDValue>&Ops, 7350 SelectionDAG &DAG) const { 7351 SDValue Result(0, 0); 7352 7353 switch (Constraint) { 7354 default: break; 7355 case 'I': case 'J': case 'K': case 'L': 7356 case 'M': case 'N': case 'O': 7357 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 7358 if (!C) 7359 return; 7360 7361 int64_t CVal64 = C->getSExtValue(); 7362 int CVal = (int) CVal64; 7363 // None of these constraints allow values larger than 32 bits. Check 7364 // that the value fits in an int. 7365 if (CVal != CVal64) 7366 return; 7367 7368 switch (Constraint) { 7369 case 'I': 7370 if (Subtarget->isThumb1Only()) { 7371 // This must be a constant between 0 and 255, for ADD 7372 // immediates. 7373 if (CVal >= 0 && CVal <= 255) 7374 break; 7375 } else if (Subtarget->isThumb2()) { 7376 // A constant that can be used as an immediate value in a 7377 // data-processing instruction. 7378 if (ARM_AM::getT2SOImmVal(CVal) != -1) 7379 break; 7380 } else { 7381 // A constant that can be used as an immediate value in a 7382 // data-processing instruction. 7383 if (ARM_AM::getSOImmVal(CVal) != -1) 7384 break; 7385 } 7386 return; 7387 7388 case 'J': 7389 if (Subtarget->isThumb()) { // FIXME thumb2 7390 // This must be a constant between -255 and -1, for negated ADD 7391 // immediates. This can be used in GCC with an "n" modifier that 7392 // prints the negated value, for use with SUB instructions. It is 7393 // not useful otherwise but is implemented for compatibility. 7394 if (CVal >= -255 && CVal <= -1) 7395 break; 7396 } else { 7397 // This must be a constant between -4095 and 4095. It is not clear 7398 // what this constraint is intended for. Implemented for 7399 // compatibility with GCC. 7400 if (CVal >= -4095 && CVal <= 4095) 7401 break; 7402 } 7403 return; 7404 7405 case 'K': 7406 if (Subtarget->isThumb1Only()) { 7407 // A 32-bit value where only one byte has a nonzero value. Exclude 7408 // zero to match GCC. This constraint is used by GCC internally for 7409 // constants that can be loaded with a move/shift combination. 7410 // It is not useful otherwise but is implemented for compatibility. 7411 if (CVal != 0 && ARM_AM::isThumbImmShiftedVal(CVal)) 7412 break; 7413 } else if (Subtarget->isThumb2()) { 7414 // A constant whose bitwise inverse can be used as an immediate 7415 // value in a data-processing instruction. This can be used in GCC 7416 // with a "B" modifier that prints the inverted value, for use with 7417 // BIC and MVN instructions. It is not useful otherwise but is 7418 // implemented for compatibility. 7419 if (ARM_AM::getT2SOImmVal(~CVal) != -1) 7420 break; 7421 } else { 7422 // A constant whose bitwise inverse can be used as an immediate 7423 // value in a data-processing instruction. This can be used in GCC 7424 // with a "B" modifier that prints the inverted value, for use with 7425 // BIC and MVN instructions. It is not useful otherwise but is 7426 // implemented for compatibility. 7427 if (ARM_AM::getSOImmVal(~CVal) != -1) 7428 break; 7429 } 7430 return; 7431 7432 case 'L': 7433 if (Subtarget->isThumb1Only()) { 7434 // This must be a constant between -7 and 7, 7435 // for 3-operand ADD/SUB immediate instructions. 7436 if (CVal >= -7 && CVal < 7) 7437 break; 7438 } else if (Subtarget->isThumb2()) { 7439 // A constant whose negation can be used as an immediate value in a 7440 // data-processing instruction. 
      case 'M':
        if (Subtarget->isThumb()) { // FIXME thumb2
          // This must be a multiple of 4 between 0 and 1020, for
          // ADD sp + immediate.
          if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
            break;
        } else {
          // A power of two or a constant between 0 and 32.  This is used in
          // GCC for the shift amount on shifted register operands, but it is
          // useful in general for any shift amounts.
          if ((CVal >= 0 && CVal <= 32) || ((CVal & (CVal - 1)) == 0))
            break;
        }
        return;

      case 'N':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a constant between 0 and 31, for shift amounts.
          if (CVal >= 0 && CVal <= 31)
            break;
        }
        return;

      case 'O':
        if (Subtarget->isThumb()) {  // FIXME thumb2
          // This must be a multiple of 4 between -508 and 508, for
          // ADD/SUB sp = sp + immediate.
          if ((CVal >= -508 && CVal <= 508) && ((CVal & 3) == 0))
            break;
        }
        return;
    }
    Result = DAG.getTargetConstant(CVal, Op.getValueType());
    break;
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

bool
ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The ARM target isn't yet aware of offsets.
  return false;
}

int ARM::getVFPf32Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint32_t Sign = Imm.lshr(31).getZExtValue() & 1;
  int32_t Exp = (Imm.lshr(23).getSExtValue() & 0xff) - 127;  // -126 to 127
  int64_t Mantissa = Imm.getZExtValue() & 0x7fffff;  // 23 bits

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
  if (Mantissa & 0x7ffff)
    return -1;
  Mantissa >>= 19;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

int ARM::getVFPf64Imm(const APFloat &FPImm) {
  APInt Imm = FPImm.bitcastToAPInt();
  uint64_t Sign = Imm.lshr(63).getZExtValue() & 1;
  int64_t Exp = (Imm.lshr(52).getSExtValue() & 0x7ff) - 1023;  // -1022 to 1023
  uint64_t Mantissa = Imm.getZExtValue() & 0xfffffffffffffLL;

  // We can handle 4 bits of mantissa.
  // mantissa = (16+UInt(e:f:g:h))/16.
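  // (Editorial worked example, not in the original source: for FPImm = 2.5
  // the double bit pattern is 0x4004000000000000, so Sign = 0, Exp = 1 and
  // Mantissa = 0x4000000000000.  The low 48 mantissa bits are zero, the top
  // nibble e:f:g:h is 4, and Exp is remapped to ((1+3)&7)^4 = 0, giving the
  // VFP immediate encoding 0x04.)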
  if (Mantissa & 0xffffffffffffLL)
    return -1;
  Mantissa >>= 48;
  if ((Mantissa & 0xf) != Mantissa)
    return -1;

  // We can handle 3 bits of exponent: exp == UInt(NOT(b):c:d)-3
  if (Exp < -3 || Exp > 4)
    return -1;
  Exp = ((Exp+3) & 0x7) ^ 4;

  return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}

bool ARM::isBitFieldInvertedMask(unsigned v) {
  if (v == 0xffffffff)
    return false;
  // There can be 1's on either or both "outsides"; all the "inside"
  // bits must be 0's.
  unsigned int lsb = 0, msb = 31;
  while (v & (1 << msb)) --msb;
  while (v & (1 << lsb)) ++lsb;
  for (unsigned int i = lsb; i <= msb; ++i) {
    if (v & (1 << i))
      return false;
  }
  return true;
}

/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
bool ARMTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
  if (!Subtarget->hasVFP3())
    return false;
  if (VT == MVT::f32)
    return ARM::getVFPf32Imm(Imm) != -1;
  if (VT == MVT::f64)
    return ARM::getVFPf64Imm(Imm) != -1;
  return false;
}

/// getTgtMemIntrinsic - Represent NEON load and store intrinsics as
/// MemIntrinsicNodes.  The associated MachineMemOperands record the alignment
/// specified in the intrinsic calls.
bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           unsigned Intrinsic) const {
  switch (Intrinsic) {
  case Intrinsic::arm_neon_vld1:
  case Intrinsic::arm_neon_vld2:
  case Intrinsic::arm_neon_vld3:
  case Intrinsic::arm_neon_vld4:
  case Intrinsic::arm_neon_vld2lane:
  case Intrinsic::arm_neon_vld3lane:
  case Intrinsic::arm_neon_vld4lane: {
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    // Conservatively set memVT to the entire set of vectors loaded.
    uint64_t NumElts = getTargetData()->getTypeAllocSize(I.getType()) / 8;
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile loads with NEON intrinsics not supported
    Info.readMem = true;
    Info.writeMem = false;
    return true;
  }
  case Intrinsic::arm_neon_vst1:
  case Intrinsic::arm_neon_vst2:
  case Intrinsic::arm_neon_vst3:
  case Intrinsic::arm_neon_vst4:
  case Intrinsic::arm_neon_vst2lane:
  case Intrinsic::arm_neon_vst3lane:
  case Intrinsic::arm_neon_vst4lane: {
    Info.opc = ISD::INTRINSIC_VOID;
    // Conservatively set memVT to the entire set of vectors stored.
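    // (Editorial worked example, not in the original source: for a
    // vst3.v4i32 call the operands are (ptr, <4 x i32>, <4 x i32>,
    // <4 x i32>, align); the loop below adds 16/8 = 2 i64 elements per
    // vector operand and stops at the non-vector alignment argument, so
    // memVT becomes v6i64, i.e. the full 48 bytes written.)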
    unsigned NumElts = 0;
    for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) {
      const Type *ArgTy = I.getArgOperand(ArgI)->getType();
      if (!ArgTy->isVectorTy())
        break;
      NumElts += getTargetData()->getTypeAllocSize(ArgTy) / 8;
    }
    Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts);
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1);
    Info.align = cast<ConstantInt>(AlignArg)->getZExtValue();
    Info.vol = false; // volatile stores with NEON intrinsics not supported
    Info.readMem = false;
    Info.writeMem = true;
    return true;
  }
  default:
    break;
  }

  return false;
}