PPCISelLowering.cpp revision d3f7766f2363862b9c8586d2f78bc413223240d3
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

/// CreateTLOF - Pick the object-file lowering implementation that matches the
/// target: Mach-O on Darwin, the PPC64 Linux variant for the SVR4 ABI, and
/// plain ELF otherwise.  The returned object is owned by the TargetLowering
/// base class that it is passed to below.
static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  if (TM.getSubtargetImpl()->isSVR4ABI())
    return new PPC64LinuxTargetObjectFile();

  return new TargetLoweringObjectFileELF();
}

// Constructor: registers the legal register classes and describes, per
// (opcode, type) pair, how every generic SelectionDAG operation must be
// handled on this subtarget (Legal / Expand / Promote / Custom).
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8:4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc load and store's.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT,  MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  // Otherwise FSQRT is expanded unless fast-math plus the reciprocal
  // square-root estimate instructions allow a software Newton iteration.
  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (Subtarget->hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

    // frin does not implement "ties to even." Thus, this is safe only in
    // fast-math mode.
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

      // These need to set FE_INEXACT, and use a custom inserter.
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
    }
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  // popcntw/popcntd are only available with the POPCNTD feature.
  if (Subtarget->hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  // PowerPC does not have Select.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuation, user-level threading, and etc.. As a result, no
  // other SjLj exception interfaces are implemented and please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (PPCSubTarget.hasFPCVT()) {
    if (Subtarget->has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL,  VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT,  VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      // No extending/truncating vector memory ops either.
      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16byte and wider vectors are passed on 16byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

/// getTargetNodeName - Map a PPCISD opcode to its debug-printable name;
/// returns null for opcodes this target does not define.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::FRE:             return "PPCISD::FRE";
  case PPCISD::FRSQRTE:         return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY:       return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE:     return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD:            return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC:        return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::CALL:            return "PPCISD::CALL";
  case PPCISD::CALL_NOP:        return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL:           return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP:  return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ:            return "PPCISD::BDNZ";
  case PPCISD::BDZ:             return "PPCISD::BDZ";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET:          return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET:        return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA:    return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L:        return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L:      return "PPCISD::ADDI_TOC_L";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L:  return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS:         return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA:  return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L:    return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR:    return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA:  return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L:    return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR:  return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L:   return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT:      return "PPCISD::VADD_SPLAT";
  case PPCISD::SC:              return "PPCISD::SC";
  }
}

// Scalar setcc results are i32; vector setcc results keep the operand's
// vector shape with integer elements.
EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
//  Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    // For a load node, operand 1 is the address being loaded from.
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef (mask element < 0) or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction (pack the odd bytes of each halfword).  In the unary
/// case both source operands are the same vector, so only the low 8 mask
/// elements need to match and they must repeat for elements 8-15.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),  i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i),    i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    // Each result halfword pair selects the low halfword (bytes 2,3) of a
    // source word.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3))
        return false;
  } else {
    // Unary form: the same pattern must appear in both halves of the mask.
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
/// UnitSize is the element width in bytes; LHSStart/RHSStart give the byte
/// offset in the (concatenated) source operands where the interleaved units
/// are taken from.
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
758bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, 759 bool isUnary) { 760 if (!isUnary) 761 return isVMerge(N, UnitSize, 0, 16); 762 return isVMerge(N, UnitSize, 0, 0); 763} 764 765 766/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 767/// amount, otherwise return -1. 768int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { 769 assert(N->getValueType(0) == MVT::v16i8 && 770 "PPC only supports shuffles by bytes!"); 771 772 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 773 774 // Find the first non-undef value in the shuffle mask. 775 unsigned i; 776 for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i) 777 /*search*/; 778 779 if (i == 16) return -1; // all undef. 780 781 // Otherwise, check to see if the rest of the elements are consecutively 782 // numbered from this value. 783 unsigned ShiftAmt = SVOp->getMaskElt(i); 784 if (ShiftAmt < i) return -1; 785 ShiftAmt -= i; 786 787 if (!isUnary) { 788 // Check the rest of the elements to see if they are consecutive. 789 for (++i; i != 16; ++i) 790 if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) 791 return -1; 792 } else { 793 // Check the rest of the elements to see if they are consecutive. 794 for (++i; i != 16; ++i) 795 if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) 796 return -1; 797 } 798 return ShiftAmt; 799} 800 801/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 802/// specifies a splat of a single element that is suitable for input to 803/// VSPLTB/VSPLTH/VSPLTW. 804bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) { 805 assert(N->getValueType(0) == MVT::v16i8 && 806 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 807 808 // This is a splat operation if each element of the permute is the same, and 809 // if the value doesn't reference the second vector. 810 unsigned ElementBase = N->getMaskElt(0); 811 812 // FIXME: Handle UNDEF elements too! 
813 if (ElementBase >= 16) 814 return false; 815 816 // Check that the indices are consecutive, in the case of a multi-byte element 817 // splatted with a v16i8 mask. 818 for (unsigned i = 1; i != EltSize; ++i) 819 if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase)) 820 return false; 821 822 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 823 if (N->getMaskElt(i) < 0) continue; 824 for (unsigned j = 0; j != EltSize; ++j) 825 if (N->getMaskElt(i+j) != N->getMaskElt(j)) 826 return false; 827 } 828 return true; 829} 830 831/// isAllNegativeZeroVector - Returns true if all elements of build_vector 832/// are -0.0. 833bool PPC::isAllNegativeZeroVector(SDNode *N) { 834 BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); 835 836 APInt APVal, APUndef; 837 unsigned BitSize; 838 bool HasAnyUndefs; 839 840 if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true)) 841 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0))) 842 return CFP->getValueAPF().isNegZero(); 843 844 return false; 845} 846 847/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 848/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 849unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 850 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); 851 assert(isSplatShuffleMask(SVOp, EltSize)); 852 return SVOp->getMaskElt(0) / EltSize; 853} 854 855/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 856/// by using a vspltis[bhw] instruction of the specified element size, return 857/// the constant being splatted. The ByteSize field indicates the number of 858/// bytes of each element [124] -> [bhw]. 
859SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 860 SDValue OpVal(0, 0); 861 862 // If ByteSize of the splat is bigger than the element size of the 863 // build_vector, then we have a case where we are checking for a splat where 864 // multiple elements of the buildvector are folded together into a single 865 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 866 unsigned EltSize = 16/N->getNumOperands(); 867 if (EltSize < ByteSize) { 868 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 869 SDValue UniquedVals[4]; 870 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 871 872 // See if all of the elements in the buildvector agree across. 873 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 874 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 875 // If the element isn't a constant, bail fully out. 876 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue(); 877 878 879 if (UniquedVals[i&(Multiple-1)].getNode() == 0) 880 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 881 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 882 return SDValue(); // no match. 883 } 884 885 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 886 // either constant or undef values that are identical for each chunk. See 887 // if these chunks can form into a larger vspltis*. 888 889 // Check to see if all of the leading entries are either 0 or -1. If 890 // neither, then this won't fit into the immediate field. 891 bool LeadingZero = true; 892 bool LeadingOnes = true; 893 for (unsigned i = 0; i != Multiple-1; ++i) { 894 if (UniquedVals[i].getNode() == 0) continue; // Must have been undefs. 895 896 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 897 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 898 } 899 // Finally, check the least significant entry. 
900 if (LeadingZero) { 901 if (UniquedVals[Multiple-1].getNode() == 0) 902 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 903 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue(); 904 if (Val < 16) 905 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 906 } 907 if (LeadingOnes) { 908 if (UniquedVals[Multiple-1].getNode() == 0) 909 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 910 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue(); 911 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 912 return DAG.getTargetConstant(Val, MVT::i32); 913 } 914 915 return SDValue(); 916 } 917 918 // Check to see if this buildvec has a single non-undef value in its elements. 919 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 920 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 921 if (OpVal.getNode() == 0) 922 OpVal = N->getOperand(i); 923 else if (OpVal != N->getOperand(i)) 924 return SDValue(); 925 } 926 927 if (OpVal.getNode() == 0) return SDValue(); // All UNDEF: use implicit def. 928 929 unsigned ValSizeInBytes = EltSize; 930 uint64_t Value = 0; 931 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 932 Value = CN->getZExtValue(); 933 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 934 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 935 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 936 } 937 938 // If the splat value is larger than the element value, then we can never do 939 // this splat. The only case that we could fit the replicated bits into our 940 // immediate field for would be zero, and we prefer to use vxor for it. 941 if (ValSizeInBytes < ByteSize) return SDValue(); 942 943 // If the element value is larger than the splat value, cut it in half and 944 // check to see if the two halves are equal. Continue doing this until we 945 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 
946 while (ValSizeInBytes > ByteSize) { 947 ValSizeInBytes >>= 1; 948 949 // If the top half equals the bottom half, we're still ok. 950 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 951 (Value & ((1 << (8*ValSizeInBytes))-1))) 952 return SDValue(); 953 } 954 955 // Properly sign extend the value. 956 int MaskVal = SignExtend32(Value, ByteSize * 8); 957 958 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 959 if (MaskVal == 0) return SDValue(); 960 961 // Finally, if this value fits in a 5 bit sext field, return it 962 if (SignExtend32<5>(MaskVal) == MaskVal) 963 return DAG.getTargetConstant(MaskVal, MVT::i32); 964 return SDValue(); 965} 966 967//===----------------------------------------------------------------------===// 968// Addressing Mode Selection 969//===----------------------------------------------------------------------===// 970 971/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 972/// or 64-bit immediate, and if the value can be accurately represented as a 973/// sign extension from a 16-bit value. If so, this returns true and the 974/// immediate. 975static bool isIntS16Immediate(SDNode *N, short &Imm) { 976 if (N->getOpcode() != ISD::Constant) 977 return false; 978 979 Imm = (short)cast<ConstantSDNode>(N)->getZExtValue(); 980 if (N->getValueType(0) == MVT::i32) 981 return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue(); 982 else 983 return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue(); 984} 985static bool isIntS16Immediate(SDValue Op, short &Imm) { 986 return isIntS16Immediate(Op.getNode(), Imm); 987} 988 989 990/// SelectAddressRegReg - Given the specified addressed, check to see if it 991/// can be represented as an indexed [r+r] operation. Returns false if it 992/// can be more efficiently represented with [r+imm]. 
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    // Neither operand is a small immediate or a Lo part, so this really is
    // a register+register address.
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.  If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      // The low 16 bits are sign-extended when added back, so bias the high
      // half to compensate (the @ha adjustment).
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Otherwise, use the whole expression as the base with a zero displacement.
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
/// Note: this hook only ever produces ISD::PRE_INC.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    // The [r+r] form works; report it as a pre-increment address.
    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
  HiOpFlags = PPCII::MO_HA16;
  LoOpFlags = PPCII::MO_LO16;

  // Don't use the pic base if not in PIC relocation model.  Or if we are on a
  // non-darwin platform.  We don't support PIC on other platforms yet.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
               TM.getSubtarget<PPCSubtarget>().isDarwin();
  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

/// LowerLabelRef - Combine the Hi/Lo parts of a label address into the full
/// address (hi(&g)+lo(&g)), adding the PIC base register when generating
/// PIC code.
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  SDLoc DL(HiPart);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

/// LowerConstantPool - Lower a constant-pool address either to a TOC load
/// (64-bit SVR4) or to a Hi/Lo address pair.
SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

/// LowerJumpTable - Lower a jump-table address either to a TOC load (64-bit
/// SVR4) or to a Hi/Lo address pair.
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

/// LowerBlockAddress - Lower a blockaddress to a Hi/Lo address pair.
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();

  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);

  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

/// LowerGlobalTLSAddress - Lower a thread-local global address according to
/// the TLS model chosen by the target machine (local-exec, initial-exec,
/// general-dynamic or local-dynamic).  Only local-exec is supported on ppc32.
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  SDLoc dl(GA);
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = PPCSubTarget.isPPC64();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    // Local-exec: add the @tprel offset of the variable to the thread pointer
    // (X13 on ppc64, R2 on ppc32).
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL16_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL16_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
    SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
    return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
  }

  if (!is64bit)
    llvm_unreachable("only local-exec is currently supported for ppc32");

  if (Model == TLSModel::InitialExec) {
    // Initial-exec: load the thread-pointer offset from the GOT (addressed
    // via the TOC register X2), then add it to the thread pointer.
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
    SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl,
                                     PtrVT, GOTReg, TGA);
    SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl,
                                   PtrVT, TGA, TPOffsetHi);
    return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA);
  }

  if (Model == TLSModel::GeneralDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
                                     GOTReg, TGA);
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
                                   GOTEntryHi, TGA);

    // We need a chain node, and don't have one handy.  The underlying
    // call has no side effects, so using the function entry node
    // suffices.
    SDValue Chain = DAG.getEntryNode();
    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
                                  PtrVT, ParmReg, TGA);
    // The return value from GET_TLS_ADDR really is in X3 already, but
    // some hacks are needed here to tie everything together.  The extra
    // copies dissolve during subsequent transforms.
    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
    return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
  }

  if (Model == TLSModel::LocalDynamic) {
    SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
    SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
    SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
                                     GOTReg, TGA);
    SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
                                   GOTEntryHi, TGA);

    // We need a chain node, and don't have one handy.  The underlying
    // call has no side effects, so using the function entry node
    // suffices.
    SDValue Chain = DAG.getEntryNode();
    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
    SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
    SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
                                  PtrVT, ParmReg, TGA);
    // The return value from GET_TLSLD_ADDR really is in X3 already, but
    // some hacks are needed here to tie everything together.  The extra
    // copies dissolve during subsequent transforms.
    Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
    SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
                                      Chain, ParmReg, TGA);
    return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
  }

  llvm_unreachable("Unknown TLS model!");
}

/// LowerGlobalAddress - Lower a global address either to a TOC load (64-bit
/// SVR4) or to a Hi/Lo address pair, with an extra load for non-lazy-pointer
/// references.
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  SDLoc DL(GSDN);
  const GlobalValue *GV = GSDN->getGlobal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
    return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV);

  SDValue GAHi =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
  SDValue GALo =
    DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);

  SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG);

  // If the global reference is actually to a non-lazy-pointer, we have to do an
  // extra load to get the address of the global.
  if (MOHiFlag & PPCII::MO_NLP_FLAG)
    Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(),
                      false, false, false, 0);
  return Ptr;
}

/// LowerSETCC - Custom-lower SETCC: expose seteq-to-zero as ctlz/srl, and
/// rewrite integer seteq/setne as a comparison of (lhs xor rhs) against zero.
SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
  SDLoc dl(Op);

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      EVT VT = Op.getOperand(0).getValueType();
      SDValue Zext = Op.getOperand(0);
      // Sub-word values are widened so the ctlz/srl trick works on i32.
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
      SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
                                DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDValue();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  EVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    EVT VT = Op.getValueType();
    SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0),
                              Op.getOperand(1));
    return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDValue();
}

/// LowerVAARG - Custom-lower VAARG for the 32-bit SVR4 ABI: pick the value
/// from the register save area or the overflow area depending on the saved
/// gpr/fpr index, and update the va_list bookkeeping in memory.
SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) const {
  SDNode *Node = Op.getNode();
  EVT VT = Node->getValueType(0);
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDValue InChain = Node->getOperand(0);
  SDValue VAListPtr = Node->getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
  SDLoc dl(Node);

  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");

  // gpr_index
  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    VAListPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = GprIndex.getValue(1);

  if (VT == MVT::i64) {
    // Check if GprIndex is even
    SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
                                 DAG.getConstant(1, MVT::i32));
    SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
                                DAG.getConstant(0, MVT::i32), ISD::SETNE);
    SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
                                          DAG.getConstant(1, MVT::i32));
    // Align GprIndex to be even if it isn't
    GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
                           GprIndex);
  }

  // fpr index is 1 byte after gpr
  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                               DAG.getConstant(1, MVT::i32));

  // fpr
  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
                                    FprPtr, MachinePointerInfo(SV), MVT::i8,
                                    false, false, 0);
  InChain = FprIndex.getValue(1);

  SDValue RegSaveAreaPtr =
    DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                DAG.getConstant(8, MVT::i32));

  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
                                        DAG.getConstant(4, MVT::i32));

  // areas
  SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr,
                                     MachinePointerInfo(), false, false,
                                     false, 0);
  InChain = OverflowArea.getValue(1);

  SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr,
                                    MachinePointerInfo(), false, false,
                                    false, 0);
  InChain = RegSaveArea.getValue(1);

  // select overflow_area if index > 8
  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
                            DAG.getConstant(8, MVT::i32), ISD::SETLT);

  // adjustment constant gpr_index * 4/8
  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
                                    VT.isInteger() ? GprIndex : FprIndex,
                                    DAG.getConstant(VT.isInteger() ? 4 : 8,
                                                    MVT::i32));

  // OurReg = RegSaveArea + RegConstant
  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
                               RegConstant);

  // Floating types are 32 bytes into RegSaveArea
  if (VT.isFloatingPoint())
    OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
                         DAG.getConstant(32, MVT::i32));

  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
                                   VT.isInteger() ? GprIndex : FprIndex,
                                   DAG.getConstant(VT == MVT::i64 ? 2 : 1,
                                                   MVT::i32));

  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
                              VT.isInteger() ?
VAListPtr : FprPtr, 1587 MachinePointerInfo(SV), 1588 MVT::i8, false, false, 0); 1589 1590 // determine if we should load from reg_save_area or overflow_area 1591 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1592 1593 // increase overflow_area by 4/8 if gpr/fpr > 8 1594 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1595 DAG.getConstant(VT.isInteger() ? 4 : 8, 1596 MVT::i32)); 1597 1598 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1599 OverflowAreaPlusN); 1600 1601 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1602 OverflowAreaPtr, 1603 MachinePointerInfo(), 1604 MVT::i32, false, false, 0); 1605 1606 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1607 false, false, false, 0); 1608} 1609 1610SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1611 SelectionDAG &DAG) const { 1612 return Op.getOperand(0); 1613} 1614 1615SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1616 SelectionDAG &DAG) const { 1617 SDValue Chain = Op.getOperand(0); 1618 SDValue Trmp = Op.getOperand(1); // trampoline 1619 SDValue FPtr = Op.getOperand(2); // nested function 1620 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1621 SDLoc dl(Op); 1622 1623 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1624 bool isPPC64 = (PtrVT == MVT::i64); 1625 Type *IntPtrTy = 1626 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 1627 *DAG.getContext()); 1628 1629 TargetLowering::ArgListTy Args; 1630 TargetLowering::ArgListEntry Entry; 1631 1632 Entry.Ty = IntPtrTy; 1633 Entry.Node = Trmp; Args.push_back(Entry); 1634 1635 // TrampSize == (isPPC64 ? 48 : 40); 1636 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 1637 isPPC64 ? 
MVT::i64 : MVT::i32); 1638 Args.push_back(Entry); 1639 1640 Entry.Node = FPtr; Args.push_back(Entry); 1641 Entry.Node = Nest; Args.push_back(Entry); 1642 1643 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1644 TargetLowering::CallLoweringInfo CLI(Chain, 1645 Type::getVoidTy(*DAG.getContext()), 1646 false, false, false, false, 0, 1647 CallingConv::C, 1648 /*isTailCall=*/false, 1649 /*doesNotRet=*/false, 1650 /*isReturnValueUsed=*/true, 1651 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1652 Args, DAG, dl); 1653 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 1654 1655 return CallResult.second; 1656} 1657 1658SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1659 const PPCSubtarget &Subtarget) const { 1660 MachineFunction &MF = DAG.getMachineFunction(); 1661 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1662 1663 SDLoc dl(Op); 1664 1665 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1666 // vastart just stores the address of the VarArgsFrameIndex slot into the 1667 // memory location argument. 1668 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1669 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1670 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1671 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1672 MachinePointerInfo(SV), 1673 false, false, 0); 1674 } 1675 1676 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1677 // We suppose the given va_list is already allocated. 1678 // 1679 // typedef struct { 1680 // char gpr; /* index into the array of 8 GPRs 1681 // * stored in the register save area 1682 // * gpr=0 corresponds to r3, 1683 // * gpr=1 to r4, etc. 1684 // */ 1685 // char fpr; /* index into the array of 8 FPRs 1686 // * stored in the register save area 1687 // * fpr=0 corresponds to f1, 1688 // * fpr=1 to f2, etc. 
1689 // */ 1690 // char *overflow_arg_area; 1691 // /* location on stack that holds 1692 // * the next overflow argument 1693 // */ 1694 // char *reg_save_area; 1695 // /* where r3:r10 and f1:f8 (if saved) 1696 // * are stored 1697 // */ 1698 // } va_list[1]; 1699 1700 1701 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1702 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1703 1704 1705 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1706 1707 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1708 PtrVT); 1709 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1710 PtrVT); 1711 1712 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1713 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1714 1715 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1716 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1717 1718 uint64_t FPROffset = 1; 1719 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1720 1721 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1722 1723 // Store first byte : number of int regs 1724 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1725 Op.getOperand(1), 1726 MachinePointerInfo(SV), 1727 MVT::i8, false, false, 0); 1728 uint64_t nextOffset = FPROffset; 1729 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1730 ConstFPROffset); 1731 1732 // Store second byte : number of float regs 1733 SDValue secondStore = 1734 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1735 MachinePointerInfo(SV, nextOffset), MVT::i8, 1736 false, false, 0); 1737 nextOffset += StackOffset; 1738 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1739 1740 // Store second word : arguments given on stack 1741 SDValue thirdStore = 1742 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1743 MachinePointerInfo(SV, nextOffset), 1744 false, false, 0); 1745 
nextOffset += FrameOffset; 1746 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1747 1748 // Store third word : arguments given in registers 1749 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1750 MachinePointerInfo(SV, nextOffset), 1751 false, false, 0); 1752 1753} 1754 1755#include "PPCGenCallingConv.inc" 1756 1757bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1758 CCValAssign::LocInfo &LocInfo, 1759 ISD::ArgFlagsTy &ArgFlags, 1760 CCState &State) { 1761 return true; 1762} 1763 1764bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1765 MVT &LocVT, 1766 CCValAssign::LocInfo &LocInfo, 1767 ISD::ArgFlagsTy &ArgFlags, 1768 CCState &State) { 1769 static const uint16_t ArgRegs[] = { 1770 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1771 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1772 }; 1773 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1774 1775 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1776 1777 // Skip one register if the first unallocated register has an even register 1778 // number and there are still argument registers available which have not been 1779 // allocated yet. RegNum is actually an index into ArgRegs, which means we 1780 // need to skip a register if RegNum is odd. 1781 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1782 State.AllocateReg(ArgRegs[RegNum]); 1783 } 1784 1785 // Always return false here, as this function only makes sure that the first 1786 // unallocated register has an odd register number and does not actually 1787 // allocate a register for the current argument. 
1788 return false; 1789} 1790 1791bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1792 MVT &LocVT, 1793 CCValAssign::LocInfo &LocInfo, 1794 ISD::ArgFlagsTy &ArgFlags, 1795 CCState &State) { 1796 static const uint16_t ArgRegs[] = { 1797 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1798 PPC::F8 1799 }; 1800 1801 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1802 1803 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1804 1805 // If there is only one Floating-point register left we need to put both f64 1806 // values of a split ppc_fp128 value on the stack. 1807 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1808 State.AllocateReg(ArgRegs[RegNum]); 1809 } 1810 1811 // Always return false here, as this function only makes sure that the two f64 1812 // values a ppc_fp128 value is split into are both passed in registers or both 1813 // passed on the stack and does not actually allocate a register for the 1814 // current argument. 1815 return false; 1816} 1817 1818/// GetFPR - Get the set of FP registers that should be allocated for arguments, 1819/// on Darwin. 1820static const uint16_t *GetFPR() { 1821 static const uint16_t FPR[] = { 1822 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1823 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1824 }; 1825 1826 return FPR; 1827} 1828 1829/// CalculateStackSlotSize - Calculates the size reserved for this argument on 1830/// the stack. 
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
                                       unsigned PtrByteSize) {
  unsigned ArgSize = ArgVT.getSizeInBits()/8;
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  // Round up to a multiple of the pointer size; every argument occupies a
  // whole number of pointer-sized slots in the parameter save area.
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

/// LowerFormalArguments - Dispatch incoming-argument lowering to the
/// ABI-specific implementation (64-bit SVR4, 32-bit SVR4, or Darwin).
SDValue
PPCTargetLowering::LowerFormalArguments(SDValue Chain,
                                        CallingConv::ID CallConv, bool isVarArg,
                                        const SmallVectorImpl<ISD::InputArg>
                                          &Ins,
                                        SDLoc dl, SelectionDAG &DAG,
                                        SmallVectorImpl<SDValue> &InVals)
                                          const {
  if (PPCSubTarget.isSVR4ABI()) {
    if (PPCSubTarget.isPPC64())
      return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
    else
      return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins,
                                         dl, DAG, InVals);
  } else {
    return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins,
                                       dl, DAG, InVals);
  }
}

SDValue
PPCTargetLowering::LowerFormalArguments_32SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {

  // 32-bit SVR4 ABI Stack Frame Layout:
  //              +-----------------------------------+
  //        +-->  |            Back chain             |
  //        |     +-----------------------------------+
  //        |     | Floating-point register save area |
  //        |     +-----------------------------------+
  //        |     |    General register save area     |
  //        |     +-----------------------------------+
  //        |     |          CR save word             |
  //        |     +-----------------------------------+
  //        |     |         VRSAVE save word          |
  //        |     +-----------------------------------+
  //        |     |         Alignment padding         |
  //        |     +-----------------------------------+
  //        |     |     Vector register save area     |
  //        |     +-----------------------------------+
  //        |     |       Local variable space        |
  //        |     +-----------------------------------+
  //        |     |        Parameter list area        |
  //        |     +-----------------------------------+
  //        |     |           LR save word            |
  //        |     +-----------------------------------+
  // SP-->  +---  |            Back chain             |
  //              +-----------------------------------+
  //
  // Specifications:
  //   System V Application Binary Interface PowerPC Processor Supplement
  //   AltiVec Technology Programming Interface Manual

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 4;

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize);

  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);

  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    CCValAssign &VA = ArgLocs[i];

    // Arguments stored in registers.
    if (VA.isRegLoc()) {
      const TargetRegisterClass *RC;
      EVT ValVT = VA.getValVT();

      switch (ValVT.getSimpleVT().SimpleTy) {
        default:
          llvm_unreachable("ValVT not supported by formal arguments Lowering");
        case MVT::i32:
          RC = &PPC::GPRCRegClass;
          break;
        case MVT::f32:
          RC = &PPC::F4RCRegClass;
          break;
        case MVT::f64:
          RC = &PPC::F8RCRegClass;
          break;
        case MVT::v16i8:
        case MVT::v8i16:
        case MVT::v4i32:
        case MVT::v4f32:
          RC = &PPC::VRRCRegClass;
          break;
      }

      // Transform the arguments stored in physical registers into virtual ones.
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT);

      InVals.push_back(ArgValue);
    } else {
      // Argument stored in memory.
      assert(VA.isMemLoc());

      unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
      int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
                                      isImmutable);

      // Create load nodes to retrieve arguments from the stack.
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
                                   MachinePointerInfo(),
                                   false, false, false, 0));
    }
  }

  // Assign locations to all of the incoming aggregate by value arguments.
  // Aggregates passed by value are stored in the local variable space of the
  // caller's stack frame, right above the parameter list area.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized function's reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  MinReservedArea =
    std::max(MinReservedArea,
             PPCFrameLowering::getMinCallFrameSize(false, false));

  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()->
    getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;

  FI->setMinReservedArea(MinReservedArea);

  SmallVector<SDValue, 8> MemOps;

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    static const uint16_t GPArgRegs[] = {
      PPC::R3, PPC::R4, PPC::R5, PPC::R6,
      PPC::R7, PPC::R8, PPC::R9, PPC::R10,
    };
    const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);

    static const uint16_t FPArgRegs[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8
    };
    const unsigned NumFPArgRegs = array_lengthof(FPArgRegs);

    FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs,
                                                          NumGPArgRegs));
    FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs,
                                                          NumFPArgRegs));

    // Make room for NumGPArgRegs and NumFPArgRegs.
    int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
                NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;

    FuncInfo->setVarArgsStackOffset(
      MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                             CCInfo.getNextStackOffset(), true));

    FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }

    // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
    // is set.
    // The double arguments are stored to the VarArgsFrameIndex
    // on the stack.
    for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
      // Get an existing live-in vreg, or add a new one.
      unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
      if (!VReg)
        VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by eight for the next argument to store
      SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8,
                                       PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl,
                        MVT::Other, &MemOps[0], MemOps.size());

  return Chain;
}

// PPC64 passes i8, i16, and i32 values in i64 registers. Promote
// value to MVT::i64 and then truncate to the correct register size.
SDValue
PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
                                     SelectionDAG &DAG, SDValue ArgVal,
                                     SDLoc dl) const {
  // AssertSext/AssertZext record the caller's extension guarantee so later
  // optimizations can rely on the upper bits being well-defined.
  if (Flags.isSExt())
    ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));
  else if (Flags.isZExt())
    ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
                         DAG.getValueType(ObjectVT));

  return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
}

// Set the size that is at least reserved in caller of this function.  Tail
// call optimized functions' reserved stack space needs to be aligned so that
// taking the difference between two stack areas will result in an aligned
// stack.
void
PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG,
                                      unsigned nAltivecParamsAtEnd,
                                      unsigned MinReservedArea,
                                      bool isPPC64) const {
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  // Add the Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    // Vector parameters are 16-byte aligned; pad then append them.
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }
  MinReservedArea =
    std::max(MinReservedArea,
             PPCFrameLowering::getMinCallFrameSize(isPPC64, true));
  unsigned TargetAlign
    = DAG.getMachineFunction().getTarget().getFrameLowering()->
        getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
  FI->setMinReservedArea(MinReservedArea);
}

SDValue
PPCTargetLowering::LowerFormalArguments_64SVR4(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;

  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const uint16_t GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const uint16_t *FPR = GetFPR();

  static const uint16_t VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = array_lengthof(VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  SmallVector<SDValue, 8> MemOps;
  unsigned nAltivecParamsAtEnd = 0;
  Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    unsigned ObjSize = ObjectVT.getSizeInBits()/8;
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    // Several Ins entries can map to one IR argument; advance FuncArg only
    // when the original argument index changes.
    std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx);
    CurArgIdx = Ins[ArgNo].OrigArgIndex;

    unsigned CurArgOffset = ArgOffset;

    // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary.
    if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 ||
        ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) {
      if (isVarArg) {
        MinReservedArea = ((MinReservedArea+15)/16)*16;
        MinReservedArea += CalculateStackSlotSize(ObjectVT,
                                                  Flags,
                                                  PtrByteSize);
      } else
        nAltivecParamsAtEnd++;
    } else
      // Calculate min reserved area.
      MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT,
                                                Flags,
                                                PtrByteSize);

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }
      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (ObjSize < PtrByteSize)
        CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize);
      // The value of the object is its address.
      int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      if (ObjSize < 8) {
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store;

          if (ObjSize==1 || ObjSize==2 || ObjSize==4) {
            EVT ObjType = (ObjSize == 1 ? MVT::i8 :
                           (ObjSize == 2 ? MVT::i16 : MVT::i32));
            Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN,
                                      MachinePointerInfo(FuncArg, CurArgOffset),
                                      ObjType, false, false, 0);
          } else {
            // For sizes that don't fit a truncating store (3, 5, 6, 7),
            // store the whole register as-is to the parameter save area
            // slot.  The address of the parameter was already calculated
            // above (InVals.push_back(FIN)) to be the right-justified
            // offset within the slot.  For this store, we need a new
            // frame index that points at the beginning of the slot.
            int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
            SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
            Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                 MachinePointerInfo(FuncArg, ArgOffset),
                                 false, false, 0);
          }

          MemOps.push_back(Store);
          ++GPR_idx;
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        // Store whatever pieces of the object are in registers
        // to memory.  ArgOffset will be the address of the beginning
        // of the object.
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg;
          VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
          SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                       MachinePointerInfo(FuncArg, ArgOffset),
                                       false, false, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the rest of the aggregate is already in memory.
          ArgOffset += ArgSize - j;
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        ++GPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 8 bytes of argument space consumes one of the GPRs available for
      // argument passing.
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
        ArgSize = PtrByteSize;
      }

      ArgOffset += 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        if (isVarArg) {
          // Pad to 16-byte alignment, consuming the GPRs that shadow the
          // skipped doublewords.
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64?
        }
        ++VR_idx;
      } else {
        // Vectors are aligned.
        ArgOffset = ((ArgOffset+15)/16)*16;
        CurArgOffset = ArgOffset;
        ArgOffset += 16;
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(),
                           false, false, false, 0);
    }

    InVals.push_back(ArgVal);
  }

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (isVarArg) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI->CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
                                   MachinePointerInfo(), false, false, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl,
                        MVT::Other, &MemOps[0], MemOps.size());

  return Chain;
}

SDValue
PPCTargetLowering::LowerFormalArguments_Darwin(
                                      SDValue Chain,
                                      CallingConv::ID CallConv, bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg>
                                        &Ins,
                                      SDLoc dl, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const uint16_t GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const uint16_t GPR_64[] = {           // 64-bit registers.
2441 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2442 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2443 }; 2444 2445 static const uint16_t *FPR = GetFPR(); 2446 2447 static const uint16_t VR[] = { 2448 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2449 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2450 }; 2451 2452 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2453 const unsigned Num_FPR_Regs = 13; 2454 const unsigned Num_VR_Regs = array_lengthof( VR); 2455 2456 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2457 2458 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 2459 2460 // In 32-bit non-varargs functions, the stack space for vectors is after the 2461 // stack space for non-vectors. We do not use this space unless we have 2462 // too many vectors to fit in registers, something that only occurs in 2463 // constructed examples:), but we have to walk the arglist to figure 2464 // that out...for the pathological case, compute VecArgOffset as the 2465 // start of the vector parameter area. Computing VecArgOffset is the 2466 // entire point of the following loop. 2467 unsigned VecArgOffset = ArgOffset; 2468 if (!isVarArg && !isPPC64) { 2469 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 2470 ++ArgNo) { 2471 EVT ObjectVT = Ins[ArgNo].VT; 2472 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2473 2474 if (Flags.isByVal()) { 2475 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 2476 unsigned ObjSize = Flags.getByValSize(); 2477 unsigned ArgSize = 2478 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2479 VecArgOffset += ArgSize; 2480 continue; 2481 } 2482 2483 switch(ObjectVT.getSimpleVT().SimpleTy) { 2484 default: llvm_unreachable("Unhandled argument type!"); 2485 case MVT::i32: 2486 case MVT::f32: 2487 VecArgOffset += 4; 2488 break; 2489 case MVT::i64: // PPC64 2490 case MVT::f64: 2491 // FIXME: We are guaranteed to be !isPPC64 at this point. 2492 // Does MVT::i64 apply? 
2493 VecArgOffset += 8; 2494 break; 2495 case MVT::v4f32: 2496 case MVT::v4i32: 2497 case MVT::v8i16: 2498 case MVT::v16i8: 2499 // Nothing to do, we're only looking at Nonvector args here. 2500 break; 2501 } 2502 } 2503 } 2504 // We've found where the vector parameter area in memory is. Skip the 2505 // first 12 parameters; these don't use that memory. 2506 VecArgOffset = ((VecArgOffset+15)/16)*16; 2507 VecArgOffset += 12*16; 2508 2509 // Add DAG nodes to load the arguments or copy them out of registers. On 2510 // entry to a function on PPC, the arguments start after the linkage area, 2511 // although the first ones are often in registers. 2512 2513 SmallVector<SDValue, 8> MemOps; 2514 unsigned nAltivecParamsAtEnd = 0; 2515 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2516 unsigned CurArgIdx = 0; 2517 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2518 SDValue ArgVal; 2519 bool needsLoad = false; 2520 EVT ObjectVT = Ins[ArgNo].VT; 2521 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2522 unsigned ArgSize = ObjSize; 2523 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2524 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2525 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2526 2527 unsigned CurArgOffset = ArgOffset; 2528 2529 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2530 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2531 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2532 if (isVarArg || isPPC64) { 2533 MinReservedArea = ((MinReservedArea+15)/16)*16; 2534 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2535 Flags, 2536 PtrByteSize); 2537 } else nAltivecParamsAtEnd++; 2538 } else 2539 // Calculate min reserved area. 2540 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2541 Flags, 2542 PtrByteSize); 2543 2544 // FIXME the codegen can be much improved in some cases. 2545 // We do not have to keep everything in memory. 
2546 if (Flags.isByVal()) { 2547 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2548 ObjSize = Flags.getByValSize(); 2549 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2550 // Objects of size 1 and 2 are right justified, everything else is 2551 // left justified. This means the memory address is adjusted forwards. 2552 if (ObjSize==1 || ObjSize==2) { 2553 CurArgOffset = CurArgOffset + (4 - ObjSize); 2554 } 2555 // The value of the object is its address. 2556 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2557 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2558 InVals.push_back(FIN); 2559 if (ObjSize==1 || ObjSize==2) { 2560 if (GPR_idx != Num_GPR_Regs) { 2561 unsigned VReg; 2562 if (isPPC64) 2563 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2564 else 2565 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2566 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2567 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 2568 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2569 MachinePointerInfo(FuncArg, 2570 CurArgOffset), 2571 ObjType, false, false, 0); 2572 MemOps.push_back(Store); 2573 ++GPR_idx; 2574 } 2575 2576 ArgOffset += PtrByteSize; 2577 2578 continue; 2579 } 2580 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2581 // Store whatever pieces of the object are in registers 2582 // to memory. ArgOffset will be the address of the beginning 2583 // of the object. 
2584 if (GPR_idx != Num_GPR_Regs) { 2585 unsigned VReg; 2586 if (isPPC64) 2587 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2588 else 2589 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2590 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2591 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2592 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2593 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2594 MachinePointerInfo(FuncArg, ArgOffset), 2595 false, false, 0); 2596 MemOps.push_back(Store); 2597 ++GPR_idx; 2598 ArgOffset += PtrByteSize; 2599 } else { 2600 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2601 break; 2602 } 2603 } 2604 continue; 2605 } 2606 2607 switch (ObjectVT.getSimpleVT().SimpleTy) { 2608 default: llvm_unreachable("Unhandled argument type!"); 2609 case MVT::i32: 2610 if (!isPPC64) { 2611 if (GPR_idx != Num_GPR_Regs) { 2612 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2613 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2614 ++GPR_idx; 2615 } else { 2616 needsLoad = true; 2617 ArgSize = PtrByteSize; 2618 } 2619 // All int arguments reserve stack space in the Darwin ABI. 2620 ArgOffset += PtrByteSize; 2621 break; 2622 } 2623 // FALLTHROUGH 2624 case MVT::i64: // PPC64 2625 if (GPR_idx != Num_GPR_Regs) { 2626 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2627 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2628 2629 if (ObjectVT == MVT::i32) 2630 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2631 // value to MVT::i64 and then truncate to the correct register size. 2632 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2633 2634 ++GPR_idx; 2635 } else { 2636 needsLoad = true; 2637 ArgSize = PtrByteSize; 2638 } 2639 // All int arguments reserve stack space in the Darwin ABI. 
2640 ArgOffset += 8; 2641 break; 2642 2643 case MVT::f32: 2644 case MVT::f64: 2645 // Every 4 bytes of argument space consumes one of the GPRs available for 2646 // argument passing. 2647 if (GPR_idx != Num_GPR_Regs) { 2648 ++GPR_idx; 2649 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2650 ++GPR_idx; 2651 } 2652 if (FPR_idx != Num_FPR_Regs) { 2653 unsigned VReg; 2654 2655 if (ObjectVT == MVT::f32) 2656 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2657 else 2658 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2659 2660 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2661 ++FPR_idx; 2662 } else { 2663 needsLoad = true; 2664 } 2665 2666 // All FP arguments reserve stack space in the Darwin ABI. 2667 ArgOffset += isPPC64 ? 8 : ObjSize; 2668 break; 2669 case MVT::v4f32: 2670 case MVT::v4i32: 2671 case MVT::v8i16: 2672 case MVT::v16i8: 2673 // Note that vector arguments in registers don't reserve stack space, 2674 // except in varargs functions. 2675 if (VR_idx != Num_VR_Regs) { 2676 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2677 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2678 if (isVarArg) { 2679 while ((ArgOffset % 16) != 0) { 2680 ArgOffset += PtrByteSize; 2681 if (GPR_idx != Num_GPR_Regs) 2682 GPR_idx++; 2683 } 2684 ArgOffset += 16; 2685 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2686 } 2687 ++VR_idx; 2688 } else { 2689 if (!isVarArg && !isPPC64) { 2690 // Vectors go after all the nonvectors. 2691 CurArgOffset = VecArgOffset; 2692 VecArgOffset += 16; 2693 } else { 2694 // Vectors are aligned. 2695 ArgOffset = ((ArgOffset+15)/16)*16; 2696 CurArgOffset = ArgOffset; 2697 ArgOffset += 16; 2698 } 2699 needsLoad = true; 2700 } 2701 break; 2702 } 2703 2704 // We need to load the argument to a virtual register if we determined above 2705 // that we ran out of physical registers of the appropriate type. 
2706 if (needsLoad) { 2707 int FI = MFI->CreateFixedObject(ObjSize, 2708 CurArgOffset + (ArgSize - ObjSize), 2709 isImmutable); 2710 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2711 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2712 false, false, false, 0); 2713 } 2714 2715 InVals.push_back(ArgVal); 2716 } 2717 2718 // Set the size that is at least reserved in caller of this function. Tail 2719 // call optimized functions' reserved stack space needs to be aligned so that 2720 // taking the difference between two stack areas will result in an aligned 2721 // stack. 2722 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); 2723 2724 // If the function takes variable number of arguments, make a frame index for 2725 // the start of the first vararg value... for expansion of llvm.va_start. 2726 if (isVarArg) { 2727 int Depth = ArgOffset; 2728 2729 FuncInfo->setVarArgsFrameIndex( 2730 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2731 Depth, true)); 2732 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2733 2734 // If this function is vararg, store any remaining integer argument regs 2735 // to their spots on the stack so that they may be loaded by deferencing the 2736 // result of va_next. 
2737 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2738 unsigned VReg; 2739 2740 if (isPPC64) 2741 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2742 else 2743 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2744 2745 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2746 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2747 MachinePointerInfo(), false, false, 0); 2748 MemOps.push_back(Store); 2749 // Increment the address by four for the next argument to store 2750 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2751 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2752 } 2753 } 2754 2755 if (!MemOps.empty()) 2756 Chain = DAG.getNode(ISD::TokenFactor, dl, 2757 MVT::Other, &MemOps[0], MemOps.size()); 2758 2759 return Chain; 2760} 2761 2762/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 2763/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. 2764static unsigned 2765CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2766 bool isPPC64, 2767 bool isVarArg, 2768 unsigned CC, 2769 const SmallVectorImpl<ISD::OutputArg> 2770 &Outs, 2771 const SmallVectorImpl<SDValue> &OutVals, 2772 unsigned &nAltivecParamsAtEnd) { 2773 // Count how many bytes are to be pushed on the stack, including the linkage 2774 // area, and parameter passing area. We start with 24/48 bytes, which is 2775 // prereserved space for [SP][CR][LR][3 x unused]. 2776 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2777 unsigned NumOps = Outs.size(); 2778 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2779 2780 // Add up all the space actually used. 2781 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2782 // they all go in registers, but we must reserve stack space for them for 2783 // possible use by the caller. 
In varargs or 64-bit calls, parameters are 2784 // assigned stack space in order, with padding so Altivec parameters are 2785 // 16-byte aligned. 2786 nAltivecParamsAtEnd = 0; 2787 for (unsigned i = 0; i != NumOps; ++i) { 2788 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2789 EVT ArgVT = Outs[i].VT; 2790 // Varargs Altivec parameters are padded to a 16 byte boundary. 2791 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2792 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 2793 if (!isVarArg && !isPPC64) { 2794 // Non-varargs Altivec parameters go after all the non-Altivec 2795 // parameters; handle those later so we know how much padding we need. 2796 nAltivecParamsAtEnd++; 2797 continue; 2798 } 2799 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 2800 NumBytes = ((NumBytes+15)/16)*16; 2801 } 2802 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2803 } 2804 2805 // Allow for Altivec parameters at the end, if needed. 2806 if (nAltivecParamsAtEnd) { 2807 NumBytes = ((NumBytes+15)/16)*16; 2808 NumBytes += 16*nAltivecParamsAtEnd; 2809 } 2810 2811 // The prolog code of the callee may store up to 8 GPR argument registers to 2812 // the stack, allowing va_start to index over them in memory if its varargs. 2813 // Because we cannot tell if this is needed on the caller side, we have to 2814 // conservatively assume that it is needed. As such, make sure we have at 2815 // least enough stack space for the caller to store the 8 GPRs. 2816 NumBytes = std::max(NumBytes, 2817 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2818 2819 // Tail call needs the stack to be aligned. 2820 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 2821 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 
2822 getFrameLowering()->getStackAlignment(); 2823 unsigned AlignMask = TargetAlign-1; 2824 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2825 } 2826 2827 return NumBytes; 2828} 2829 2830/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 2831/// adjusted to accommodate the arguments for the tailcall. 2832static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 2833 unsigned ParamSize) { 2834 2835 if (!isTailCall) return 0; 2836 2837 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 2838 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 2839 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 2840 // Remember only if the new adjustement is bigger. 2841 if (SPDiff < FI->getTailCallSPDelta()) 2842 FI->setTailCallSPDelta(SPDiff); 2843 2844 return SPDiff; 2845} 2846 2847/// IsEligibleForTailCallOptimization - Check whether the call is eligible 2848/// for tail call optimization. Targets which want to do tail call 2849/// optimization should implement this function. 2850bool 2851PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2852 CallingConv::ID CalleeCC, 2853 bool isVarArg, 2854 const SmallVectorImpl<ISD::InputArg> &Ins, 2855 SelectionDAG& DAG) const { 2856 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 2857 return false; 2858 2859 // Variable argument functions are not supported. 2860 if (isVarArg) 2861 return false; 2862 2863 MachineFunction &MF = DAG.getMachineFunction(); 2864 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 2865 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 2866 // Functions containing by val parameters are not supported. 2867 for (unsigned i = 0; i != Ins.size(); i++) { 2868 ISD::ArgFlagsTy Flags = Ins[i].Flags; 2869 if (Flags.isByVal()) return false; 2870 } 2871 2872 // Non PIC/GOT tail calls are supported. 
2873 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 2874 return true; 2875 2876 // At the moment we can only do local tail calls (in same module, hidden 2877 // or protected) if we are generating PIC. 2878 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2879 return G->getGlobal()->hasHiddenVisibility() 2880 || G->getGlobal()->hasProtectedVisibility(); 2881 } 2882 2883 return false; 2884} 2885 2886/// isCallCompatibleAddress - Return the immediate to use if the specified 2887/// 32-bit value is representable in the immediate field of a BxA instruction. 2888static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 2889 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 2890 if (!C) return 0; 2891 2892 int Addr = C->getZExtValue(); 2893 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 2894 SignExtend32<26>(Addr) != Addr) 2895 return 0; // Top 6 bits have to be sext of immediate. 2896 2897 return DAG.getConstant((int)C->getZExtValue() >> 2, 2898 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 2899} 2900 2901namespace { 2902 2903struct TailCallArgumentInfo { 2904 SDValue Arg; 2905 SDValue FrameIdxOp; 2906 int FrameIdx; 2907 2908 TailCallArgumentInfo() : FrameIdx(0) {} 2909}; 2910 2911} 2912 2913/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 2914static void 2915StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 2916 SDValue Chain, 2917 const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, 2918 SmallVector<SDValue, 8> &MemOpChains, 2919 SDLoc dl) { 2920 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 2921 SDValue Arg = TailCallArgs[i].Arg; 2922 SDValue FIN = TailCallArgs[i].FrameIdxOp; 2923 int FI = TailCallArgs[i].FrameIdx; 2924 // Store relative to framepointer. 
2925 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 2926 MachinePointerInfo::getFixedStack(FI), 2927 false, false, 0)); 2928 } 2929} 2930 2931/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 2932/// the appropriate stack slot for the tail call optimized function call. 2933static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 2934 MachineFunction &MF, 2935 SDValue Chain, 2936 SDValue OldRetAddr, 2937 SDValue OldFP, 2938 int SPDiff, 2939 bool isPPC64, 2940 bool isDarwinABI, 2941 SDLoc dl) { 2942 if (SPDiff) { 2943 // Calculate the new stack slot for the return address. 2944 int SlotSize = isPPC64 ? 8 : 4; 2945 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 2946 isDarwinABI); 2947 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 2948 NewRetAddrLoc, true); 2949 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2950 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 2951 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 2952 MachinePointerInfo::getFixedStack(NewRetAddr), 2953 false, false, 0); 2954 2955 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 2956 // slot as the FP is never overwritten. 2957 if (isDarwinABI) { 2958 int NewFPLoc = 2959 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 2960 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 2961 true); 2962 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 2963 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 2964 MachinePointerInfo::getFixedStack(NewFPIdx), 2965 false, false, 0); 2966 } 2967 } 2968 return Chain; 2969} 2970 2971/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 2972/// the position of the argument. 
2973static void 2974CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 2975 SDValue Arg, int SPDiff, unsigned ArgOffset, 2976 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { 2977 int Offset = ArgOffset + SPDiff; 2978 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 2979 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 2980 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 2981 SDValue FIN = DAG.getFrameIndex(FI, VT); 2982 TailCallArgumentInfo Info; 2983 Info.Arg = Arg; 2984 Info.FrameIdxOp = FIN; 2985 Info.FrameIdx = FI; 2986 TailCallArguments.push_back(Info); 2987} 2988 2989/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 2990/// stack slot. Returns the chain as result and the loaded frame pointers in 2991/// LROpOut/FPOpout. Used when tail calling. 2992SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 2993 int SPDiff, 2994 SDValue Chain, 2995 SDValue &LROpOut, 2996 SDValue &FPOpOut, 2997 bool isDarwinABI, 2998 SDLoc dl) const { 2999 if (SPDiff) { 3000 // Load the LR and FP stack slot for later adjusting. 3001 EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; 3002 LROpOut = getReturnAddrFrameIndex(DAG); 3003 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3004 false, false, false, 0); 3005 Chain = SDValue(LROpOut.getNode(), 1); 3006 3007 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3008 // slot as the FP is never overwritten. 3009 if (isDarwinABI) { 3010 FPOpOut = getFramePointerFrameIndex(DAG); 3011 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3012 false, false, false, 0); 3013 Chain = SDValue(FPOpOut.getNode(), 1); 3014 } 3015 } 3016 return Chain; 3017} 3018 3019/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3020/// by "Src" to address "Dst" of size "Size". Alignment information is 3021/// specified by the specific parameter attribute. 
The copy will be passed as 3022/// a byval function parameter. 3023/// Sometimes what we are copying is the end of a larger object, the part that 3024/// does not fit in registers. 3025static SDValue 3026CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3027 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3028 SDLoc dl) { 3029 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3030 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3031 false, false, MachinePointerInfo(0), 3032 MachinePointerInfo(0)); 3033} 3034 3035/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3036/// tail calls. 3037static void 3038LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3039 SDValue Arg, SDValue PtrOff, int SPDiff, 3040 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3041 bool isVector, SmallVector<SDValue, 8> &MemOpChains, 3042 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments, 3043 SDLoc dl) { 3044 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3045 if (!isTailCall) { 3046 if (isVector) { 3047 SDValue StackPtr; 3048 if (isPPC64) 3049 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3050 else 3051 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3052 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3053 DAG.getConstant(ArgOffset, PtrVT)); 3054 } 3055 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3056 MachinePointerInfo(), false, false, 0)); 3057 // Calculate and remember argument location. 
3058 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3059 TailCallArguments); 3060} 3061 3062static 3063void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3064 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3065 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3066 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) { 3067 MachineFunction &MF = DAG.getMachineFunction(); 3068 3069 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3070 // might overwrite each other in case of tail call optimization. 3071 SmallVector<SDValue, 8> MemOpChains2; 3072 // Do not flag preceding copytoreg stuff together with the following stuff. 3073 InFlag = SDValue(); 3074 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3075 MemOpChains2, dl); 3076 if (!MemOpChains2.empty()) 3077 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3078 &MemOpChains2[0], MemOpChains2.size()); 3079 3080 // Store the return address to the appropriate stack slot. 3081 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3082 isPPC64, isDarwinABI, dl); 3083 3084 // Emit callseq_end just before tailcall node. 3085 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3086 DAG.getIntPtrConstant(0, true), InFlag, dl); 3087 InFlag = Chain.getValue(1); 3088} 3089 3090static 3091unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3092 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3093 SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, 3094 SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys, 3095 const PPCSubtarget &PPCSubTarget) { 3096 3097 bool isPPC64 = PPCSubTarget.isPPC64(); 3098 bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); 3099 3100 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3101 NodeTys.push_back(MVT::Other); // Returns a chain 3102 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 
3103 3104 unsigned CallOpc = PPCISD::CALL; 3105 3106 bool needIndirectCall = true; 3107 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3108 // If this is an absolute destination address, use the munged value. 3109 Callee = SDValue(Dest, 0); 3110 needIndirectCall = false; 3111 } 3112 3113 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3114 // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 3115 // Use indirect calls for ALL functions calls in JIT mode, since the 3116 // far-call stubs may be outside relocation limits for a BL instruction. 3117 if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { 3118 unsigned OpFlags = 0; 3119 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3120 (PPCSubTarget.getTargetTriple().isMacOSX() && 3121 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3122 (G->getGlobal()->isDeclaration() || 3123 G->getGlobal()->isWeakForLinker())) { 3124 // PC-relative references to external symbols should go through $stub, 3125 // unless we're building with the leopard linker or later, which 3126 // automatically synthesizes these stubs. 3127 OpFlags = PPCII::MO_DARWIN_STUB; 3128 } 3129 3130 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3131 // every direct call is) turn it into a TargetGlobalAddress / 3132 // TargetExternalSymbol node so that legalize doesn't hack it. 
3133 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3134 Callee.getValueType(), 3135 0, OpFlags); 3136 needIndirectCall = false; 3137 } 3138 } 3139 3140 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3141 unsigned char OpFlags = 0; 3142 3143 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3144 (PPCSubTarget.getTargetTriple().isMacOSX() && 3145 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { 3146 // PC-relative references to external symbols should go through $stub, 3147 // unless we're building with the leopard linker or later, which 3148 // automatically synthesizes these stubs. 3149 OpFlags = PPCII::MO_DARWIN_STUB; 3150 } 3151 3152 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3153 OpFlags); 3154 needIndirectCall = false; 3155 } 3156 3157 if (needIndirectCall) { 3158 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3159 // to do the call, we can't use PPCISD::CALL. 3160 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3161 3162 if (isSVR4ABI && isPPC64) { 3163 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3164 // entry point, but to the function descriptor (the function entry point 3165 // address is part of the function descriptor though). 3166 // The function descriptor is a three doubleword structure with the 3167 // following fields: function entry point, TOC base address and 3168 // environment pointer. 3169 // Thus for a call through a function pointer, the following actions need 3170 // to be performed: 3171 // 1. Save the TOC of the caller in the TOC save area of its stack 3172 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3173 // 2. Load the address of the function entry point from the function 3174 // descriptor. 3175 // 3. Load the TOC of the callee from the function descriptor into r2. 3176 // 4. Load the environment pointer from the function descriptor into 3177 // r11. 3178 // 5. 
Branch to the function entry point address. 3179 // 6. On return of the callee, the TOC of the caller needs to be 3180 // restored (this is done in FinishCall()). 3181 // 3182 // All those operations are flagged together to ensure that no other 3183 // operations can be scheduled in between. E.g. without flagging the 3184 // operations together, a TOC access in the caller could be scheduled 3185 // between the load of the callee TOC and the branch to the callee, which 3186 // results in the TOC access going through the TOC of the callee instead 3187 // of going through the TOC of the caller, which leads to incorrect code. 3188 3189 // Load the address of the function entry point from the function 3190 // descriptor. 3191 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3192 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, 3193 InFlag.getNode() ? 3 : 2); 3194 Chain = LoadFuncPtr.getValue(1); 3195 InFlag = LoadFuncPtr.getValue(2); 3196 3197 // Load environment pointer into r11. 3198 // Offset of the environment pointer within the function descriptor. 3199 SDValue PtrOff = DAG.getIntPtrConstant(16); 3200 3201 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3202 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3203 InFlag); 3204 Chain = LoadEnvPtr.getValue(1); 3205 InFlag = LoadEnvPtr.getValue(2); 3206 3207 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3208 InFlag); 3209 Chain = EnvVal.getValue(0); 3210 InFlag = EnvVal.getValue(1); 3211 3212 // Load TOC of the callee into r2. We are using a target-specific load 3213 // with r2 hard coded, because the result of a target-independent load 3214 // would never go directly into r2, since r2 is a reserved register (which 3215 // prevents the register allocator from allocating it), resulting in an 3216 // additional register being allocated and an unnecessary move instruction 3217 // being generated. 
3218 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3219 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3220 Callee, InFlag); 3221 Chain = LoadTOCPtr.getValue(0); 3222 InFlag = LoadTOCPtr.getValue(1); 3223 3224 MTCTROps[0] = Chain; 3225 MTCTROps[1] = LoadFuncPtr; 3226 MTCTROps[2] = InFlag; 3227 } 3228 3229 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 3230 2 + (InFlag.getNode() != 0)); 3231 InFlag = Chain.getValue(1); 3232 3233 NodeTys.clear(); 3234 NodeTys.push_back(MVT::Other); 3235 NodeTys.push_back(MVT::Glue); 3236 Ops.push_back(Chain); 3237 CallOpc = PPCISD::BCTRL; 3238 Callee.setNode(0); 3239 // Add use of X11 (holding environment pointer) 3240 if (isSVR4ABI && isPPC64) 3241 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3242 // Add CTR register as callee so a bctr can be emitted later. 3243 if (isTailCall) 3244 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3245 } 3246 3247 // If this is a direct call, pass the chain and the callee. 3248 if (Callee.getNode()) { 3249 Ops.push_back(Chain); 3250 Ops.push_back(Callee); 3251 } 3252 // If this is a tail call add stack pointer delta. 3253 if (isTailCall) 3254 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3255 3256 // Add argument registers to the end of the list so that they are known live 3257 // into the call. 
3258 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3259 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3260 RegsToPass[i].second.getValueType())); 3261 3262 return CallOpc; 3263} 3264 3265static 3266bool isLocalCall(const SDValue &Callee) 3267{ 3268 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3269 return !G->getGlobal()->isDeclaration() && 3270 !G->getGlobal()->isWeakForLinker(); 3271 return false; 3272} 3273 3274SDValue 3275PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3276 CallingConv::ID CallConv, bool isVarArg, 3277 const SmallVectorImpl<ISD::InputArg> &Ins, 3278 SDLoc dl, SelectionDAG &DAG, 3279 SmallVectorImpl<SDValue> &InVals) const { 3280 3281 SmallVector<CCValAssign, 16> RVLocs; 3282 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3283 getTargetMachine(), RVLocs, *DAG.getContext()); 3284 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3285 3286 // Copy all of the result registers out of their specified physreg. 
3287 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3288 CCValAssign &VA = RVLocs[i]; 3289 assert(VA.isRegLoc() && "Can only return in registers!"); 3290 3291 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3292 VA.getLocReg(), VA.getLocVT(), InFlag); 3293 Chain = Val.getValue(1); 3294 InFlag = Val.getValue(2); 3295 3296 switch (VA.getLocInfo()) { 3297 default: llvm_unreachable("Unknown loc info!"); 3298 case CCValAssign::Full: break; 3299 case CCValAssign::AExt: 3300 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3301 break; 3302 case CCValAssign::ZExt: 3303 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3304 DAG.getValueType(VA.getValVT())); 3305 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3306 break; 3307 case CCValAssign::SExt: 3308 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3309 DAG.getValueType(VA.getValVT())); 3310 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3311 break; 3312 } 3313 3314 InVals.push_back(Val); 3315 } 3316 3317 return Chain; 3318} 3319 3320SDValue 3321PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3322 bool isTailCall, bool isVarArg, 3323 SelectionDAG &DAG, 3324 SmallVector<std::pair<unsigned, SDValue>, 8> 3325 &RegsToPass, 3326 SDValue InFlag, SDValue Chain, 3327 SDValue &Callee, 3328 int SPDiff, unsigned NumBytes, 3329 const SmallVectorImpl<ISD::InputArg> &Ins, 3330 SmallVectorImpl<SDValue> &InVals) const { 3331 std::vector<EVT> NodeTys; 3332 SmallVector<SDValue, 8> Ops; 3333 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3334 isTailCall, RegsToPass, Ops, NodeTys, 3335 PPCSubTarget); 3336 3337 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3338 if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) 3339 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3340 3341 // When performing tail call optimization the callee pops its arguments off 3342 // the stack. 
Account for this here so these bytes can be pushed back on in 3343 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3344 int BytesCalleePops = 3345 (CallConv == CallingConv::Fast && 3346 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3347 3348 // Add a register mask operand representing the call-preserved registers. 3349 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 3350 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3351 assert(Mask && "Missing call preserved mask for calling convention"); 3352 Ops.push_back(DAG.getRegisterMask(Mask)); 3353 3354 if (InFlag.getNode()) 3355 Ops.push_back(InFlag); 3356 3357 // Emit tail call. 3358 if (isTailCall) { 3359 assert(((Callee.getOpcode() == ISD::Register && 3360 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 3361 Callee.getOpcode() == ISD::TargetExternalSymbol || 3362 Callee.getOpcode() == ISD::TargetGlobalAddress || 3363 isa<ConstantSDNode>(Callee)) && 3364 "Expecting an global address, external symbol, absolute value or register"); 3365 3366 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); 3367 } 3368 3369 // Add a NOP immediately after the branch instruction when using the 64-bit 3370 // SVR4 ABI. At link time, if caller and callee are in a different module and 3371 // thus have a different TOC, the call will be replaced with a call to a stub 3372 // function which saves the current TOC, loads the TOC of the callee and 3373 // branches to the callee. The NOP will be replaced with a load instruction 3374 // which restores the TOC of the caller from the TOC save slot of the current 3375 // stack frame. If caller and callee belong to the same module (and have the 3376 // same TOC), the NOP will remain unchanged. 3377 3378 bool needsTOCRestore = false; 3379 if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { 3380 if (CallOpc == PPCISD::BCTRL) { 3381 // This is a call through a function pointer. 
      // Restore the caller TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      needsTOCRestore = true;
    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
      // Otherwise insert NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  // Emit the call node itself.
  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  if (needsTOCRestore) {
    // Reload the caller's TOC pointer (r2) from its save slot immediately
    // after the call, glued to it so nothing can be scheduled in between.
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag, dl);
  // Only propagate the glue result when there are results to copy out.
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

/// LowerCall - Common entry point for outgoing calls.  Re-checks tail-call
/// eligibility, then dispatches to the ABI-specific lowering routine
/// (64-bit SVR4, 32-bit SVR4, or Darwin).
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  // A requested tail call is only honored if it is still eligible after
  // examining the callee and the incoming argument list.
  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (PPCSubTarget.isSVR4ABI()) {
    if (PPCSubTarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);
}

/// LowerCall_32SVR4 - Lower an outgoing call using the 32-bit SVR4 ABI.
SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
3474 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3475 CallConv == CallingConv::Fast) 3476 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3477 3478 // Count how many bytes are to be pushed on the stack, including the linkage 3479 // area, parameter list area and the part of the local variable space which 3480 // contains copies of aggregates which are passed by value. 3481 3482 // Assign locations to all of the outgoing arguments. 3483 SmallVector<CCValAssign, 16> ArgLocs; 3484 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3485 getTargetMachine(), ArgLocs, *DAG.getContext()); 3486 3487 // Reserve space for the linkage area on the stack. 3488 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 3489 3490 if (isVarArg) { 3491 // Handle fixed and variable vector arguments differently. 3492 // Fixed vector arguments go into registers as long as registers are 3493 // available. Variable vector arguments always go into memory. 3494 unsigned NumArgs = Outs.size(); 3495 3496 for (unsigned i = 0; i != NumArgs; ++i) { 3497 MVT ArgVT = Outs[i].VT; 3498 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3499 bool Result; 3500 3501 if (Outs[i].IsFixed) { 3502 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 3503 CCInfo); 3504 } else { 3505 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 3506 ArgFlags, CCInfo); 3507 } 3508 3509 if (Result) { 3510#ifndef NDEBUG 3511 errs() << "Call operand #" << i << " has unhandled type " 3512 << EVT(ArgVT).getEVTString() << "\n"; 3513#endif 3514 llvm_unreachable(0); 3515 } 3516 } 3517 } else { 3518 // All arguments are treated the same. 3519 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 3520 } 3521 3522 // Assign locations to all of the outgoing aggregate by value arguments. 
3523 SmallVector<CCValAssign, 16> ByValArgLocs; 3524 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3525 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 3526 3527 // Reserve stack space for the allocations in CCInfo. 3528 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3529 3530 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 3531 3532 // Size of the linkage area, parameter list area and the part of the local 3533 // space variable where copies of aggregates which are passed by value are 3534 // stored. 3535 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 3536 3537 // Calculate by how many bytes the stack has to be adjusted in case of tail 3538 // call optimization. 3539 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3540 3541 // Adjust the stack pointer for the new arguments... 3542 // These operations are automatically eliminated by the prolog/epilog pass 3543 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 3544 dl); 3545 SDValue CallSeqStart = Chain; 3546 3547 // Load the return address and frame pointer so it can be moved somewhere else 3548 // later. 3549 SDValue LROp, FPOp; 3550 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 3551 dl); 3552 3553 // Set up a copy of the stack pointer for use loading and storing any 3554 // arguments that may not fit in the registers available for argument 3555 // passing. 3556 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3557 3558 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3559 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3560 SmallVector<SDValue, 8> MemOpChains; 3561 3562 bool seenFloatArg = false; 3563 // Walk the register/memloc assignments, inserting copies/loads. 
3564 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 3565 i != e; 3566 ++i) { 3567 CCValAssign &VA = ArgLocs[i]; 3568 SDValue Arg = OutVals[i]; 3569 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3570 3571 if (Flags.isByVal()) { 3572 // Argument is an aggregate which is passed by value, thus we need to 3573 // create a copy of it in the local variable space of the current stack 3574 // frame (which is the stack frame of the caller) and pass the address of 3575 // this copy to the callee. 3576 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 3577 CCValAssign &ByValVA = ByValArgLocs[j++]; 3578 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 3579 3580 // Memory reserved in the local variable space of the callers stack frame. 3581 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 3582 3583 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3584 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3585 3586 // Create a copy of the argument in the local area of the current 3587 // stack frame. 3588 SDValue MemcpyCall = 3589 CreateCopyOfByValArgument(Arg, PtrOff, 3590 CallSeqStart.getNode()->getOperand(0), 3591 Flags, DAG, dl); 3592 3593 // This must go outside the CALLSEQ_START..END. 3594 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3595 CallSeqStart.getNode()->getOperand(1), 3596 SDLoc(MemcpyCall)); 3597 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3598 NewCallSeqStart.getNode()); 3599 Chain = CallSeqStart = NewCallSeqStart; 3600 3601 // Pass the address of the aggregate copy on the stack either in a 3602 // physical register or in the parameter list area of the current stack 3603 // frame to the callee. 3604 Arg = PtrOff; 3605 } 3606 3607 if (VA.isRegLoc()) { 3608 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 3609 // Put argument in a physical register. 3610 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 3611 } else { 3612 // Put argument in the parameter list area of the current stack frame. 
3613 assert(VA.isMemLoc()); 3614 unsigned LocMemOffset = VA.getLocMemOffset(); 3615 3616 if (!isTailCall) { 3617 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3618 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3619 3620 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3621 MachinePointerInfo(), 3622 false, false, 0)); 3623 } else { 3624 // Calculate and remember argument location. 3625 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 3626 TailCallArguments); 3627 } 3628 } 3629 } 3630 3631 if (!MemOpChains.empty()) 3632 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3633 &MemOpChains[0], MemOpChains.size()); 3634 3635 // Build a sequence of copy-to-reg nodes chained together with token chain 3636 // and flag operands which copy the outgoing args into the appropriate regs. 3637 SDValue InFlag; 3638 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3639 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3640 RegsToPass[i].second, InFlag); 3641 InFlag = Chain.getValue(1); 3642 } 3643 3644 // Set CR bit 6 to true if this is a vararg call with floating args passed in 3645 // registers. 3646 if (isVarArg) { 3647 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3648 SDValue Ops[] = { Chain, InFlag }; 3649 3650 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 3651 dl, VTs, Ops, InFlag.getNode() ? 2 : 1); 3652 3653 InFlag = Chain.getValue(1); 3654 } 3655 3656 if (isTailCall) 3657 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 3658 false, TailCallArguments); 3659 3660 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3661 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3662 Ins, InVals); 3663} 3664 3665// Copy an argument into memory, being careful to do this outside the 3666// call sequence for the call to which the argument belongs. 
SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                              SDValue CallSeqStart,
                                              ISD::ArgFlagsTy Flags,
                                              SelectionDAG &DAG,
                                              SDLoc dl) const {
  // Emit the memcpy chained onto CALLSEQ_START's incoming chain (operand 0)
  // rather than onto CALLSEQ_START itself.
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1),
                             SDLoc(MemcpyCall));
  // Redirect all users of the old CALLSEQ_START to the new one, so the call
  // sequence now begins after the memcpy.
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

/// LowerCall_64SVR4 - Lower an outgoing call using the 64-bit SVR4 ABI.
SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call. As a consequence the frame pointer will be used for dynamicalloc
  // and restoring the callers stack pointer in this functions epilog. This is
  // done because by tail calling the called function might overwrite the value
  // in this function's (MF) stack pointer stack slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with at least 48 bytes, which
  // is reserved space for [SP][CR][LR][3 x unused].
  // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
  // of this call.
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
                                         Outs, OutVals, nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true),
                               dl);
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so it can be move somewhere else
  // later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  // ArgOffset tracks the current offset into the parameter save area,
  // starting just past the linkage area.
  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const uint16_t GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const uint16_t *FPR = GetFPR();

  static const uint16_t VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          // Load the aggregate directly into the GPR; the extending load
          // leaves the value right-justified in the register.
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      // No GPR left: right-justify the copy inside its doubleword slot in
      // the parameter save area.
      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue Const = DAG.getConstant(PtrByteSize - Size,
                                        PtrOff.getValueType());
        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
                                   MachinePointerInfo(),
                                   false, false, false, 0);
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          // Out of GPRs: the remainder of the aggregate stays in the memory
          // copy made above; just account for the rounded-up space.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // A single float or an aggregate containing only a single float
          // must be passed right-justified in the stack doubleword, and
          // in the GPR, if one is available.
          SDValue StoreOff;
          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
          } else
            StoreOff = PtrOff;

          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            // Reload the just-stored doubleword into the shadow GPR.
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else if (GPR_idx != NumGPRs)
          // If we have any FPRs remaining, we may also have GPRs remaining.
          ++GPR_idx;
      } else {
        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32) {
          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          // Pad to 16-byte alignment, consuming the shadow GPRs as we go.
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also shadow the vector in as many GPRs as remain available.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
4022 if (!isTailCall && 4023 !dyn_cast<GlobalAddressSDNode>(Callee) && 4024 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4025 !isBLACompatibleAddress(Callee, DAG)) { 4026 // Load r2 into a virtual register and store it to the TOC save area. 4027 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4028 // TOC save area offset. 4029 SDValue PtrOff = DAG.getIntPtrConstant(40); 4030 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4031 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4032 false, false, 0); 4033 // R12 must contain the address of an indirect callee. This does not 4034 // mean the MTCTR instruction must use R12; it's easier to model this 4035 // as an extra parameter, so do that. 4036 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4037 } 4038 4039 // Build a sequence of copy-to-reg nodes chained together with token chain 4040 // and flag operands which copy the outgoing args into the appropriate regs. 
4041 SDValue InFlag; 4042 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4043 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4044 RegsToPass[i].second, InFlag); 4045 InFlag = Chain.getValue(1); 4046 } 4047 4048 if (isTailCall) 4049 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4050 FPOp, true, TailCallArguments); 4051 4052 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4053 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4054 Ins, InVals); 4055} 4056 4057SDValue 4058PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4059 CallingConv::ID CallConv, bool isVarArg, 4060 bool isTailCall, 4061 const SmallVectorImpl<ISD::OutputArg> &Outs, 4062 const SmallVectorImpl<SDValue> &OutVals, 4063 const SmallVectorImpl<ISD::InputArg> &Ins, 4064 SDLoc dl, SelectionDAG &DAG, 4065 SmallVectorImpl<SDValue> &InVals) const { 4066 4067 unsigned NumOps = Outs.size(); 4068 4069 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4070 bool isPPC64 = PtrVT == MVT::i64; 4071 unsigned PtrByteSize = isPPC64 ? 8 : 4; 4072 4073 MachineFunction &MF = DAG.getMachineFunction(); 4074 4075 // Mark this function as potentially containing a function that contains a 4076 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4077 // and restoring the callers stack pointer in this functions epilog. This is 4078 // done because by tail calling the called function might overwrite the value 4079 // in this function's (MF) stack pointer stack slot 0(SP). 4080 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4081 CallConv == CallingConv::Fast) 4082 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4083 4084 unsigned nAltivecParamsAtEnd = 0; 4085 4086 // Count how many bytes are to be pushed on the stack, including the linkage 4087 // area, and parameter passing area. We start with 24/48 bytes, which is 4088 // prereserved space for [SP][CR][LR][3 x unused]. 
4089 unsigned NumBytes = 4090 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, 4091 Outs, OutVals, 4092 nAltivecParamsAtEnd); 4093 4094 // Calculate by how many bytes the stack has to be adjusted in case of tail 4095 // call optimization. 4096 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4097 4098 // To protect arguments on the stack from being clobbered in a tail call, 4099 // force all the loads to happen before doing any other lowering. 4100 if (isTailCall) 4101 Chain = DAG.getStackArgumentTokenFactor(Chain); 4102 4103 // Adjust the stack pointer for the new arguments... 4104 // These operations are automatically eliminated by the prolog/epilog pass 4105 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4106 dl); 4107 SDValue CallSeqStart = Chain; 4108 4109 // Load the return address and frame pointer so it can be move somewhere else 4110 // later. 4111 SDValue LROp, FPOp; 4112 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4113 dl); 4114 4115 // Set up a copy of the stack pointer for use loading and storing any 4116 // arguments that may not fit in the registers available for argument 4117 // passing. 4118 SDValue StackPtr; 4119 if (isPPC64) 4120 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4121 else 4122 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4123 4124 // Figure out which arguments are going to go in registers, and which in 4125 // memory. Also, if this is a vararg function, floating point operations 4126 // must be stored to our stack, and loaded into integer regs as well, if 4127 // any integer regs are available for argument passing. 4128 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 4129 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4130 4131 static const uint16_t GPR_32[] = { // 32-bit registers. 
4132 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4133 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4134 }; 4135 static const uint16_t GPR_64[] = { // 64-bit registers. 4136 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4137 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4138 }; 4139 static const uint16_t *FPR = GetFPR(); 4140 4141 static const uint16_t VR[] = { 4142 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4143 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4144 }; 4145 const unsigned NumGPRs = array_lengthof(GPR_32); 4146 const unsigned NumFPRs = 13; 4147 const unsigned NumVRs = array_lengthof(VR); 4148 4149 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 4150 4151 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4152 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4153 4154 SmallVector<SDValue, 8> MemOpChains; 4155 for (unsigned i = 0; i != NumOps; ++i) { 4156 SDValue Arg = OutVals[i]; 4157 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4158 4159 // PtrOff will be used to store the current argument to the stack if a 4160 // register cannot be found for it. 4161 SDValue PtrOff; 4162 4163 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4164 4165 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4166 4167 // On PPC64, promote integers to 64-bit values. 4168 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4169 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4170 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4171 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4172 } 4173 4174 // FIXME memcpy is used way more than necessary. Correctness first. 4175 // Note: "by value" is code for passing a structure by value, not 4176 // basic types. 4177 if (Flags.isByVal()) { 4178 unsigned Size = Flags.getByValSize(); 4179 // Very small objects are passed right-justified. Everything else is 4180 // passed left-justified. 4181 if (Size==1 || Size==2) { 4182 EVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; 4183 if (GPR_idx != NumGPRs) { 4184 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4185 MachinePointerInfo(), VT, 4186 false, false, 0); 4187 MemOpChains.push_back(Load.getValue(1)); 4188 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4189 4190 ArgOffset += PtrByteSize; 4191 } else { 4192 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4193 PtrOff.getValueType()); 4194 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4195 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4196 CallSeqStart, 4197 Flags, DAG, dl); 4198 ArgOffset += PtrByteSize; 4199 } 4200 continue; 4201 } 4202 // Copy entire object into memory. There are cases where gcc-generated 4203 // code assumes it is there, even if it could be put entirely into 4204 // registers. (This is not what the doc says.) 4205 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4206 CallSeqStart, 4207 Flags, DAG, dl); 4208 4209 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4210 // copy the pieces of the object that fit into registers from the 4211 // parameter save area. 
4212 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4213 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4214 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4215 if (GPR_idx != NumGPRs) { 4216 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4217 MachinePointerInfo(), 4218 false, false, false, 0); 4219 MemOpChains.push_back(Load.getValue(1)); 4220 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4221 ArgOffset += PtrByteSize; 4222 } else { 4223 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4224 break; 4225 } 4226 } 4227 continue; 4228 } 4229 4230 switch (Arg.getValueType().getSimpleVT().SimpleTy) { 4231 default: llvm_unreachable("Unexpected ValueType for argument!"); 4232 case MVT::i32: 4233 case MVT::i64: 4234 if (GPR_idx != NumGPRs) { 4235 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4236 } else { 4237 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4238 isPPC64, isTailCall, false, MemOpChains, 4239 TailCallArguments, dl); 4240 } 4241 ArgOffset += PtrByteSize; 4242 break; 4243 case MVT::f32: 4244 case MVT::f64: 4245 if (FPR_idx != NumFPRs) { 4246 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4247 4248 if (isVarArg) { 4249 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4250 MachinePointerInfo(), false, false, 0); 4251 MemOpChains.push_back(Store); 4252 4253 // Float varargs are always shadowed in available integer registers 4254 if (GPR_idx != NumGPRs) { 4255 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4256 MachinePointerInfo(), false, false, 4257 false, 0); 4258 MemOpChains.push_back(Load.getValue(1)); 4259 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4260 } 4261 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4262 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4263 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4264 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4265 
MachinePointerInfo(), 4266 false, false, false, 0); 4267 MemOpChains.push_back(Load.getValue(1)); 4268 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4269 } 4270 } else { 4271 // If we have any FPRs remaining, we may also have GPRs remaining. 4272 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4273 // GPRs. 4274 if (GPR_idx != NumGPRs) 4275 ++GPR_idx; 4276 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4277 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4278 ++GPR_idx; 4279 } 4280 } else 4281 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4282 isPPC64, isTailCall, false, MemOpChains, 4283 TailCallArguments, dl); 4284 if (isPPC64) 4285 ArgOffset += 8; 4286 else 4287 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4288 break; 4289 case MVT::v4f32: 4290 case MVT::v4i32: 4291 case MVT::v8i16: 4292 case MVT::v16i8: 4293 if (isVarArg) { 4294 // These go aligned on the stack, or in the corresponding R registers 4295 // when within range. The Darwin PPC ABI doc claims they also go in 4296 // V registers; in fact gcc does this only for arguments that are 4297 // prototyped, not for those that match the ... We do it for all 4298 // arguments, seems to work. 4299 while (ArgOffset % 16 !=0) { 4300 ArgOffset += PtrByteSize; 4301 if (GPR_idx != NumGPRs) 4302 GPR_idx++; 4303 } 4304 // We could elide this store in the case where the object fits 4305 // entirely in R registers. Maybe later. 
4306 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4307 DAG.getConstant(ArgOffset, PtrVT)); 4308 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4309 MachinePointerInfo(), false, false, 0); 4310 MemOpChains.push_back(Store); 4311 if (VR_idx != NumVRs) { 4312 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4313 MachinePointerInfo(), 4314 false, false, false, 0); 4315 MemOpChains.push_back(Load.getValue(1)); 4316 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4317 } 4318 ArgOffset += 16; 4319 for (unsigned i=0; i<16; i+=PtrByteSize) { 4320 if (GPR_idx == NumGPRs) 4321 break; 4322 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4323 DAG.getConstant(i, PtrVT)); 4324 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4325 false, false, false, 0); 4326 MemOpChains.push_back(Load.getValue(1)); 4327 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4328 } 4329 break; 4330 } 4331 4332 // Non-varargs Altivec params generally go in registers, but have 4333 // stack space allocated at the end. 4334 if (VR_idx != NumVRs) { 4335 // Doesn't have GPR space allocated. 4336 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4337 } else if (nAltivecParamsAtEnd==0) { 4338 // We are emitting Altivec params in order. 4339 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4340 isPPC64, isTailCall, true, MemOpChains, 4341 TailCallArguments, dl); 4342 ArgOffset += 16; 4343 } 4344 break; 4345 } 4346 } 4347 // If all Altivec parameters fit in registers, as they usually do, 4348 // they get stack space following the non-Altivec parameters. We 4349 // don't track this here because nobody below needs it. 4350 // If there are more Altivec parameters than fit in registers emit 4351 // the stores here. 4352 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 4353 unsigned j = 0; 4354 // Offset is aligned; skip 1st 12 params which go in V registers. 
4355 ArgOffset = ((ArgOffset+15)/16)*16; 4356 ArgOffset += 12*16; 4357 for (unsigned i = 0; i != NumOps; ++i) { 4358 SDValue Arg = OutVals[i]; 4359 EVT ArgType = Outs[i].VT; 4360 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 4361 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 4362 if (++j > NumVRs) { 4363 SDValue PtrOff; 4364 // We are emitting Altivec params in order. 4365 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4366 isPPC64, isTailCall, true, MemOpChains, 4367 TailCallArguments, dl); 4368 ArgOffset += 16; 4369 } 4370 } 4371 } 4372 } 4373 4374 if (!MemOpChains.empty()) 4375 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4376 &MemOpChains[0], MemOpChains.size()); 4377 4378 // On Darwin, R12 must contain the address of an indirect callee. This does 4379 // not mean the MTCTR instruction must use R12; it's easier to model this as 4380 // an extra parameter, so do that. 4381 if (!isTailCall && 4382 !dyn_cast<GlobalAddressSDNode>(Callee) && 4383 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4384 !isBLACompatibleAddress(Callee, DAG)) 4385 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 4386 PPC::R12), Callee)); 4387 4388 // Build a sequence of copy-to-reg nodes chained together with token chain 4389 // and flag operands which copy the outgoing args into the appropriate regs. 
4390 SDValue InFlag; 4391 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4392 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4393 RegsToPass[i].second, InFlag); 4394 InFlag = Chain.getValue(1); 4395 } 4396 4397 if (isTailCall) 4398 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 4399 FPOp, true, TailCallArguments); 4400 4401 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4402 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4403 Ins, InVals); 4404} 4405 4406bool 4407PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 4408 MachineFunction &MF, bool isVarArg, 4409 const SmallVectorImpl<ISD::OutputArg> &Outs, 4410 LLVMContext &Context) const { 4411 SmallVector<CCValAssign, 16> RVLocs; 4412 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 4413 RVLocs, Context); 4414 return CCInfo.CheckReturn(Outs, RetCC_PPC); 4415} 4416 4417SDValue 4418PPCTargetLowering::LowerReturn(SDValue Chain, 4419 CallingConv::ID CallConv, bool isVarArg, 4420 const SmallVectorImpl<ISD::OutputArg> &Outs, 4421 const SmallVectorImpl<SDValue> &OutVals, 4422 SDLoc dl, SelectionDAG &DAG) const { 4423 4424 SmallVector<CCValAssign, 16> RVLocs; 4425 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4426 getTargetMachine(), RVLocs, *DAG.getContext()); 4427 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 4428 4429 SDValue Flag; 4430 SmallVector<SDValue, 4> RetOps(1, Chain); 4431 4432 // Copy the result values into the output registers. 
4433 for (unsigned i = 0; i != RVLocs.size(); ++i) { 4434 CCValAssign &VA = RVLocs[i]; 4435 assert(VA.isRegLoc() && "Can only return in registers!"); 4436 4437 SDValue Arg = OutVals[i]; 4438 4439 switch (VA.getLocInfo()) { 4440 default: llvm_unreachable("Unknown loc info!"); 4441 case CCValAssign::Full: break; 4442 case CCValAssign::AExt: 4443 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 4444 break; 4445 case CCValAssign::ZExt: 4446 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 4447 break; 4448 case CCValAssign::SExt: 4449 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 4450 break; 4451 } 4452 4453 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 4454 Flag = Chain.getValue(1); 4455 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4456 } 4457 4458 RetOps[0] = Chain; // Update chain. 4459 4460 // Add the flag if we have it. 4461 if (Flag.getNode()) 4462 RetOps.push_back(Flag); 4463 4464 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, 4465 &RetOps[0], RetOps.size()); 4466} 4467 4468SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 4469 const PPCSubtarget &Subtarget) const { 4470 // When we pop the dynamic allocation we need to restore the SP link. 4471 SDLoc dl(Op); 4472 4473 // Get the corect type for pointers. 4474 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4475 4476 // Construct the stack pointer operand. 4477 bool isPPC64 = Subtarget.isPPC64(); 4478 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 4479 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 4480 4481 // Get the operands for the STACKRESTORE. 4482 SDValue Chain = Op.getOperand(0); 4483 SDValue SaveSP = Op.getOperand(1); 4484 4485 // Load the old link SP. 4486 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 4487 MachinePointerInfo(), 4488 false, false, false, 0); 4489 4490 // Restore the stack pointer. 
4491 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 4492 4493 // Store the old link SP. 4494 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 4495 false, false, 0); 4496} 4497 4498 4499 4500SDValue 4501PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 4502 MachineFunction &MF = DAG.getMachineFunction(); 4503 bool isPPC64 = PPCSubTarget.isPPC64(); 4504 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 4505 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4506 4507 // Get current frame pointer save index. The users of this index will be 4508 // primarily DYNALLOC instructions. 4509 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 4510 int RASI = FI->getReturnAddrSaveIndex(); 4511 4512 // If the frame pointer save index hasn't been defined yet. 4513 if (!RASI) { 4514 // Find out what the fix offset of the frame pointer save area. 4515 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); 4516 // Allocate the frame index for frame pointer save area. 4517 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); 4518 // Save the result. 4519 FI->setReturnAddrSaveIndex(RASI); 4520 } 4521 return DAG.getFrameIndex(RASI, PtrVT); 4522} 4523 4524SDValue 4525PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 4526 MachineFunction &MF = DAG.getMachineFunction(); 4527 bool isPPC64 = PPCSubTarget.isPPC64(); 4528 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 4529 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4530 4531 // Get current frame pointer save index. The users of this index will be 4532 // primarily DYNALLOC instructions. 4533 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 4534 int FPSI = FI->getFramePointerSaveIndex(); 4535 4536 // If the frame pointer save index hasn't been defined yet. 4537 if (!FPSI) { 4538 // Find out what the fix offset of the frame pointer save area. 
4539 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, 4540 isDarwinABI); 4541 4542 // Allocate the frame index for frame pointer save area. 4543 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 4544 // Save the result. 4545 FI->setFramePointerSaveIndex(FPSI); 4546 } 4547 return DAG.getFrameIndex(FPSI, PtrVT); 4548} 4549 4550SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 4551 SelectionDAG &DAG, 4552 const PPCSubtarget &Subtarget) const { 4553 // Get the inputs. 4554 SDValue Chain = Op.getOperand(0); 4555 SDValue Size = Op.getOperand(1); 4556 SDLoc dl(Op); 4557 4558 // Get the corect type for pointers. 4559 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4560 // Negate the size. 4561 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 4562 DAG.getConstant(0, PtrVT), Size); 4563 // Construct a node for the frame pointer save index. 4564 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 4565 // Build a DYNALLOC node. 4566 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 4567 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 4568 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); 4569} 4570 4571SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 4572 SelectionDAG &DAG) const { 4573 SDLoc DL(Op); 4574 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 4575 DAG.getVTList(MVT::i32, MVT::Other), 4576 Op.getOperand(0), Op.getOperand(1)); 4577} 4578 4579SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 4580 SelectionDAG &DAG) const { 4581 SDLoc DL(Op); 4582 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 4583 Op.getOperand(0), Op.getOperand(1)); 4584} 4585 4586/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 4587/// possible. 4588SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 4589 // Not FP? Not a fsel. 
4590 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 4591 !Op.getOperand(2).getValueType().isFloatingPoint()) 4592 return Op; 4593 4594 // We might be able to do better than this under some circumstances, but in 4595 // general, fsel-based lowering of select is a finite-math-only optimization. 4596 // For more information, see section F.3 of the 2.06 ISA specification. 4597 if (!DAG.getTarget().Options.NoInfsFPMath || 4598 !DAG.getTarget().Options.NoNaNsFPMath) 4599 return Op; 4600 4601 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 4602 4603 EVT ResVT = Op.getValueType(); 4604 EVT CmpVT = Op.getOperand(0).getValueType(); 4605 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4606 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 4607 SDLoc dl(Op); 4608 4609 // If the RHS of the comparison is a 0.0, we don't need to do the 4610 // subtraction at all. 4611 SDValue Sel1; 4612 if (isFloatingPointZero(RHS)) 4613 switch (CC) { 4614 default: break; // SETUO etc aren't handled by fsel. 
4615 case ISD::SETNE: 4616 std::swap(TV, FV); 4617 case ISD::SETEQ: 4618 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4619 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4620 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4621 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4622 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4623 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4624 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 4625 case ISD::SETULT: 4626 case ISD::SETLT: 4627 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 4628 case ISD::SETOGE: 4629 case ISD::SETGE: 4630 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4631 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4632 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4633 case ISD::SETUGT: 4634 case ISD::SETGT: 4635 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 4636 case ISD::SETOLE: 4637 case ISD::SETLE: 4638 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4639 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4640 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4641 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 4642 } 4643 4644 SDValue Cmp; 4645 switch (CC) { 4646 default: break; // SETUO etc aren't handled by fsel. 
4647 case ISD::SETNE: 4648 std::swap(TV, FV); 4649 case ISD::SETEQ: 4650 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4651 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4652 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4653 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4654 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4655 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4656 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4657 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 4658 case ISD::SETULT: 4659 case ISD::SETLT: 4660 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4661 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4662 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4663 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4664 case ISD::SETOGE: 4665 case ISD::SETGE: 4666 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4667 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4668 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4669 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4670 case ISD::SETUGT: 4671 case ISD::SETGT: 4672 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4673 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4674 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4675 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4676 case ISD::SETOLE: 4677 case ISD::SETLE: 4678 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4679 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4680 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4681 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4682 } 4683 return Op; 4684} 4685 4686// FIXME: Split this code up when LegalizeDAGTypes lands. 
4687SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 4688 SDLoc dl) const { 4689 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 4690 SDValue Src = Op.getOperand(0); 4691 if (Src.getValueType() == MVT::f32) 4692 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 4693 4694 SDValue Tmp; 4695 switch (Op.getValueType().getSimpleVT().SimpleTy) { 4696 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 4697 case MVT::i32: 4698 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 4699 (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : 4700 PPCISD::FCTIDZ), 4701 dl, MVT::f64, Src); 4702 break; 4703 case MVT::i64: 4704 assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && 4705 "i64 FP_TO_UINT is supported only with FPCVT"); 4706 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 4707 PPCISD::FCTIDUZ, 4708 dl, MVT::f64, Src); 4709 break; 4710 } 4711 4712 // Convert the FP value to an int value through memory. 4713 bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && 4714 (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); 4715 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 4716 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 4717 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 4718 4719 // Emit a store to the stack slot. 4720 SDValue Chain; 4721 if (i32Stack) { 4722 MachineFunction &MF = DAG.getMachineFunction(); 4723 MachineMemOperand *MMO = 4724 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 4725 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 4726 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 4727 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 4728 MVT::i32, MMO); 4729 } else 4730 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 4731 MPI, false, false, 0); 4732 4733 // Result is a load from the stack slot. 
If loading 4 bytes, make sure to 4734 // add in a bias. 4735 if (Op.getValueType() == MVT::i32 && !i32Stack) { 4736 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 4737 DAG.getConstant(4, FIPtr.getValueType())); 4738 MPI = MachinePointerInfo(); 4739 } 4740 4741 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, 4742 false, false, false, 0); 4743} 4744 4745SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 4746 SelectionDAG &DAG) const { 4747 SDLoc dl(Op); 4748 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 4749 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 4750 return SDValue(); 4751 4752 assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && 4753 "UINT_TO_FP is supported only with FPCVT"); 4754 4755 // If we have FCFIDS, then use it when converting to single-precision. 4756 // Otherwise, convert to double-precision and then round. 4757 unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4758 (Op.getOpcode() == ISD::UINT_TO_FP ? 4759 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 4760 (Op.getOpcode() == ISD::UINT_TO_FP ? 4761 PPCISD::FCFIDU : PPCISD::FCFID); 4762 MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4763 MVT::f32 : MVT::f64; 4764 4765 if (Op.getOperand(0).getValueType() == MVT::i64) { 4766 SDValue SINT = Op.getOperand(0); 4767 // When converting to single-precision, we actually need to convert 4768 // to double-precision first and then round to single-precision. 4769 // To avoid double-rounding effects during that operation, we have 4770 // to prepare the input operand. Bits that might be truncated when 4771 // converting to double-precision are replaced by a bit that won't 4772 // be lost at this stage, but is below the single-precision rounding 4773 // position. 4774 // 4775 // However, if -enable-unsafe-fp-math is in effect, accept double 4776 // rounding to avoid the extra overhead. 
4777 if (Op.getValueType() == MVT::f32 && 4778 !PPCSubTarget.hasFPCVT() && 4779 !DAG.getTarget().Options.UnsafeFPMath) { 4780 4781 // Twiddle input to make sure the low 11 bits are zero. (If this 4782 // is the case, we are guaranteed the value will fit into the 53 bit 4783 // mantissa of an IEEE double-precision value without rounding.) 4784 // If any of those low 11 bits were not zero originally, make sure 4785 // bit 12 (value 2048) is set instead, so that the final rounding 4786 // to single-precision gets the correct result. 4787 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4788 SINT, DAG.getConstant(2047, MVT::i64)); 4789 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 4790 Round, DAG.getConstant(2047, MVT::i64)); 4791 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 4792 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4793 Round, DAG.getConstant(-2048, MVT::i64)); 4794 4795 // However, we cannot use that value unconditionally: if the magnitude 4796 // of the input value is small, the bit-twiddling we did above might 4797 // end up visibly changing the output. Fortunately, in that case, we 4798 // don't need to twiddle bits since the original input will convert 4799 // exactly to double-precision floating-point already. Therefore, 4800 // construct a conditional to use the original value if the top 11 4801 // bits are all sign-bit copies, and use the rounded value computed 4802 // above otherwise. 
4803 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 4804 SINT, DAG.getConstant(53, MVT::i32)); 4805 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 4806 Cond, DAG.getConstant(1, MVT::i64)); 4807 Cond = DAG.getSetCC(dl, MVT::i32, 4808 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 4809 4810 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 4811 } 4812 4813 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 4814 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 4815 4816 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4817 FP = DAG.getNode(ISD::FP_ROUND, dl, 4818 MVT::f32, FP, DAG.getIntPtrConstant(0)); 4819 return FP; 4820 } 4821 4822 assert(Op.getOperand(0).getValueType() == MVT::i32 && 4823 "Unhandled INT_TO_FP type in custom expander!"); 4824 // Since we only generate this in 64-bit mode, we can take advantage of 4825 // 64-bit registers. In particular, sign extend the input value into the 4826 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 4827 // then lfd it and fcfid it. 4828 MachineFunction &MF = DAG.getMachineFunction(); 4829 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 4830 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4831 4832 SDValue Ld; 4833 if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { 4834 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 4835 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4836 4837 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 4838 MachinePointerInfo::getFixedStack(FrameIdx), 4839 false, false, 0); 4840 4841 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 4842 "Expected an i32 store"); 4843 MachineMemOperand *MMO = 4844 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 4845 MachineMemOperand::MOLoad, 4, 4); 4846 SDValue Ops[] = { Store, FIdx }; 4847 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 
4848 PPCISD::LFIWZX : PPCISD::LFIWAX, 4849 dl, DAG.getVTList(MVT::f64, MVT::Other), 4850 Ops, 2, MVT::i32, MMO); 4851 } else { 4852 assert(PPCSubTarget.isPPC64() && 4853 "i32->FP without LFIWAX supported only on PPC64"); 4854 4855 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 4856 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4857 4858 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 4859 Op.getOperand(0)); 4860 4861 // STD the extended value into the stack slot. 4862 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 4863 MachinePointerInfo::getFixedStack(FrameIdx), 4864 false, false, 0); 4865 4866 // Load the value as a double. 4867 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 4868 MachinePointerInfo::getFixedStack(FrameIdx), 4869 false, false, false, 0); 4870 } 4871 4872 // FCFID it and return it. 4873 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 4874 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4875 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 4876 return FP; 4877} 4878 4879SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 4880 SelectionDAG &DAG) const { 4881 SDLoc dl(Op); 4882 /* 4883 The rounding mode is in bits 30:31 of FPSR, and has the following 4884 settings: 4885 00 Round to nearest 4886 01 Round to 0 4887 10 Round to +inf 4888 11 Round to -inf 4889 4890 FLT_ROUNDS, on the other hand, expects the following: 4891 -1 Undefined 4892 0 Round to 0 4893 1 Round to nearest 4894 2 Round to +inf 4895 3 Round to -inf 4896 4897 To perform the conversion, we do: 4898 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 4899 */ 4900 4901 MachineFunction &MF = DAG.getMachineFunction(); 4902 EVT VT = Op.getValueType(); 4903 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4904 SDValue MFFSreg, InFlag; 4905 4906 // Save FP Control Word to register 4907 EVT NodeTys[] = { 4908 MVT::f64, // return register 4909 MVT::Glue // unused in this context 4910 }; 4911 SDValue Chain = 
DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 4912 4913 // Save FP register to stack slot 4914 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 4915 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 4916 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 4917 StackSlot, MachinePointerInfo(), false, false,0); 4918 4919 // Load FP Control Word from low 32 bits of stack slot. 4920 SDValue Four = DAG.getConstant(4, PtrVT); 4921 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 4922 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 4923 false, false, false, 0); 4924 4925 // Transform as necessary 4926 SDValue CWD1 = 4927 DAG.getNode(ISD::AND, dl, MVT::i32, 4928 CWD, DAG.getConstant(3, MVT::i32)); 4929 SDValue CWD2 = 4930 DAG.getNode(ISD::SRL, dl, MVT::i32, 4931 DAG.getNode(ISD::AND, dl, MVT::i32, 4932 DAG.getNode(ISD::XOR, dl, MVT::i32, 4933 CWD, DAG.getConstant(3, MVT::i32)), 4934 DAG.getConstant(3, MVT::i32)), 4935 DAG.getConstant(1, MVT::i32)); 4936 4937 SDValue RetVal = 4938 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 4939 4940 return DAG.getNode((VT.getSizeInBits() < 16 ? 4941 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 4942} 4943 4944SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 4945 EVT VT = Op.getValueType(); 4946 unsigned BitWidth = VT.getSizeInBits(); 4947 SDLoc dl(Op); 4948 assert(Op.getNumOperands() == 3 && 4949 VT == Op.getOperand(1).getValueType() && 4950 "Unexpected SHL!"); 4951 4952 // Expand into a bunch of logical ops. Note that these ops 4953 // depend on the PPC behavior for oversized shift amounts. 
4954 SDValue Lo = Op.getOperand(0); 4955 SDValue Hi = Op.getOperand(1); 4956 SDValue Amt = Op.getOperand(2); 4957 EVT AmtVT = Amt.getValueType(); 4958 4959 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 4960 DAG.getConstant(BitWidth, AmtVT), Amt); 4961 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 4962 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 4963 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 4964 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 4965 DAG.getConstant(-BitWidth, AmtVT)); 4966 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 4967 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 4968 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 4969 SDValue OutOps[] = { OutLo, OutHi }; 4970 return DAG.getMergeValues(OutOps, 2, dl); 4971} 4972 4973SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 4974 EVT VT = Op.getValueType(); 4975 SDLoc dl(Op); 4976 unsigned BitWidth = VT.getSizeInBits(); 4977 assert(Op.getNumOperands() == 3 && 4978 VT == Op.getOperand(1).getValueType() && 4979 "Unexpected SRL!"); 4980 4981 // Expand into a bunch of logical ops. Note that these ops 4982 // depend on the PPC behavior for oversized shift amounts. 
4983 SDValue Lo = Op.getOperand(0); 4984 SDValue Hi = Op.getOperand(1); 4985 SDValue Amt = Op.getOperand(2); 4986 EVT AmtVT = Amt.getValueType(); 4987 4988 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 4989 DAG.getConstant(BitWidth, AmtVT), Amt); 4990 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 4991 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 4992 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 4993 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 4994 DAG.getConstant(-BitWidth, AmtVT)); 4995 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 4996 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 4997 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 4998 SDValue OutOps[] = { OutLo, OutHi }; 4999 return DAG.getMergeValues(OutOps, 2, dl); 5000} 5001 5002SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5003 SDLoc dl(Op); 5004 EVT VT = Op.getValueType(); 5005 unsigned BitWidth = VT.getSizeInBits(); 5006 assert(Op.getNumOperands() == 3 && 5007 VT == Op.getOperand(1).getValueType() && 5008 "Unexpected SRA!"); 5009 5010 // Expand into a bunch of logical ops, followed by a select_cc. 
5011 SDValue Lo = Op.getOperand(0); 5012 SDValue Hi = Op.getOperand(1); 5013 SDValue Amt = Op.getOperand(2); 5014 EVT AmtVT = Amt.getValueType(); 5015 5016 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5017 DAG.getConstant(BitWidth, AmtVT), Amt); 5018 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5019 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5020 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5021 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5022 DAG.getConstant(-BitWidth, AmtVT)); 5023 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5024 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5025 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5026 Tmp4, Tmp6, ISD::SETLE); 5027 SDValue OutOps[] = { OutLo, OutHi }; 5028 return DAG.getMergeValues(OutOps, 2, dl); 5029} 5030 5031//===----------------------------------------------------------------------===// 5032// Vector related lowering. 5033// 5034 5035/// BuildSplatI - Build a canonical splati of Val with an element size of 5036/// SplatSize. Cast the result to VT. 5037static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5038 SelectionDAG &DAG, SDLoc dl) { 5039 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5040 5041 static const EVT VTys[] = { // canonical VT to use for each size. 5042 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5043 }; 5044 5045 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5046 5047 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5048 if (Val == -1) 5049 SplatSize = 1; 5050 5051 EVT CanonicalVT = VTys[SplatSize-1]; 5052 5053 // Build a canonical splat for this value. 
5054 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5055 SmallVector<SDValue, 8> Ops; 5056 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5057 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 5058 &Ops[0], Ops.size()); 5059 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5060} 5061 5062/// BuildIntrinsicOp - Return a unary operator intrinsic node with the 5063/// specified intrinsic ID. 5064static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 5065 SelectionDAG &DAG, SDLoc dl, 5066 EVT DestVT = MVT::Other) { 5067 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 5068 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5069 DAG.getConstant(IID, MVT::i32), Op); 5070} 5071 5072/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5073/// specified intrinsic ID. 5074static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5075 SelectionDAG &DAG, SDLoc dl, 5076 EVT DestVT = MVT::Other) { 5077 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5078 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5079 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5080} 5081 5082/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5083/// specified intrinsic ID. 5084static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 5085 SDValue Op2, SelectionDAG &DAG, 5086 SDLoc dl, EVT DestVT = MVT::Other) { 5087 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 5088 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5089 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 5090} 5091 5092 5093/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 5094/// amount. The result has the specified value type. 5095static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 5096 EVT VT, SelectionDAG &DAG, SDLoc dl) { 5097 // Force LHS/RHS to be the right type. 
5098 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 5099 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 5100 5101 int Ops[16]; 5102 for (unsigned i = 0; i != 16; ++i) 5103 Ops[i] = i + Amt; 5104 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 5105 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5106} 5107 5108// If this is a case we can't handle, return null and let the default 5109// expansion code take care of it. If we CAN select this case, and if it 5110// selects to a single instruction, return Op. Otherwise, if we can codegen 5111// this case more efficiently than a constant pool load, lower it to the 5112// sequence of ops that should be used. 5113SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 5114 SelectionDAG &DAG) const { 5115 SDLoc dl(Op); 5116 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5117 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 5118 5119 // Check if this is a splat of a constant value. 5120 APInt APSplatBits, APSplatUndef; 5121 unsigned SplatBitSize; 5122 bool HasAnyUndefs; 5123 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 5124 HasAnyUndefs, 0, true) || SplatBitSize > 32) 5125 return SDValue(); 5126 5127 unsigned SplatBits = APSplatBits.getZExtValue(); 5128 unsigned SplatUndef = APSplatUndef.getZExtValue(); 5129 unsigned SplatSize = SplatBitSize / 8; 5130 5131 // First, handle single instruction cases. 5132 5133 // All zeros? 5134 if (SplatBits == 0) { 5135 // Canonicalize all zero vectors to be v4i32. 5136 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 5137 SDValue Z = DAG.getConstant(0, MVT::i32); 5138 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 5139 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 5140 } 5141 return Op; 5142 } 5143 5144 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 
5145 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 5146 (32-SplatBitSize)); 5147 if (SextVal >= -16 && SextVal <= 15) 5148 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 5149 5150 5151 // Two instruction sequences. 5152 5153 // If this value is in the range [-32,30] and is even, use: 5154 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 5155 // If this value is in the range [17,31] and is odd, use: 5156 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 5157 // If this value is in the range [-31,-17] and is odd, use: 5158 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 5159 // Note the last two are three-instruction sequences. 5160 if (SextVal >= -32 && SextVal <= 31) { 5161 // To avoid having these optimizations undone by constant folding, 5162 // we convert to a pseudo that will be expanded later into one of 5163 // the above forms. 5164 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 5165 EVT VT = Op.getValueType(); 5166 int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); 5167 SDValue EltSize = DAG.getConstant(Size, MVT::i32); 5168 return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 5169 } 5170 5171 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 5172 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 5173 // for fneg/fabs. 5174 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 5175 // Make -1 and vspltisw -1: 5176 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 5177 5178 // Make the VSLW intrinsic, computing 0x8000_0000. 5179 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 5180 OnesV, DAG, dl); 5181 5182 // xor by OnesV to invert it. 5183 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 5184 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5185 } 5186 5187 // Check to see if this is a wide variety of vsplti*, binop self cases. 
5188 static const signed char SplatCsts[] = { 5189 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 5190 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 5191 }; 5192 5193 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 5194 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 5195 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 5196 int i = SplatCsts[idx]; 5197 5198 // Figure out what shift amount will be used by altivec if shifted by i in 5199 // this splat size. 5200 unsigned TypeShiftAmt = i & (SplatBitSize-1); 5201 5202 // vsplti + shl self. 5203 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 5204 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5205 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5206 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 5207 Intrinsic::ppc_altivec_vslw 5208 }; 5209 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5210 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5211 } 5212 5213 // vsplti + srl self. 5214 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5215 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5216 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5217 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5218 Intrinsic::ppc_altivec_vsrw 5219 }; 5220 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5221 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5222 } 5223 5224 // vsplti + sra self. 5225 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5226 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5227 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
5228 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5229 Intrinsic::ppc_altivec_vsraw 5230 }; 5231 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5232 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5233 } 5234 5235 // vsplti + rol self. 5236 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5237 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5238 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5239 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5240 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5241 Intrinsic::ppc_altivec_vrlw 5242 }; 5243 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5244 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5245 } 5246 5247 // t = vsplti c, result = vsldoi t, t, 1 5248 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 5249 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5250 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5251 } 5252 // t = vsplti c, result = vsldoi t, t, 2 5253 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5254 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5255 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5256 } 5257 // t = vsplti c, result = vsldoi t, t, 3 5258 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5259 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5260 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5261 } 5262 } 5263 5264 return SDValue(); 5265} 5266 5267/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5268/// the specified operations to build the shuffle. 
5269static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5270 SDValue RHS, SelectionDAG &DAG, 5271 SDLoc dl) { 5272 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5273 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5274 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5275 5276 enum { 5277 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5278 OP_VMRGHW, 5279 OP_VMRGLW, 5280 OP_VSPLTISW0, 5281 OP_VSPLTISW1, 5282 OP_VSPLTISW2, 5283 OP_VSPLTISW3, 5284 OP_VSLDOI4, 5285 OP_VSLDOI8, 5286 OP_VSLDOI12 5287 }; 5288 5289 if (OpNum == OP_COPY) { 5290 if (LHSID == (1*9+2)*9+3) return LHS; 5291 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5292 return RHS; 5293 } 5294 5295 SDValue OpLHS, OpRHS; 5296 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5297 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5298 5299 int ShufIdxs[16]; 5300 switch (OpNum) { 5301 default: llvm_unreachable("Unknown i32 permute!"); 5302 case OP_VMRGHW: 5303 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 5304 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 5305 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 5306 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 5307 break; 5308 case OP_VMRGLW: 5309 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 5310 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 5311 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 5312 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 5313 break; 5314 case OP_VSPLTISW0: 5315 for (unsigned i = 0; i != 16; ++i) 5316 ShufIdxs[i] = (i&3)+0; 5317 break; 5318 case OP_VSPLTISW1: 5319 for (unsigned i = 0; i != 16; ++i) 5320 ShufIdxs[i] = (i&3)+4; 5321 break; 5322 case OP_VSPLTISW2: 5323 for (unsigned i = 0; i != 16; ++i) 5324 
ShufIdxs[i] = (i&3)+8; 5325 break; 5326 case OP_VSPLTISW3: 5327 for (unsigned i = 0; i != 16; ++i) 5328 ShufIdxs[i] = (i&3)+12; 5329 break; 5330 case OP_VSLDOI4: 5331 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 5332 case OP_VSLDOI8: 5333 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 5334 case OP_VSLDOI12: 5335 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 5336 } 5337 EVT VT = OpLHS.getValueType(); 5338 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 5339 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 5340 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 5341 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5342} 5343 5344/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 5345/// is a shuffle we can handle in a single instruction, return it. Otherwise, 5346/// return the code it can be lowered into. Worst case, it can always be 5347/// lowered into a vperm. 5348SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 5349 SelectionDAG &DAG) const { 5350 SDLoc dl(Op); 5351 SDValue V1 = Op.getOperand(0); 5352 SDValue V2 = Op.getOperand(1); 5353 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 5354 EVT VT = Op.getValueType(); 5355 5356 // Cases that are handled by instructions that take permute immediates 5357 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 5358 // selected by the instruction selector. 
5359 if (V2.getOpcode() == ISD::UNDEF) { 5360 if (PPC::isSplatShuffleMask(SVOp, 1) || 5361 PPC::isSplatShuffleMask(SVOp, 2) || 5362 PPC::isSplatShuffleMask(SVOp, 4) || 5363 PPC::isVPKUWUMShuffleMask(SVOp, true) || 5364 PPC::isVPKUHUMShuffleMask(SVOp, true) || 5365 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 5366 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 5367 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 5368 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 5369 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 5370 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 5371 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 5372 return Op; 5373 } 5374 } 5375 5376 // Altivec has a variety of "shuffle immediates" that take two vector inputs 5377 // and produce a fixed permutation. If any of these match, do not lower to 5378 // VPERM. 5379 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 5380 PPC::isVPKUHUMShuffleMask(SVOp, false) || 5381 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 5382 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 5383 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 5384 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 5385 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 5386 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 5387 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 5388 return Op; 5389 5390 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 5391 // perfect shuffle table to emit an optimal matching sequence. 5392 ArrayRef<int> PermMask = SVOp->getMask(); 5393 5394 unsigned PFIndexes[4]; 5395 bool isFourElementShuffle = true; 5396 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 5397 unsigned EltNo = 8; // Start out undef. 5398 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 5399 if (PermMask[i*4+j] < 0) 5400 continue; // Undef, ignore it. 
5401 5402 unsigned ByteSource = PermMask[i*4+j]; 5403 if ((ByteSource & 3) != j) { 5404 isFourElementShuffle = false; 5405 break; 5406 } 5407 5408 if (EltNo == 8) { 5409 EltNo = ByteSource/4; 5410 } else if (EltNo != ByteSource/4) { 5411 isFourElementShuffle = false; 5412 break; 5413 } 5414 } 5415 PFIndexes[i] = EltNo; 5416 } 5417 5418 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 5419 // perfect shuffle vector to determine if it is cost effective to do this as 5420 // discrete instructions, or whether we should use a vperm. 5421 if (isFourElementShuffle) { 5422 // Compute the index in the perfect shuffle table. 5423 unsigned PFTableIndex = 5424 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5425 5426 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5427 unsigned Cost = (PFEntry >> 30); 5428 5429 // Determining when to avoid vperm is tricky. Many things affect the cost 5430 // of vperm, particularly how many times the perm mask needs to be computed. 5431 // For example, if the perm mask can be hoisted out of a loop or is already 5432 // used (perhaps because there are multiple permutes with the same shuffle 5433 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 5434 // the loop requires an extra register. 5435 // 5436 // As a compromise, we only emit discrete instructions if the shuffle can be 5437 // generated in 3 or fewer operations. When we have loop information 5438 // available, if this block is within a loop, we should avoid using vperm 5439 // for 3-operation perms and use a constant pool load instead. 5440 if (Cost < 3) 5441 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5442 } 5443 5444 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 5445 // vector that will get spilled to the constant pool. 
5446 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 5447 5448 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 5449 // that it is in input element units, not in bytes. Convert now. 5450 EVT EltVT = V1.getValueType().getVectorElementType(); 5451 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 5452 5453 SmallVector<SDValue, 16> ResultMask; 5454 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 5455 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 5456 5457 for (unsigned j = 0; j != BytesPerElement; ++j) 5458 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 5459 MVT::i32)); 5460 } 5461 5462 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 5463 &ResultMask[0], ResultMask.size()); 5464 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 5465} 5466 5467/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 5468/// altivec comparison. If it is, return true and fill in Opc/isDot with 5469/// information about the intrinsic. 5470static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 5471 bool &isDot) { 5472 unsigned IntrinsicID = 5473 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 5474 CompareOpc = -1; 5475 isDot = false; 5476 switch (IntrinsicID) { 5477 default: return false; 5478 // Comparison predicates. 
5479 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 5480 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 5481 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 5482 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 5483 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 5484 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 5485 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 5486 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 5487 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 5488 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 5489 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 5490 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 5491 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 5492 5493 // Normal Comparisons. 
5494 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 5495 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 5496 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 5497 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 5498 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 5499 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 5500 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 5501 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 5502 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 5503 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 5504 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 5505 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 5506 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 5507 } 5508 return true; 5509} 5510 5511/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 5512/// lower, do it, otherwise return null. 5513SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5514 SelectionDAG &DAG) const { 5515 // If this is a lowered altivec predicate compare, CompareOpc is set to the 5516 // opcode number of the comparison. 5517 SDLoc dl(Op); 5518 int CompareOpc; 5519 bool isDot; 5520 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 5521 return SDValue(); // Don't custom lower most intrinsics. 5522 5523 // If this is a non-dot comparison, make the VCMP node and we are done. 5524 if (!isDot) { 5525 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 5526 Op.getOperand(1), Op.getOperand(2), 5527 DAG.getConstant(CompareOpc, MVT::i32)); 5528 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 5529 } 5530 5531 // Create the PPCISD altivec 'dot' comparison node. 
5532 SDValue Ops[] = { 5533 Op.getOperand(2), // LHS 5534 Op.getOperand(3), // RHS 5535 DAG.getConstant(CompareOpc, MVT::i32) 5536 }; 5537 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 5538 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5539 5540 // Now that we have the comparison, emit a copy from the CR to a GPR. 5541 // This is flagged to the above dot comparison. 5542 SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, 5543 DAG.getRegister(PPC::CR6, MVT::i32), 5544 CompNode.getValue(1)); 5545 5546 // Unpack the result based on how the target uses it. 5547 unsigned BitNo; // Bit # of CR6. 5548 bool InvertBit; // Invert result? 5549 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 5550 default: // Can't happen, don't crash on invalid number though. 5551 case 0: // Return the value of the EQ bit of CR6. 5552 BitNo = 0; InvertBit = false; 5553 break; 5554 case 1: // Return the inverted value of the EQ bit of CR6. 5555 BitNo = 0; InvertBit = true; 5556 break; 5557 case 2: // Return the value of the LT bit of CR6. 5558 BitNo = 2; InvertBit = false; 5559 break; 5560 case 3: // Return the inverted value of the LT bit of CR6. 5561 BitNo = 2; InvertBit = true; 5562 break; 5563 } 5564 5565 // Shift the bit into the low position. 5566 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 5567 DAG.getConstant(8-(3-BitNo), MVT::i32)); 5568 // Isolate the bit. 5569 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 5570 DAG.getConstant(1, MVT::i32)); 5571 5572 // If we are supposed to, toggle the bit. 5573 if (InvertBit) 5574 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 5575 DAG.getConstant(1, MVT::i32)); 5576 return Flags; 5577} 5578 5579SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 5580 SelectionDAG &DAG) const { 5581 SDLoc dl(Op); 5582 // Create a stack slot that is 16-byte aligned. 
5583 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 5584 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 5585 EVT PtrVT = getPointerTy(); 5586 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5587 5588 // Store the input value into Value#0 of the stack slot. 5589 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 5590 Op.getOperand(0), FIdx, MachinePointerInfo(), 5591 false, false, 0); 5592 // Load it out. 5593 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 5594 false, false, false, 0); 5595} 5596 5597SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 5598 SDLoc dl(Op); 5599 if (Op.getValueType() == MVT::v4i32) { 5600 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5601 5602 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 5603 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 5604 5605 SDValue RHSSwap = // = vrlw RHS, 16 5606 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 5607 5608 // Shrinkify inputs to v8i16. 5609 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 5610 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 5611 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 5612 5613 // Low parts multiplied together, generating 32-bit results (we ignore the 5614 // top parts). 5615 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 5616 LHS, RHS, DAG, dl, MVT::v4i32); 5617 5618 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 5619 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 5620 // Shift the high parts up 16 bits. 
5621 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 5622 Neg16, DAG, dl); 5623 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 5624 } else if (Op.getValueType() == MVT::v8i16) { 5625 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5626 5627 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 5628 5629 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 5630 LHS, RHS, Zero, DAG, dl); 5631 } else if (Op.getValueType() == MVT::v16i8) { 5632 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5633 5634 // Multiply the even 8-bit parts, producing 16-bit sums. 5635 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 5636 LHS, RHS, DAG, dl, MVT::v8i16); 5637 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 5638 5639 // Multiply the odd 8-bit parts, producing 16-bit sums. 5640 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 5641 LHS, RHS, DAG, dl, MVT::v8i16); 5642 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 5643 5644 // Merge the results together. 5645 int Ops[16]; 5646 for (unsigned i = 0; i != 8; ++i) { 5647 Ops[i*2 ] = 2*i+1; 5648 Ops[i*2+1] = 2*i+1+16; 5649 } 5650 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 5651 } else { 5652 llvm_unreachable("Unknown mul to lower!"); 5653 } 5654} 5655 5656/// LowerOperation - Provide custom lowering hooks for some operations. 
5657/// 5658SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5659 switch (Op.getOpcode()) { 5660 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 5661 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5662 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5663 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5664 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5665 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5666 case ISD::SETCC: return LowerSETCC(Op, DAG); 5667 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 5668 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 5669 case ISD::VASTART: 5670 return LowerVASTART(Op, DAG, PPCSubTarget); 5671 5672 case ISD::VAARG: 5673 return LowerVAARG(Op, DAG, PPCSubTarget); 5674 5675 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 5676 case ISD::DYNAMIC_STACKALLOC: 5677 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 5678 5679 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 5680 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 5681 5682 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5683 case ISD::FP_TO_UINT: 5684 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 5685 SDLoc(Op)); 5686 case ISD::UINT_TO_FP: 5687 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5688 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5689 5690 // Lower 64-bit shifts. 5691 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 5692 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 5693 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 5694 5695 // Vector-related lowering. 
/// ReplaceNodeResults - Replace the results of node N with new values, for
/// operations whose result type is illegal and must be custom type-legalized.
/// The replacement values are appended to Results in result-number order.
void PPCTargetLowering::ReplaceNodeResults(SDNode *N,
                                           SmallVectorImpl<SDValue>&Results,
                                           SelectionDAG &DAG) const {
  const TargetMachine &TM = getTargetMachine();
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Do not know how to custom type legalize this operation!");
  case ISD::INTRINSIC_W_CHAIN: {
    // Only the CTR-decrement intrinsic needs help here; anything else falls
    // through to the generic legalizer.
    if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
        Intrinsic::ppc_is_decremented_ctr_nonzero)
      break;

    assert(N->getValueType(0) == MVT::i1 &&
           "Unexpected result type for CTR decrement intrinsic");

    // Re-issue the intrinsic with the legal setcc result type instead of i1,
    // keeping the chain result alongside it.
    EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0));
    SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
    SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
                                 N->getOperand(1));

    Results.push_back(NewInt);
    Results.push_back(NewInt.getValue(1));
    break;
  }
  case ISD::VAARG: {
    // Custom handling is only needed for 32-bit SVR4; everything else uses
    // the default expansion.
    if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI()
        || TM.getSubtarget<PPCSubtarget>().isPPC64())
      return;

    EVT VT = N->getValueType(0);

    if (VT == MVT::i64) {
      SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget);

      Results.push_back(NewNode);
      Results.push_back(NewNode.getValue(1));
    }
    return;
  }
  case ISD::FP_ROUND_INREG: {
    assert(N->getValueType(0) == MVT::ppcf128);
    assert(N->getOperand(0).getValueType() == MVT::ppcf128);
    // Split the ppcf128 value into its two f64 halves.
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(0));
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl,
                             MVT::f64, N->getOperand(0),
                             DAG.getIntPtrConstant(1));

    // Add the two halves of the long double in round-to-zero mode.
    SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);

    // We know the low half is about to be thrown away, so just use something
    // convenient.
    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
                                  FPreg, FPreg));
    return;
  }
  case ISD::FP_TO_SINT:
    Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl));
    return;
  }
}
/// EmitAtomicBinary - Expand a word/doubleword atomic read-modify-write
/// pseudo into a lwarx/stwcx (or ldarx/stdcx) retry loop.  BinOpcode is the
/// machine opcode applied between the loaded value and the increment operand.
MachineBasicBlock *
PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
                                    bool is64bit, unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  // Pseudo operands: result, two address operands (reg+reg form), increment.
  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  // Create the loop and exit blocks and move everything after MI into the
  // exit block, keeping the CFG/PHIs consistent.
  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  // For a swap (BinOpcode==0) there is nothing to compute; store incr itself.
  unsigned TmpReg = (!BinOpcode) ? incr :
    RegInfo.createVirtualRegister(
       is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                 (const TargetRegisterClass *) &PPC::GPRCRegClass);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  // Retry if the store-conditional lost the reservation (CR0 != EQ).
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}
/// EmitPartwordAtomicBinary - Expand an i8/i16 atomic read-modify-write
/// pseudo.  Since lwarx/stwcx only operate on aligned words, the byte or
/// halfword is updated inside its containing word using shift/mask
/// bookkeeping, and the result is shifted back down at the exit.
MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = PPCSubTarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
              (const TargetRegisterClass *) &PPC::GPRCRegClass;
  // Scratch registers for the shift/mask expansion sketched below.
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
  //   add ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  // Clear the low two address bits to get the aligned word address.
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  // Merge the updated partword back into the untouched bytes of the word.
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  // Shift the previous partword value back down into the low bits of dest.
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}
/// emitEHSjLjSetJmp - Expand the EH_SjLj_SetJmp pseudo: store the resume
/// address (and, on 64-bit SVR4, the TOC pointer) into the buffer, and
/// produce 0 on the direct path and 1 on the longjmp-resume path via a PHI.
llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();

  // Prepare IP either in reg.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  // Save the TOC pointer for cross-shared-library longjmps (64-bit SVR4).
  if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  const PPCRegisterInfo *TRI =
    static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo());
  // Everything may be clobbered across a longjmp resume.
  MIB.addRegMask(TRI->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  MIB = BuildMI(mainMBB, DL,
    TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (PPCSubTarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}
/// emitEHSjLjLongJmp - Expand the EH_SjLj_LongJmp pseudo: reload the frame
/// pointer, resume address, stack pointer, and (on 64-bit SVR4) the TOC
/// pointer from the buffer, then jump indirectly through CTR.
MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;

  MachineInstrBuilder MIB;

  // Buffer slot layout (see emitEHSjLjSetJmp): 0 = FP, 1 = IP, 2 = SP,
  // 3 = TOC.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // FIXME: When we also support base pointers, that register must also be
  // restored here.

  // Reload TOC
  if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}
/// EmitInstrWithCustomInserter - Expand pseudo-instructions that need new
/// control flow: EH setjmp/longjmp, SELECT_CC, the atomic read-modify-write
/// and compare-and-swap pseudos, and the FPSCR-manipulating FP pseudos.
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  // The SjLj pseudos erase MI themselves, so handle them before the common
  // epilogue below.
  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  // With ISEL available, an integer SELECT_CC needs no branching at all.
  if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                                 MI->getOpcode() == PPC::SELECT_CC_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    Cond.push_back(MI->getOperand(4));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(),
                      Cond, MI->getOperand(2).getReg(),
                      MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
             MI->getOpcode() == PPC::SELECT_CC_VRRC) {


    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    //  thisMBB:
    //  ...
    //   TrueVal = ...
    //   cmpTY ccX, r1, r2
    //   bCC copy1MBB
    //   fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned SelectPred = MI->getOperand(4).getImm();
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  }
  // Atomic read-modify-write pseudos: partword forms go through the
  // shift/mask expansion, word/doubleword forms through the simple loop.
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

  // BinOpcode==0 selects the swap behavior in the helpers.
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loopMBB
    //   b exitBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    // Comparison failed: store the loaded value back to release the
    // reservation.
    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = PPCSubTarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loopMBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval,shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitBB:
    //   srw dest, tmpDest, shift
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND),TmpReg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
        .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
        .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg)
        .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg)
        .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
        .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    // Comparison failed: store back unchanged to release the reservation.
    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
      .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
  } else if (MI->getOpcode() == PPC::FRINDrint ||
             MI->getOpcode() == PPC::FRINSrint) {
    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src = MI->getOperand(1).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    // Perform the rounding.
    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
      .addReg(Src);

    // Compare the results.
    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
      .addReg(Dest).addReg(Src);

    // If the results were not equal, then set the FPSCR XX bit.
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);

    BB->addSuccessor(midMBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;

    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
    // the FI bit here because that will not automatically set XX also,
    // and XX is what libm interprets as the FE_INEXACT flag.
    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

    BB->addSuccessor(exitMBB);

    BB = exitMBB;
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}
1 : 3; 6666 if (VT.getScalarType() == MVT::f64) 6667 ++Iterations; 6668 6669 SelectionDAG &DAG = DCI.DAG; 6670 SDLoc dl(Op); 6671 6672 SDValue FPOne = 6673 DAG.getConstantFP(1.0, VT.getScalarType()); 6674 if (VT.isVector()) { 6675 assert(VT.getVectorNumElements() == 4 && 6676 "Unknown vector type"); 6677 FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 6678 FPOne, FPOne, FPOne, FPOne); 6679 } 6680 6681 SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); 6682 DCI.AddToWorklist(Est.getNode()); 6683 6684 // Newton iterations: Est = Est + Est (1 - Arg * Est) 6685 for (int i = 0; i < Iterations; ++i) { 6686 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); 6687 DCI.AddToWorklist(NewEst.getNode()); 6688 6689 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); 6690 DCI.AddToWorklist(NewEst.getNode()); 6691 6692 NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 6693 DCI.AddToWorklist(NewEst.getNode()); 6694 6695 Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); 6696 DCI.AddToWorklist(Est.getNode()); 6697 } 6698 6699 return Est; 6700 } 6701 6702 return SDValue(); 6703} 6704 6705SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, 6706 DAGCombinerInfo &DCI) const { 6707 if (DCI.isAfterLegalizeVectorOps()) 6708 return SDValue(); 6709 6710 EVT VT = Op.getValueType(); 6711 6712 if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || 6713 (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || 6714 (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { 6715 6716 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 6717 // For the reciprocal sqrt, we need to find the zero of the function: 6718 // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 6719 // => 6720 // X_{i+1} = X_i (1.5 - A X_i^2 / 2) 6721 // As a result, we precompute A/2 prior to the iteration loop. 6722 6723 // Convergence is quadratic, so we essentially double the number of digits 6724 // correct after every iteration. 
The minimum architected relative 6725 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 6726 // 23 digits and double has 52 digits. 6727 int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; 6728 if (VT.getScalarType() == MVT::f64) 6729 ++Iterations; 6730 6731 SelectionDAG &DAG = DCI.DAG; 6732 SDLoc dl(Op); 6733 6734 SDValue FPThreeHalves = 6735 DAG.getConstantFP(1.5, VT.getScalarType()); 6736 if (VT.isVector()) { 6737 assert(VT.getVectorNumElements() == 4 && 6738 "Unknown vector type"); 6739 FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 6740 FPThreeHalves, FPThreeHalves, 6741 FPThreeHalves, FPThreeHalves); 6742 } 6743 6744 SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); 6745 DCI.AddToWorklist(Est.getNode()); 6746 6747 // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that 6748 // this entire sequence requires only one FP constant. 6749 SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); 6750 DCI.AddToWorklist(HalfArg.getNode()); 6751 6752 HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); 6753 DCI.AddToWorklist(HalfArg.getNode()); 6754 6755 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 6756 for (int i = 0; i < Iterations; ++i) { 6757 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); 6758 DCI.AddToWorklist(NewEst.getNode()); 6759 6760 NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); 6761 DCI.AddToWorklist(NewEst.getNode()); 6762 6763 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); 6764 DCI.AddToWorklist(NewEst.getNode()); 6765 6766 Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 6767 DCI.AddToWorklist(Est.getNode()); 6768 } 6769 6770 return Est; 6771 } 6772 6773 return SDValue(); 6774} 6775 6776// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 6777// not enforce equality of the chain operands. 
6778static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, 6779 unsigned Bytes, int Dist, 6780 SelectionDAG &DAG) { 6781 EVT VT = LS->getMemoryVT(); 6782 if (VT.getSizeInBits() / 8 != Bytes) 6783 return false; 6784 6785 SDValue Loc = LS->getBasePtr(); 6786 SDValue BaseLoc = Base->getBasePtr(); 6787 if (Loc.getOpcode() == ISD::FrameIndex) { 6788 if (BaseLoc.getOpcode() != ISD::FrameIndex) 6789 return false; 6790 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 6791 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 6792 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 6793 int FS = MFI->getObjectSize(FI); 6794 int BFS = MFI->getObjectSize(BFI); 6795 if (FS != BFS || FS != (int)Bytes) return false; 6796 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 6797 } 6798 6799 // Handle X+C 6800 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 6801 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 6802 return true; 6803 6804 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6805 const GlobalValue *GV1 = NULL; 6806 const GlobalValue *GV2 = NULL; 6807 int64_t Offset1 = 0; 6808 int64_t Offset2 = 0; 6809 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 6810 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 6811 if (isGA1 && isGA2 && GV1 == GV2) 6812 return Offset1 == (Offset2 + Dist*Bytes); 6813 return false; 6814} 6815 6816// Return true is there is a nearyby consecutive load to the one provided 6817// (regardless of alignment). We search up and down the chain, looking though 6818// token factors and other loads (but nothing else). As a result, a true 6819// results indicates that it is safe to create a new consecutive load adjacent 6820// to the load provided. 
6821static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 6822 SDValue Chain = LD->getChain(); 6823 EVT VT = LD->getMemoryVT(); 6824 6825 SmallSet<SDNode *, 16> LoadRoots; 6826 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 6827 SmallSet<SDNode *, 16> Visited; 6828 6829 // First, search up the chain, branching to follow all token-factor operands. 6830 // If we find a consecutive load, then we're done, otherwise, record all 6831 // nodes just above the top-level loads and token factors. 6832 while (!Queue.empty()) { 6833 SDNode *ChainNext = Queue.pop_back_val(); 6834 if (!Visited.insert(ChainNext)) 6835 continue; 6836 6837 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { 6838 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 6839 return true; 6840 6841 if (!Visited.count(ChainLD->getChain().getNode())) 6842 Queue.push_back(ChainLD->getChain().getNode()); 6843 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 6844 for (SDNode::op_iterator O = ChainNext->op_begin(), 6845 OE = ChainNext->op_end(); O != OE; ++O) 6846 if (!Visited.count(O->getNode())) 6847 Queue.push_back(O->getNode()); 6848 } else 6849 LoadRoots.insert(ChainNext); 6850 } 6851 6852 // Second, search down the chain, starting from the top-level nodes recorded 6853 // in the first phase. These top-level nodes are the nodes just above all 6854 // loads and token factors. Starting with their uses, recursively look though 6855 // all loads (just the chain uses) and token factors to find a consecutive 6856 // load. 
6857 Visited.clear(); 6858 Queue.clear(); 6859 6860 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 6861 IE = LoadRoots.end(); I != IE; ++I) { 6862 Queue.push_back(*I); 6863 6864 while (!Queue.empty()) { 6865 SDNode *LoadRoot = Queue.pop_back_val(); 6866 if (!Visited.insert(LoadRoot)) 6867 continue; 6868 6869 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) 6870 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 6871 return true; 6872 6873 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 6874 UE = LoadRoot->use_end(); UI != UE; ++UI) 6875 if (((isa<LoadSDNode>(*UI) && 6876 cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || 6877 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 6878 Queue.push_back(*UI); 6879 } 6880 } 6881 6882 return false; 6883} 6884 6885SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 6886 DAGCombinerInfo &DCI) const { 6887 const TargetMachine &TM = getTargetMachine(); 6888 SelectionDAG &DAG = DCI.DAG; 6889 SDLoc dl(N); 6890 switch (N->getOpcode()) { 6891 default: break; 6892 case PPCISD::SHL: 6893 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6894 if (C->isNullValue()) // 0 << V -> 0. 6895 return N->getOperand(0); 6896 } 6897 break; 6898 case PPCISD::SRL: 6899 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6900 if (C->isNullValue()) // 0 >>u V -> 0. 6901 return N->getOperand(0); 6902 } 6903 break; 6904 case PPCISD::SRA: 6905 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6906 if (C->isNullValue() || // 0 >>s V -> 0. 6907 C->isAllOnesValue()) // -1 >>s V -> -1. 
6908 return N->getOperand(0); 6909 } 6910 break; 6911 case ISD::FDIV: { 6912 assert(TM.Options.UnsafeFPMath && 6913 "Reciprocal estimates require UnsafeFPMath"); 6914 6915 if (N->getOperand(1).getOpcode() == ISD::FSQRT) { 6916 SDValue RV = 6917 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); 6918 if (RV.getNode() != 0) { 6919 DCI.AddToWorklist(RV.getNode()); 6920 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 6921 N->getOperand(0), RV); 6922 } 6923 } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND && 6924 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 6925 SDValue RV = 6926 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 6927 DCI); 6928 if (RV.getNode() != 0) { 6929 DCI.AddToWorklist(RV.getNode()); 6930 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), 6931 N->getValueType(0), RV); 6932 DCI.AddToWorklist(RV.getNode()); 6933 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 6934 N->getOperand(0), RV); 6935 } 6936 } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND && 6937 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 6938 SDValue RV = 6939 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 6940 DCI); 6941 if (RV.getNode() != 0) { 6942 DCI.AddToWorklist(RV.getNode()); 6943 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), 6944 N->getValueType(0), RV, 6945 N->getOperand(1).getOperand(1)); 6946 DCI.AddToWorklist(RV.getNode()); 6947 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 6948 N->getOperand(0), RV); 6949 } 6950 } 6951 6952 SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); 6953 if (RV.getNode() != 0) { 6954 DCI.AddToWorklist(RV.getNode()); 6955 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 6956 N->getOperand(0), RV); 6957 } 6958 6959 } 6960 break; 6961 case ISD::FSQRT: { 6962 assert(TM.Options.UnsafeFPMath && 6963 "Reciprocal estimates require UnsafeFPMath"); 6964 6965 // Compute this as 1/(1/sqrt(X)), which is the 
reciprocal of the 6966 // reciprocal sqrt. 6967 SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); 6968 if (RV.getNode() != 0) { 6969 DCI.AddToWorklist(RV.getNode()); 6970 RV = DAGCombineFastRecip(RV, DCI); 6971 if (RV.getNode() != 0) 6972 return RV; 6973 } 6974 6975 } 6976 break; 6977 case ISD::SINT_TO_FP: 6978 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 6979 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 6980 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 6981 // We allow the src/dst to be either f32/f64, but the intermediate 6982 // type must be i64. 6983 if (N->getOperand(0).getValueType() == MVT::i64 && 6984 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 6985 SDValue Val = N->getOperand(0).getOperand(0); 6986 if (Val.getValueType() == MVT::f32) { 6987 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 6988 DCI.AddToWorklist(Val.getNode()); 6989 } 6990 6991 Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); 6992 DCI.AddToWorklist(Val.getNode()); 6993 Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); 6994 DCI.AddToWorklist(Val.getNode()); 6995 if (N->getValueType(0) == MVT::f32) { 6996 Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, 6997 DAG.getIntPtrConstant(0)); 6998 DCI.AddToWorklist(Val.getNode()); 6999 } 7000 return Val; 7001 } else if (N->getOperand(0).getValueType() == MVT::i32) { 7002 // If the intermediate type is i32, we can avoid the load/store here 7003 // too. 7004 } 7005 } 7006 } 7007 break; 7008 case ISD::STORE: 7009 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
7010 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 7011 !cast<StoreSDNode>(N)->isTruncatingStore() && 7012 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 7013 N->getOperand(1).getValueType() == MVT::i32 && 7014 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 7015 SDValue Val = N->getOperand(1).getOperand(0); 7016 if (Val.getValueType() == MVT::f32) { 7017 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 7018 DCI.AddToWorklist(Val.getNode()); 7019 } 7020 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 7021 DCI.AddToWorklist(Val.getNode()); 7022 7023 SDValue Ops[] = { 7024 N->getOperand(0), Val, N->getOperand(2), 7025 DAG.getValueType(N->getOperand(1).getValueType()) 7026 }; 7027 7028 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 7029 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 7030 cast<StoreSDNode>(N)->getMemoryVT(), 7031 cast<StoreSDNode>(N)->getMemOperand()); 7032 DCI.AddToWorklist(Val.getNode()); 7033 return Val; 7034 } 7035 7036 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 7037 if (cast<StoreSDNode>(N)->isUnindexed() && 7038 N->getOperand(1).getOpcode() == ISD::BSWAP && 7039 N->getOperand(1).getNode()->hasOneUse() && 7040 (N->getOperand(1).getValueType() == MVT::i32 || 7041 N->getOperand(1).getValueType() == MVT::i16 || 7042 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 7043 TM.getSubtarget<PPCSubtarget>().isPPC64() && 7044 N->getOperand(1).getValueType() == MVT::i64))) { 7045 SDValue BSwapOp = N->getOperand(1).getOperand(0); 7046 // Do an any-extend to 32-bits if this is a half-word input. 
7047 if (BSwapOp.getValueType() == MVT::i16) 7048 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 7049 7050 SDValue Ops[] = { 7051 N->getOperand(0), BSwapOp, N->getOperand(2), 7052 DAG.getValueType(N->getOperand(1).getValueType()) 7053 }; 7054 return 7055 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 7056 Ops, array_lengthof(Ops), 7057 cast<StoreSDNode>(N)->getMemoryVT(), 7058 cast<StoreSDNode>(N)->getMemOperand()); 7059 } 7060 break; 7061 case ISD::LOAD: { 7062 LoadSDNode *LD = cast<LoadSDNode>(N); 7063 EVT VT = LD->getValueType(0); 7064 Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); 7065 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); 7066 if (ISD::isNON_EXTLoad(N) && VT.isVector() && 7067 TM.getSubtarget<PPCSubtarget>().hasAltivec() && 7068 DCI.getDAGCombineLevel() == AfterLegalizeTypes && 7069 LD->getAlignment() < ABIAlignment) { 7070 // This is a type-legal unaligned Altivec load. 7071 SDValue Chain = LD->getChain(); 7072 SDValue Ptr = LD->getBasePtr(); 7073 7074 // This implements the loading of unaligned vectors as described in 7075 // the venerable Apple Velocity Engine overview. Specifically: 7076 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 7077 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 7078 // 7079 // The general idea is to expand a sequence of one or more unaligned 7080 // loads into a alignment-based permutation-control instruction (lvsl), 7081 // a series of regular vector loads (which always truncate their 7082 // input address to an aligned address), and a series of permutations. 7083 // The results of these permutations are the requested loaded values. 7084 // The trick is that the last "extra" load is not taken from the address 7085 // you might suspect (sizeof(vector) bytes after the last requested 7086 // load), but rather sizeof(vector) - 1 bytes after the last 7087 // requested vector. 
The point of this is to avoid a page fault if the 7088 // base address happend to be aligned. This works because if the base 7089 // address is aligned, then adding less than a full vector length will 7090 // cause the last vector in the sequence to be (re)loaded. Otherwise, 7091 // the next vector will be fetched as you might suspect was necessary. 7092 7093 // We might be able to reuse the permutation generation from 7094 // a different base address offset from this one by an aligned amount. 7095 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 7096 // optimization later. 7097 SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr, 7098 DAG, dl, MVT::v16i8); 7099 7100 // Refine the alignment of the original load (a "new" load created here 7101 // which was identical to the first except for the alignment would be 7102 // merged with the existing node regardless). 7103 MachineFunction &MF = DAG.getMachineFunction(); 7104 MachineMemOperand *MMO = 7105 MF.getMachineMemOperand(LD->getPointerInfo(), 7106 LD->getMemOperand()->getFlags(), 7107 LD->getMemoryVT().getStoreSize(), 7108 ABIAlignment); 7109 LD->refineAlignment(MMO); 7110 SDValue BaseLoad = SDValue(LD, 0); 7111 7112 // Note that the value of IncOffset (which is provided to the next 7113 // load's pointer info offset value, and thus used to calculate the 7114 // alignment), and the value of IncValue (which is actually used to 7115 // increment the pointer value) are different! This is because we 7116 // require the next load to appear to be aligned, even though it 7117 // is actually offset from the base pointer by a lesser amount. 7118 int IncOffset = VT.getSizeInBits() / 8; 7119 int IncValue = IncOffset; 7120 7121 // Walk (both up and down) the chain looking for another load at the real 7122 // (aligned) offset (the alignment of the other load does not matter in 7123 // this case). 
If found, then do not use the offset reduction trick, as 7124 // that will prevent the loads from being later combined (as they would 7125 // otherwise be duplicates). 7126 if (!findConsecutiveLoad(LD, DAG)) 7127 --IncValue; 7128 7129 SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); 7130 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 7131 7132 SDValue ExtraLoad = 7133 DAG.getLoad(VT, dl, Chain, Ptr, 7134 LD->getPointerInfo().getWithOffset(IncOffset), 7135 LD->isVolatile(), LD->isNonTemporal(), 7136 LD->isInvariant(), ABIAlignment); 7137 7138 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 7139 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 7140 7141 if (BaseLoad.getValueType() != MVT::v4i32) 7142 BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad); 7143 7144 if (ExtraLoad.getValueType() != MVT::v4i32) 7145 ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); 7146 7147 SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, 7148 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 7149 7150 if (VT != MVT::v4i32) 7151 Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); 7152 7153 // Now we need to be really careful about how we update the users of the 7154 // original load. We cannot just call DCI.CombineTo (or 7155 // DAG.ReplaceAllUsesWith for that matter), because the load still has 7156 // uses created here (the permutation for example) that need to stay. 7157 SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 7158 while (UI != UE) { 7159 SDUse &Use = UI.getUse(); 7160 SDNode *User = *UI; 7161 // Note: BaseLoad is checked here because it might not be N, but a 7162 // bitcast of N. 7163 if (User == Perm.getNode() || User == BaseLoad.getNode() || 7164 User == TF.getNode() || Use.getResNo() > 1) { 7165 ++UI; 7166 continue; 7167 } 7168 7169 SDValue To = Use.getResNo() ? 
TF : Perm; 7170 ++UI; 7171 7172 SmallVector<SDValue, 8> Ops; 7173 for (SDNode::op_iterator O = User->op_begin(), 7174 OE = User->op_end(); O != OE; ++O) { 7175 if (*O == Use) 7176 Ops.push_back(To); 7177 else 7178 Ops.push_back(*O); 7179 } 7180 7181 DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); 7182 } 7183 7184 return SDValue(N, 0); 7185 } 7186 } 7187 break; 7188 case ISD::INTRINSIC_WO_CHAIN: 7189 if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == 7190 Intrinsic::ppc_altivec_lvsl && 7191 N->getOperand(1)->getOpcode() == ISD::ADD) { 7192 SDValue Add = N->getOperand(1); 7193 7194 if (DAG.MaskedValueIsZero(Add->getOperand(1), 7195 APInt::getAllOnesValue(4 /* 16 byte alignment */).zext( 7196 Add.getValueType().getScalarType().getSizeInBits()))) { 7197 SDNode *BasePtr = Add->getOperand(0).getNode(); 7198 for (SDNode::use_iterator UI = BasePtr->use_begin(), 7199 UE = BasePtr->use_end(); UI != UE; ++UI) { 7200 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 7201 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == 7202 Intrinsic::ppc_altivec_lvsl) { 7203 // We've found another LVSL, and this address if an aligned 7204 // multiple of that one. The results will be the same, so use the 7205 // one we've just found instead. 7206 7207 return SDValue(*UI, 0); 7208 } 7209 } 7210 } 7211 } 7212 case ISD::BSWAP: 7213 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 7214 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7215 N->getOperand(0).hasOneUse() && 7216 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 7217 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 7218 TM.getSubtarget<PPCSubtarget>().isPPC64() && 7219 N->getValueType(0) == MVT::i64))) { 7220 SDValue Load = N->getOperand(0); 7221 LoadSDNode *LD = cast<LoadSDNode>(Load); 7222 // Create the byte-swapping load. 
7223 SDValue Ops[] = { 7224 LD->getChain(), // Chain 7225 LD->getBasePtr(), // Ptr 7226 DAG.getValueType(N->getValueType(0)) // VT 7227 }; 7228 SDValue BSLoad = 7229 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl, 7230 DAG.getVTList(N->getValueType(0) == MVT::i64 ? 7231 MVT::i64 : MVT::i32, MVT::Other), 7232 Ops, 3, LD->getMemoryVT(), LD->getMemOperand()); 7233 7234 // If this is an i16 load, insert the truncate. 7235 SDValue ResVal = BSLoad; 7236 if (N->getValueType(0) == MVT::i16) 7237 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad); 7238 7239 // First, combine the bswap away. This makes the value produced by the 7240 // load dead. 7241 DCI.CombineTo(N, ResVal); 7242 7243 // Next, combine the load away, we give it a bogus result value but a real 7244 // chain result. The result value is dead because the bswap is dead. 7245 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1)); 7246 7247 // Return N so it doesn't get rechecked! 7248 return SDValue(N, 0); 7249 } 7250 7251 break; 7252 case PPCISD::VCMP: { 7253 // If a VCMPo node already exists with exactly the same operands as this 7254 // node, use its result instead of this node (VCMPo computes both a CR6 and 7255 // a normal output). 7256 // 7257 if (!N->getOperand(0).hasOneUse() && 7258 !N->getOperand(1).hasOneUse() && 7259 !N->getOperand(2).hasOneUse()) { 7260 7261 // Scan all of the users of the LHS, looking for VCMPo's that match. 7262 SDNode *VCMPoNode = 0; 7263 7264 SDNode *LHSN = N->getOperand(0).getNode(); 7265 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 7266 UI != E; ++UI) 7267 if (UI->getOpcode() == PPCISD::VCMPo && 7268 UI->getOperand(1) == N->getOperand(1) && 7269 UI->getOperand(2) == N->getOperand(2) && 7270 UI->getOperand(0) == N->getOperand(0)) { 7271 VCMPoNode = *UI; 7272 break; 7273 } 7274 7275 // If there is no VCMPo node, or if the flag value has a single use, don't 7276 // transform this. 
7277 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 7278 break; 7279 7280 // Look at the (necessarily single) use of the flag value. If it has a 7281 // chain, this transformation is more complex. Note that multiple things 7282 // could use the value result, which we should ignore. 7283 SDNode *FlagUser = 0; 7284 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 7285 FlagUser == 0; ++UI) { 7286 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 7287 SDNode *User = *UI; 7288 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 7289 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) { 7290 FlagUser = User; 7291 break; 7292 } 7293 } 7294 } 7295 7296 // If the user is a MFCR instruction, we know this is safe. Otherwise we 7297 // give up for right now. 7298 if (FlagUser->getOpcode() == PPCISD::MFCR) 7299 return SDValue(VCMPoNode, 0); 7300 } 7301 break; 7302 } 7303 case ISD::BR_CC: { 7304 // If this is a branch on an altivec predicate comparison, lower this so 7305 // that we don't have to do a MFCR: instead, branch directly on CR6. This 7306 // lowering is done pre-legalize, because the legalizer lowers the predicate 7307 // compare down to code that is difficult to reassemble. 7308 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 7309 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3); 7310 7311 // Sometimes the promoted value of the intrinsic is ANDed by some non-zero 7312 // value. If so, pass-through the AND to get to the intrinsic. 
7313 if (LHS.getOpcode() == ISD::AND && 7314 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 7315 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 7316 Intrinsic::ppc_is_decremented_ctr_nonzero && 7317 isa<ConstantSDNode>(LHS.getOperand(1)) && 7318 !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> 7319 isZero()) 7320 LHS = LHS.getOperand(0); 7321 7322 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 7323 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 7324 Intrinsic::ppc_is_decremented_ctr_nonzero && 7325 isa<ConstantSDNode>(RHS)) { 7326 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 7327 "Counter decrement comparison is not EQ or NE"); 7328 7329 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 7330 bool isBDNZ = (CC == ISD::SETEQ && Val) || 7331 (CC == ISD::SETNE && !Val); 7332 7333 // We now need to make the intrinsic dead (it cannot be instruction 7334 // selected). 7335 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 7336 assert(LHS.getNode()->hasOneUse() && 7337 "Counter decrement has more than one use"); 7338 7339 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 7340 N->getOperand(0), N->getOperand(4)); 7341 } 7342 7343 int CompareOpc; 7344 bool isDot; 7345 7346 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 7347 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 7348 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 7349 assert(isDot && "Can't compare against a vector result!"); 7350 7351 // If this is a comparison against something other than 0/1, then we know 7352 // that the condition is never/always true. 7353 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 7354 if (Val != 0 && Val != 1) { 7355 if (CC == ISD::SETEQ) // Cond never true, remove branch. 7356 return N->getOperand(0); 7357 // Always !=, turn it into an unconditional branch. 
7358 return DAG.getNode(ISD::BR, dl, MVT::Other, 7359 N->getOperand(0), N->getOperand(4)); 7360 } 7361 7362 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 7363 7364 // Create the PPCISD altivec 'dot' comparison node. 7365 SDValue Ops[] = { 7366 LHS.getOperand(2), // LHS of compare 7367 LHS.getOperand(3), // RHS of compare 7368 DAG.getConstant(CompareOpc, MVT::i32) 7369 }; 7370 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 7371 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 7372 7373 // Unpack the result based on how the target uses it. 7374 PPC::Predicate CompOpc; 7375 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 7376 default: // Can't happen, don't crash on invalid number though. 7377 case 0: // Branch on the value of the EQ bit of CR6. 7378 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 7379 break; 7380 case 1: // Branch on the inverted value of the EQ bit of CR6. 7381 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 7382 break; 7383 case 2: // Branch on the value of the LT bit of CR6. 7384 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 7385 break; 7386 case 3: // Branch on the inverted value of the LT bit of CR6. 7387 CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_GE : PPC::PRED_LT; 7388 break; 7389 } 7390 7391 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 7392 DAG.getConstant(CompOpc, MVT::i32), 7393 DAG.getRegister(PPC::CR6, MVT::i32), 7394 N->getOperand(4), CompNode.getValue(1)); 7395 } 7396 break; 7397 } 7398 } 7399 7400 return SDValue(); 7401} 7402 7403//===----------------------------------------------------------------------===// 7404// Inline Assembly Support 7405//===----------------------------------------------------------------------===// 7406 7407void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 7408 APInt &KnownZero, 7409 APInt &KnownOne, 7410 const SelectionDAG &DAG, 7411 unsigned Depth) const { 7412 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 7413 switch (Op.getOpcode()) { 7414 default: break; 7415 case PPCISD::LBRX: { 7416 // lhbrx is known to have the top bits cleared out. 7417 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 7418 KnownZero = 0xFFFF0000; 7419 break; 7420 } 7421 case ISD::INTRINSIC_WO_CHAIN: { 7422 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 7423 default: break; 7424 case Intrinsic::ppc_altivec_vcmpbfp_p: 7425 case Intrinsic::ppc_altivec_vcmpeqfp_p: 7426 case Intrinsic::ppc_altivec_vcmpequb_p: 7427 case Intrinsic::ppc_altivec_vcmpequh_p: 7428 case Intrinsic::ppc_altivec_vcmpequw_p: 7429 case Intrinsic::ppc_altivec_vcmpgefp_p: 7430 case Intrinsic::ppc_altivec_vcmpgtfp_p: 7431 case Intrinsic::ppc_altivec_vcmpgtsb_p: 7432 case Intrinsic::ppc_altivec_vcmpgtsh_p: 7433 case Intrinsic::ppc_altivec_vcmpgtsw_p: 7434 case Intrinsic::ppc_altivec_vcmpgtub_p: 7435 case Intrinsic::ppc_altivec_vcmpgtuh_p: 7436 case Intrinsic::ppc_altivec_vcmpgtuw_p: 7437 KnownZero = ~1U; // All bits but the low one are known to be zero. 7438 break; 7439 } 7440 } 7441 } 7442} 7443 7444 7445/// getConstraintType - Given a constraint, return the type of 7446/// constraint it is for this target. 
7447PPCTargetLowering::ConstraintType 7448PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 7449 if (Constraint.size() == 1) { 7450 switch (Constraint[0]) { 7451 default: break; 7452 case 'b': 7453 case 'r': 7454 case 'f': 7455 case 'v': 7456 case 'y': 7457 return C_RegisterClass; 7458 case 'Z': 7459 // FIXME: While Z does indicate a memory constraint, it specifically 7460 // indicates an r+r address (used in conjunction with the 'y' modifier 7461 // in the replacement string). Currently, we're forcing the base 7462 // register to be r0 in the asm printer (which is interpreted as zero) 7463 // and forming the complete address in the second register. This is 7464 // suboptimal. 7465 return C_Memory; 7466 } 7467 } 7468 return TargetLowering::getConstraintType(Constraint); 7469} 7470 7471/// Examine constraint type and operand type and determine a weight value. 7472/// This object must already have been set up with the operand type 7473/// and the current alternative constraint selected. 7474TargetLowering::ConstraintWeight 7475PPCTargetLowering::getSingleConstraintMatchWeight( 7476 AsmOperandInfo &info, const char *constraint) const { 7477 ConstraintWeight weight = CW_Invalid; 7478 Value *CallOperandVal = info.CallOperandVal; 7479 // If we don't have a value, we can't do a match, 7480 // but allow it at the lowest weight. 7481 if (CallOperandVal == NULL) 7482 return CW_Default; 7483 Type *type = CallOperandVal->getType(); 7484 // Look at the constraint type. 
7485 switch (*constraint) { 7486 default: 7487 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 7488 break; 7489 case 'b': 7490 if (type->isIntegerTy()) 7491 weight = CW_Register; 7492 break; 7493 case 'f': 7494 if (type->isFloatTy()) 7495 weight = CW_Register; 7496 break; 7497 case 'd': 7498 if (type->isDoubleTy()) 7499 weight = CW_Register; 7500 break; 7501 case 'v': 7502 if (type->isVectorTy()) 7503 weight = CW_Register; 7504 break; 7505 case 'y': 7506 weight = CW_Register; 7507 break; 7508 case 'Z': 7509 weight = CW_Memory; 7510 break; 7511 } 7512 return weight; 7513} 7514 7515std::pair<unsigned, const TargetRegisterClass*> 7516PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 7517 EVT VT) const { 7518 if (Constraint.size() == 1) { 7519 // GCC RS6000 Constraint Letters 7520 switch (Constraint[0]) { 7521 case 'b': // R1-R31 7522 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 7523 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 7524 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 7525 case 'r': // R0-R31 7526 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 7527 return std::make_pair(0U, &PPC::G8RCRegClass); 7528 return std::make_pair(0U, &PPC::GPRCRegClass); 7529 case 'f': 7530 if (VT == MVT::f32 || VT == MVT::i32) 7531 return std::make_pair(0U, &PPC::F4RCRegClass); 7532 if (VT == MVT::f64 || VT == MVT::i64) 7533 return std::make_pair(0U, &PPC::F8RCRegClass); 7534 break; 7535 case 'v': 7536 return std::make_pair(0U, &PPC::VRRCRegClass); 7537 case 'y': // crrc 7538 return std::make_pair(0U, &PPC::CRRCRegClass); 7539 } 7540 } 7541 7542 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 7543} 7544 7545 7546/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 7547/// vector. If it is invalid, don't add anything to Ops. 
7548void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 7549 std::string &Constraint, 7550 std::vector<SDValue>&Ops, 7551 SelectionDAG &DAG) const { 7552 SDValue Result(0,0); 7553 7554 // Only support length 1 constraints. 7555 if (Constraint.length() > 1) return; 7556 7557 char Letter = Constraint[0]; 7558 switch (Letter) { 7559 default: break; 7560 case 'I': 7561 case 'J': 7562 case 'K': 7563 case 'L': 7564 case 'M': 7565 case 'N': 7566 case 'O': 7567 case 'P': { 7568 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 7569 if (!CST) return; // Must be an immediate to match. 7570 unsigned Value = CST->getZExtValue(); 7571 switch (Letter) { 7572 default: llvm_unreachable("Unknown constraint letter!"); 7573 case 'I': // "I" is a signed 16-bit constant. 7574 if ((short)Value == (int)Value) 7575 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7576 break; 7577 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 7578 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 7579 if ((short)Value == 0) 7580 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7581 break; 7582 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 7583 if ((Value >> 16) == 0) 7584 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7585 break; 7586 case 'M': // "M" is a constant that is greater than 31. 7587 if (Value > 31) 7588 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7589 break; 7590 case 'N': // "N" is a positive constant that is an exact power of two. 7591 if ((int)Value > 0 && isPowerOf2_32(Value)) 7592 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7593 break; 7594 case 'O': // "O" is the constant zero. 7595 if (Value == 0) 7596 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7597 break; 7598 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 
7599 if ((short)-Value == (int)-Value) 7600 Result = DAG.getTargetConstant(Value, Op.getValueType()); 7601 break; 7602 } 7603 break; 7604 } 7605 } 7606 7607 if (Result.getNode()) { 7608 Ops.push_back(Result); 7609 return; 7610 } 7611 7612 // Handle standard constraint letters. 7613 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); 7614} 7615 7616// isLegalAddressingMode - Return true if the addressing mode represented 7617// by AM is legal for this target, for a load/store of the specified type. 7618bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 7619 Type *Ty) const { 7620 // FIXME: PPC does not allow r+i addressing modes for vectors! 7621 7622 // PPC allows a sign-extended 16-bit immediate field. 7623 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 7624 return false; 7625 7626 // No global is ever allowed as a base. 7627 if (AM.BaseGV) 7628 return false; 7629 7630 // PPC only support r+r, 7631 switch (AM.Scale) { 7632 case 0: // "r+i" or just "i", depending on HasBaseReg. 7633 break; 7634 case 1: 7635 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 7636 return false; 7637 // Otherwise we have r+r or r+i. 7638 break; 7639 case 2: 7640 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 7641 return false; 7642 // Allow 2*r as r+r. 7643 break; 7644 default: 7645 // No other scales are supported. 7646 return false; 7647 } 7648 7649 return true; 7650} 7651 7652SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, 7653 SelectionDAG &DAG) const { 7654 MachineFunction &MF = DAG.getMachineFunction(); 7655 MachineFrameInfo *MFI = MF.getFrameInfo(); 7656 MFI->setReturnAddressIsTaken(true); 7657 7658 SDLoc dl(Op); 7659 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7660 7661 // Make sure the function does not optimize away the store of the RA to 7662 // the stack. 
7663 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 7664 FuncInfo->setLRStoreRequired(); 7665 bool isPPC64 = PPCSubTarget.isPPC64(); 7666 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 7667 7668 if (Depth > 0) { 7669 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); 7670 SDValue Offset = 7671 7672 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI), 7673 isPPC64? MVT::i64 : MVT::i32); 7674 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 7675 DAG.getNode(ISD::ADD, dl, getPointerTy(), 7676 FrameAddr, Offset), 7677 MachinePointerInfo(), false, false, false, 0); 7678 } 7679 7680 // Just load the return address off the stack. 7681 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); 7682 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), 7683 RetAddrFI, MachinePointerInfo(), false, false, false, 0); 7684} 7685 7686SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, 7687 SelectionDAG &DAG) const { 7688 SDLoc dl(Op); 7689 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); 7690 7691 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 7692 bool isPPC64 = PtrVT == MVT::i64; 7693 7694 MachineFunction &MF = DAG.getMachineFunction(); 7695 MachineFrameInfo *MFI = MF.getFrameInfo(); 7696 MFI->setFrameAddressIsTaken(true); 7697 7698 // Naked functions never have a frame pointer, and so we use r1. For all 7699 // other functions, this decision must be delayed until during PEI. 7700 unsigned FrameReg; 7701 if (MF.getFunction()->getAttributes().hasAttribute( 7702 AttributeSet::FunctionIndex, Attribute::Naked)) 7703 FrameReg = isPPC64 ? PPC::X1 : PPC::R1; 7704 else 7705 FrameReg = isPPC64 ? 
PPC::FP8 : PPC::FP; 7706 7707 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, 7708 PtrVT); 7709 while (Depth--) 7710 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), 7711 FrameAddr, MachinePointerInfo(), false, false, 7712 false, 0); 7713 return FrameAddr; 7714} 7715 7716bool 7717PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { 7718 // The PowerPC target isn't yet aware of offsets. 7719 return false; 7720} 7721 7722/// getOptimalMemOpType - Returns the target specific optimal type for load 7723/// and store operations as a result of memset, memcpy, and memmove 7724/// lowering. If DstAlign is zero that means it's safe to destination 7725/// alignment can satisfy any constraint. Similarly if SrcAlign is zero it 7726/// means there isn't a need to check it against alignment requirement, 7727/// probably because the source does not need to be loaded. If 'IsMemset' is 7728/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that 7729/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy 7730/// source is constant so it does not need to be loaded. 7731/// It returns EVT::Other if the type should be determined using generic 7732/// target-independent logic. 7733EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, 7734 unsigned DstAlign, unsigned SrcAlign, 7735 bool IsMemset, bool ZeroMemset, 7736 bool MemcpyStrSrc, 7737 MachineFunction &MF) const { 7738 if (this->PPCSubTarget.isPPC64()) { 7739 return MVT::i64; 7740 } else { 7741 return MVT::i32; 7742 } 7743} 7744 7745bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, 7746 bool *Fast) const { 7747 if (DisablePPCUnaligned) 7748 return false; 7749 7750 // PowerPC supports unaligned memory access for simple non-vector types. 
7751 // Although accessing unaligned addresses is not as efficient as accessing 7752 // aligned addresses, it is generally more efficient than manual expansion, 7753 // and generally only traps for software emulation when crossing page 7754 // boundaries. 7755 7756 if (!VT.isSimple()) 7757 return false; 7758 7759 if (VT.getSimpleVT().isVector()) 7760 return false; 7761 7762 if (VT == MVT::ppcf128) 7763 return false; 7764 7765 if (Fast) 7766 *Fast = true; 7767 7768 return true; 7769} 7770 7771/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than 7772/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to 7773/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd 7774/// is expanded to mul + add. 7775bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const { 7776 if (!VT.isSimple()) 7777 return false; 7778 7779 switch (VT.getSimpleVT().SimpleTy) { 7780 case MVT::f32: 7781 case MVT::f64: 7782 case MVT::v4f32: 7783 return true; 7784 default: 7785 break; 7786 } 7787 7788 return false; 7789} 7790 7791Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 7792 if (DisableILPPref) 7793 return TargetLowering::getSchedulingPreference(N); 7794 7795 return Sched::ILP; 7796} 7797 7798