PPCISelLowering.cpp revision fd94f0ab358ea12cf2b17c9628207b3fd11d40b4
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
                                       CCValAssign::LocInfo &LocInfo,
                                       ISD::ArgFlagsTy &ArgFlags,
                                       CCState &State);
static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
                                              MVT &LocVT,
                                              CCValAssign::LocInfo &LocInfo,
                                              ISD::ArgFlagsTy &ArgFlags,
                                              CCState &State);
static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
                                                MVT &LocVT,
                                                CCValAssign::LocInfo &LocInfo,
                                                ISD::ArgFlagsTy &ArgFlags,
                                                CCState &State);

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"),
  cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();
  PPCRegInfo = TM.getRegisterInfo();
  PPCII = TM.getInstrInfo();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);
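
  // (Illustrative note, not from the original source: with SREM/UREM expanded,
  // and the *MUL_LOHI/*DIVREM nodes below expanded as well, legalization is
  // expected to compute a remainder as a divide/multiply/subtract sequence,
  // roughly "a % b == a - (a / b) * b", i.e. divw/mullw/subf on 32-bit
  // PowerPC.)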

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FMA  , MVT::f64, Legal);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);
  setOperationAction(ISD::FMA  , MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  if (Subtarget->hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL,  MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);

    // frin does not implement "ties to even." Thus, this is safe only in
    // fast-math mode.
    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
      setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);

      // These need to set FE_INEXACT, and use a custom inserter.
      setOperationAction(ISD::FRINT, MVT::f64, Legal);
      setOperationAction(ISD::FRINT, MVT::f32, Legal);
    }
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget->hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32  , Legal);
    setOperationAction(ISD::CTPOP, MVT::i64  , Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
    setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);
  setOperationAction(ISD::ROTR, MVT::i64   , Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no other
  // SjLj exception interfaces are implemented; please don't build your own
  // exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
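
  // (Illustrative note, not from the original source: these nodes are reached
  // via the @llvm.eh.sjlj.setjmp / @llvm.eh.sjlj.longjmp intrinsics, e.g. as
  // produced for Clang's __builtin_setjmp/__builtin_longjmp.)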

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress,  MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType (ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (PPCSubTarget.hasFPCVT()) {
    if (Subtarget->has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }
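
    // (Illustrative note, not from the original source: the promotions above
    // mean that, e.g., an AND of two v8i16 values is bitcast to v4i32,
    // performed as a single v4i32 AND (one vand), and bitcast back; the bit
    // pattern is unchanged, so any lane width works.)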

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD,  MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);
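
  // (Illustrative note, not from the original source: the BSWAP combine, for
  // example, lets a byte-swapped load or store be matched to the
  // byte-reversed memory ops (lhbrx/lwbrx, sthbrx/stwbrx) instead of a plain
  // load plus a sequence of shifts and rotates.)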

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  const TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on a 4-byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  if (VectorType *VTy = dyn_cast<VectorType>(Ty))
    if (VTy->getBitWidth() >= 128)
      return 16;

  // Everything else is passed on an 8-byte boundary on PPC64 and a 4-byte
  // boundary on PPC32.
  if (PPCSubTarget.isPPC64())
    return 8;

  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD: return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFCR: return "PPCISD::MFCR";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LARX: return "PPCISD::LARX";
  case PPCISD::STCX: return "PPCISD::STCX";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  }
}

EVT PPCTargetLowering::getSetCCResultType(EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}
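
// (Illustrative note, not from the original source: scalar compares produce
// an i32 boolean, while a vector compare keeps its shape with integer lanes;
// e.g. a SETCC of two v4f32 values yields a v4i32 mask.)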

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}
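
// (Illustrative note, not from the original source: for UnitSize == 1 in the
// binary case, isVMerge checks for the vmrglb byte pattern, which interleaves
// the low halves of the two inputs:
//   {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31}
// where mask elements 0-15 index the LHS vector and 16-31 index the RHS.)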

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte
  // element splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across each chunk.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)                           // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.
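
  // (Illustrative note, not from the original source: the code below shrinks
  // a wide constant to the requested splat size, e.g. a v4i32 build_vector of
  // 0x00010001 with ByteSize == 2 is halved from 0x00010001 to 0x0001, which
  // fits the 5-bit field and is returned as the immediate 1, i.e.
  // "vspltish 1".)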

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match; zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5-bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
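    // (Illustrative note, not from the original source: e.g. for an address
    // computed as (X | 12) where X is known to have its low four bits zero,
    // X | 12 == X + 12, so the OR can still be selected as the [r+r] form of
    // an add-based addressing mode.)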
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG) const {
  // FIXME dl should come from parent load or store, not from address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
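      // (Illustrative note, not from the original source: e.g. the absolute
      // address 0x12345678 splits into lis of 0x1234 plus a displacement of
      // 0x5678; because the 16-bit displacement is sign-extended, the
      // "Addr - (signed short)Addr" term below bumps the high half by one
      // when the low half has bit 15 set.)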
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDValue N, SDValue &Disp,
                                                 SDValue &Base,
                                                 SelectionDAG &DAG) const {
  // FIXME dl should come from the parent load or store, not the address
  DebugLoc dl = N.getDebugLoc();
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getZExtValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field,
      // codegen this as "d, 0".
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) {
        int Addr = (int)CN->getZExtValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);
        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base),0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}


/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored.  Check for
    // those situations here, and try with swapped Base/Offset instead.
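    // (Illustrative note, not from the original source: in the updating
    // [r+r] forms, e.g. "lwzux rD, rA, rB", the new effective address is
    // written back into rA, so which operand ends up as Base determines
    // which register gets updated.)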
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
  HiOpFlags = PPCII::MO_HA16;
  LoOpFlags = PPCII::MO_LO16;

  // Don't use the PIC base if not in the PIC relocation model, or if we are
  // on a non-Darwin platform.  We don't support PIC on other platforms yet.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
               TM.getSubtarget<PPCSubtarget>().isDarwin();
  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr,
  // make sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  DebugLoc DL = HiPart.getDebugLoc();

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
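
  // (Illustrative note, not from the original source: in the non-PIC case the
  // Hi/Lo pair typically materializes as something like
  //   lis  r3, ha16(_sym)
  //   addi r3, r3, lo16(_sym)
  // using the MO_HA16/MO_LO16 flags set up in GetLabelAccessInfo.)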

  // Generate non-PIC code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, CP->getDebugLoc(), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return DAG.getNode(PPCISD::TOC_ENTRY, JT->getDebugLoc(), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();

  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
                                                 SelectionDAG &DAG) const {

  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
  DebugLoc dl = GA->getDebugLoc();
  const GlobalValue *GV = GA->getGlobal();
  EVT PtrVT = getPointerTy();
  bool is64bit = PPCSubTarget.isPPC64();

  TLSModel::Model Model = getTargetMachine().getTLSModel(GV);

  if (Model == TLSModel::LocalExec) {
    SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL16_HA);
    SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
                                               PPCII::MO_TPREL16_LO);
    SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
                                     is64bit ? MVT::i64 : MVT::i32);
MVT::i64 : MVT::i32); 1446 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 1447 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 1448 } 1449 1450 if (!is64bit) 1451 llvm_unreachable("only local-exec is currently supported for ppc32"); 1452 1453 if (Model == TLSModel::InitialExec) { 1454 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1455 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1456 SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 1457 PtrVT, GOTReg, TGA); 1458 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 1459 PtrVT, TGA, TPOffsetHi); 1460 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGA); 1461 } 1462 1463 if (Model == TLSModel::GeneralDynamic) { 1464 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1465 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1466 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 1467 GOTReg, TGA); 1468 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, 1469 GOTEntryHi, TGA); 1470 1471 // We need a chain node, and don't have one handy. The underlying 1472 // call has no side effects, so using the function entry node 1473 // suffices. 1474 SDValue Chain = DAG.getEntryNode(); 1475 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1476 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1477 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, 1478 PtrVT, ParmReg, TGA); 1479 // The return value from GET_TLS_ADDR really is in X3 already, but 1480 // some hacks are needed here to tie everything together. The extra 1481 // copies dissolve during subsequent transforms. 1482 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1483 return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); 1484 } 1485 1486 if (Model == TLSModel::LocalDynamic) { 1487 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1488 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1489 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 1490 GOTReg, TGA); 1491 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, 1492 GOTEntryHi, TGA); 1493 1494 // We need a chain node, and don't have one handy. The underlying 1495 // call has no side effects, so using the function entry node 1496 // suffices. 1497 SDValue Chain = DAG.getEntryNode(); 1498 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1499 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1500 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, 1501 PtrVT, ParmReg, TGA); 1502 // The return value from GET_TLSLD_ADDR really is in X3 already, but 1503 // some hacks are needed here to tie everything together. The extra 1504 // copies dissolve during subsequent transforms. 1505 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1506 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, 1507 Chain, ParmReg, TGA); 1508 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 1509 } 1510 1511 llvm_unreachable("Unknown TLS model!"); 1512} 1513 1514SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 1515 SelectionDAG &DAG) const { 1516 EVT PtrVT = Op.getValueType(); 1517 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1518 DebugLoc DL = GSDN->getDebugLoc(); 1519 const GlobalValue *GV = GSDN->getGlobal(); 1520 1521 // 64-bit SVR4 ABI code is always position-independent. 1522 // The actual address of the GlobalValue is stored in the TOC. 
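  // (TOC_ENTRY is in effect a single doubleword load: the global's address is fetched from its TOC slot via the TOC pointer kept in X2.)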
1523 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1524 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 1525 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, 1526 DAG.getRegister(PPC::X2, MVT::i64)); 1527 } 1528 1529 unsigned MOHiFlag, MOLoFlag; 1530 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); 1531 1532 SDValue GAHi = 1533 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 1534 SDValue GALo = 1535 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 1536 1537 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 1538 1539 // If the global reference is actually to a non-lazy-pointer, we have to do an 1540 // extra load to get the address of the global. 1541 if (MOHiFlag & PPCII::MO_NLP_FLAG) 1542 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 1543 false, false, false, 0); 1544 return Ptr; 1545} 1546 1547SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 1548 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1549 DebugLoc dl = Op.getDebugLoc(); 1550 1551 // If we're comparing for equality to zero, expose the fact that this is 1552 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 1553 // fold the new nodes. 1554 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1555 if (C->isNullValue() && CC == ISD::SETEQ) { 1556 EVT VT = Op.getOperand(0).getValueType(); 1557 SDValue Zext = Op.getOperand(0); 1558 if (VT.bitsLT(MVT::i32)) { 1559 VT = MVT::i32; 1560 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 1561 } 1562 unsigned Log2b = Log2_32(VT.getSizeInBits()); 1563 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 1564 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 1565 DAG.getConstant(Log2b, MVT::i32)); 1566 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 1567 } 1568 // Leave comparisons against 0 and -1 alone for now, since they're usually 1569 // optimized. FIXME: revisit this when we can custom lower all setcc 1570 // optimizations. 1571 if (C->isAllOnesValue() || C->isNullValue()) 1572 return SDValue(); 1573 } 1574 1575 // If we have an integer seteq/setne, turn it into a compare against zero 1576 // by xor'ing the rhs with the lhs, which is faster than setting a 1577 // condition register, reading it back out, and masking the correct bit. The 1578 // normal approach here uses sub to do this instead of xor. Using xor exposes 1579 // the result to other bit-twiddling opportunities.
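  // For example, seteq x, y becomes seteq (xor x, y), 0, and the comparison against zero can then be lowered with the ctlz/srl idiom above.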
1580 EVT LHSVT = Op.getOperand(0).getValueType(); 1581 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1582 EVT VT = Op.getValueType(); 1583 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1584 Op.getOperand(1)); 1585 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1586 } 1587 return SDValue(); 1588} 1589 1590SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1591 const PPCSubtarget &Subtarget) const { 1592 SDNode *Node = Op.getNode(); 1593 EVT VT = Node->getValueType(0); 1594 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1595 SDValue InChain = Node->getOperand(0); 1596 SDValue VAListPtr = Node->getOperand(1); 1597 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1598 DebugLoc dl = Node->getDebugLoc(); 1599 1600 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1601 1602 // gpr_index 1603 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1604 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1605 false, false, 0); 1606 InChain = GprIndex.getValue(1); 1607 1608 if (VT == MVT::i64) { 1609 // Check if GprIndex is even 1610 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1611 DAG.getConstant(1, MVT::i32)); 1612 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1613 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1614 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1615 DAG.getConstant(1, MVT::i32)); 1616 // Align GprIndex to be even if it isn't 1617 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1618 GprIndex); 1619 } 1620 1621 // fpr index is 1 byte after gpr 1622 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1623 DAG.getConstant(1, MVT::i32)); 1624 1625 // fpr 1626 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1627 FprPtr, MachinePointerInfo(SV), MVT::i8, 1628 false, false, 0); 1629 InChain = FprIndex.getValue(1); 1630 1631 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1632 DAG.getConstant(8, MVT::i32)); 1633 1634 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1635 DAG.getConstant(4, MVT::i32)); 1636 1637 // areas 1638 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1639 MachinePointerInfo(), false, false, 1640 false, 0); 1641 InChain = OverflowArea.getValue(1); 1642 1643 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1644 MachinePointerInfo(), false, false, 1645 false, 0); 1646 InChain = RegSaveArea.getValue(1); 1647 1648 // select overflow_area if index >= 8 1649 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1650 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1651 1652 // adjustment constant gpr_index * 4/8 1653 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1654 VT.isInteger() ? GprIndex : FprIndex, 1655 DAG.getConstant(VT.isInteger() ? 4 : 8, 1656 MVT::i32)); 1657 1658 // OurReg = RegSaveArea + RegConstant 1659 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1660 RegConstant); 1661 1662 // Floating types are 32 bytes into RegSaveArea 1663 if (VT.isFloatingPoint()) 1664 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1665 DAG.getConstant(32, MVT::i32)); 1666 1667 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1668 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1669 VT.isInteger() ? GprIndex : FprIndex, 1670 DAG.getConstant(VT == MVT::i64 ?
2 : 1, 1671 MVT::i32)); 1672 1673 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1674 VT.isInteger() ? VAListPtr : FprPtr, 1675 MachinePointerInfo(SV), 1676 MVT::i8, false, false, 0); 1677 1678 // determine if we should load from reg_save_area or overflow_area 1679 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1680 1681 // increase overflow_area by 4/8 if gpr/fpr >= 8 1682 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1683 DAG.getConstant(VT.isInteger() ? 4 : 8, 1684 MVT::i32)); 1685 1686 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1687 OverflowAreaPlusN); 1688 1689 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1690 OverflowAreaPtr, 1691 MachinePointerInfo(), 1692 MVT::i32, false, false, 0); 1693 1694 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1695 false, false, false, 0); 1696} 1697 1698SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1699 SelectionDAG &DAG) const { 1700 return Op.getOperand(0); 1701} 1702 1703SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1704 SelectionDAG &DAG) const { 1705 SDValue Chain = Op.getOperand(0); 1706 SDValue Trmp = Op.getOperand(1); // trampoline 1707 SDValue FPtr = Op.getOperand(2); // nested function 1708 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1709 DebugLoc dl = Op.getDebugLoc(); 1710 1711 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1712 bool isPPC64 = (PtrVT == MVT::i64); 1713 Type *IntPtrTy = 1714 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 1715 *DAG.getContext()); 1716 1717 TargetLowering::ArgListTy Args; 1718 TargetLowering::ArgListEntry Entry; 1719 1720 Entry.Ty = IntPtrTy; 1721 Entry.Node = Trmp; Args.push_back(Entry); 1722 1723 // TrampSize == (isPPC64 ? 48 : 40); 1724 Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, 1725 isPPC64 ? MVT::i64 : MVT::i32); 1726 Args.push_back(Entry); 1727 1728 Entry.Node = FPtr; Args.push_back(Entry); 1729 Entry.Node = Nest; Args.push_back(Entry); 1730 1731 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1732 TargetLowering::CallLoweringInfo CLI(Chain, 1733 Type::getVoidTy(*DAG.getContext()), 1734 false, false, false, false, 0, 1735 CallingConv::C, 1736 /*isTailCall=*/false, 1737 /*doesNotRet=*/false, 1738 /*isReturnValueUsed=*/true, 1739 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1740 Args, DAG, dl); 1741 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 1742 1743 return CallResult.second; 1744} 1745 1746SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1747 const PPCSubtarget &Subtarget) const { 1748 MachineFunction &MF = DAG.getMachineFunction(); 1749 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1750 1751 DebugLoc dl = Op.getDebugLoc(); 1752 1753 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1754 // vastart just stores the address of the VarArgsFrameIndex slot into the 1755 // memory location argument. 1756 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1757 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1758 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1759 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1760 MachinePointerInfo(SV), 1761 false, false, 0); 1762 } 1763 1764 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1765 // We suppose the given va_list is already allocated.
1766 // 1767 // typedef struct { 1768 // char gpr; /* index into the array of 8 GPRs 1769 // * stored in the register save area 1770 // * gpr=0 corresponds to r3, 1771 // * gpr=1 to r4, etc. 1772 // */ 1773 // char fpr; /* index into the array of 8 FPRs 1774 // * stored in the register save area 1775 // * fpr=0 corresponds to f1, 1776 // * fpr=1 to f2, etc. 1777 // */ 1778 // char *overflow_arg_area; 1779 // /* location on stack that holds 1780 // * the next overflow argument 1781 // */ 1782 // char *reg_save_area; 1783 // /* where r3:r10 and f1:f8 (if saved) 1784 // * are stored 1785 // */ 1786 // } va_list[1]; 1787 1788 1789 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1790 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1791 1792 1793 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1794 1795 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1796 PtrVT); 1797 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1798 PtrVT); 1799 1800 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1801 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1802 1803 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1804 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1805 1806 uint64_t FPROffset = 1; 1807 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1808 1809 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1810 1811 // Store first byte : number of int regs 1812 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1813 Op.getOperand(1), 1814 MachinePointerInfo(SV), 1815 MVT::i8, false, false, 0); 1816 uint64_t nextOffset = FPROffset; 1817 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1818 ConstFPROffset); 1819 1820 // Store second byte : number of float regs 1821 SDValue secondStore = 1822 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1823 MachinePointerInfo(SV, nextOffset), MVT::i8, 1824 false, false, 0); 1825 nextOffset += StackOffset; 1826 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1827 1828 // Store second word : arguments given on stack 1829 SDValue thirdStore = 1830 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1831 MachinePointerInfo(SV, nextOffset), 1832 false, false, 0); 1833 nextOffset += FrameOffset; 1834 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1835 1836 // Store third word : arguments given in registers 1837 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1838 MachinePointerInfo(SV, nextOffset), 1839 false, false, 0); 1840 1841} 1842 1843#include "PPCGenCallingConv.inc" 1844 1845static bool CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1846 CCValAssign::LocInfo &LocInfo, 1847 ISD::ArgFlagsTy &ArgFlags, 1848 CCState &State) { 1849 return true; 1850} 1851 1852static bool CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1853 MVT &LocVT, 1854 CCValAssign::LocInfo &LocInfo, 1855 ISD::ArgFlagsTy &ArgFlags, 1856 CCState &State) { 1857 static const uint16_t ArgRegs[] = { 1858 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1859 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1860 }; 1861 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1862 1863 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1864 1865 // Skip one register if the first unallocated register has an even register 1866 // number and there are still argument registers available which have not been 1867 // allocated yet. 
RegNum is actually an index into ArgRegs, which means we 1868 // need to skip a register if RegNum is odd. 1869 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1870 State.AllocateReg(ArgRegs[RegNum]); 1871 } 1872 1873 // Always return false here, as this function only makes sure that the first 1874 // unallocated register has an odd register number and does not actually 1875 // allocate a register for the current argument. 1876 return false; 1877} 1878 1879static bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1880 MVT &LocVT, 1881 CCValAssign::LocInfo &LocInfo, 1882 ISD::ArgFlagsTy &ArgFlags, 1883 CCState &State) { 1884 static const uint16_t ArgRegs[] = { 1885 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1886 PPC::F8 1887 }; 1888 1889 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1890 1891 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1892 1893 // If there is only one Floating-point register left we need to put both f64 1894 // values of a split ppc_fp128 value on the stack. 1895 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1896 State.AllocateReg(ArgRegs[RegNum]); 1897 } 1898 1899 // Always return false here, as this function only makes sure that the two f64 1900 // values a ppc_fp128 value is split into are both passed in registers or both 1901 // passed on the stack and does not actually allocate a register for the 1902 // current argument. 1903 return false; 1904} 1905 1906/// GetFPR - Get the set of FP registers that should be allocated for arguments, 1907/// on Darwin. 1908static const uint16_t *GetFPR() { 1909 static const uint16_t FPR[] = { 1910 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1911 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1912 }; 1913 1914 return FPR; 1915} 1916 1917/// CalculateStackSlotSize - Calculates the size reserved for this argument on 1918/// the stack. 
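/// For example, with PtrByteSize == 8 a 13-byte by-value aggregate reserves a 16-byte slot.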
1919static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 1920 unsigned PtrByteSize) { 1921 unsigned ArgSize = ArgVT.getSizeInBits()/8; 1922 if (Flags.isByVal()) 1923 ArgSize = Flags.getByValSize(); 1924 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1925 1926 return ArgSize; 1927} 1928 1929SDValue 1930PPCTargetLowering::LowerFormalArguments(SDValue Chain, 1931 CallingConv::ID CallConv, bool isVarArg, 1932 const SmallVectorImpl<ISD::InputArg> 1933 &Ins, 1934 DebugLoc dl, SelectionDAG &DAG, 1935 SmallVectorImpl<SDValue> &InVals) 1936 const { 1937 if (PPCSubTarget.isSVR4ABI()) { 1938 if (PPCSubTarget.isPPC64()) 1939 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 1940 dl, DAG, InVals); 1941 else 1942 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 1943 dl, DAG, InVals); 1944 } else { 1945 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 1946 dl, DAG, InVals); 1947 } 1948} 1949 1950SDValue 1951PPCTargetLowering::LowerFormalArguments_32SVR4( 1952 SDValue Chain, 1953 CallingConv::ID CallConv, bool isVarArg, 1954 const SmallVectorImpl<ISD::InputArg> 1955 &Ins, 1956 DebugLoc dl, SelectionDAG &DAG, 1957 SmallVectorImpl<SDValue> &InVals) const { 1958 1959 // 32-bit SVR4 ABI Stack Frame Layout: 1960 // +-----------------------------------+ 1961 // +--> | Back chain | 1962 // | +-----------------------------------+ 1963 // | | Floating-point register save area | 1964 // | +-----------------------------------+ 1965 // | | General register save area | 1966 // | +-----------------------------------+ 1967 // | | CR save word | 1968 // | +-----------------------------------+ 1969 // | | VRSAVE save word | 1970 // | +-----------------------------------+ 1971 // | | Alignment padding | 1972 // | +-----------------------------------+ 1973 // | | Vector register save area | 1974 // | +-----------------------------------+ 1975 // | | Local variable space | 1976 // | +-----------------------------------+ 1977 // | | Parameter list area | 1978 // | +-----------------------------------+ 1979 // | | LR save word | 1980 // | +-----------------------------------+ 1981 // SP--> +--- | Back chain | 1982 // +-----------------------------------+ 1983 // 1984 // Specifications: 1985 // System V Application Binary Interface PowerPC Processor Supplement 1986 // AltiVec Technology Programming Interface Manual 1987 1988 MachineFunction &MF = DAG.getMachineFunction(); 1989 MachineFrameInfo *MFI = MF.getFrameInfo(); 1990 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1991 1992 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1993 // Potential tail calls could cause overwriting of argument stack slots. 1994 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 1995 (CallConv == CallingConv::Fast)); 1996 unsigned PtrByteSize = 4; 1997 1998 // Assign locations to all of the incoming arguments. 1999 SmallVector<CCValAssign, 16> ArgLocs; 2000 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2001 getTargetMachine(), ArgLocs, *DAG.getContext()); 2002 2003 // Reserve space for the linkage area on the stack. 2004 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 2005 2006 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2007 2008 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2009 CCValAssign &VA = ArgLocs[i]; 2010 2011 // Arguments stored in registers. 
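    // (addLiveIn marks the physical register live-in to this function and returns a fresh virtual register of class RC; CopyFromReg then reads that vreg off the entry chain.)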
2012 if (VA.isRegLoc()) { 2013 const TargetRegisterClass *RC; 2014 EVT ValVT = VA.getValVT(); 2015 2016 switch (ValVT.getSimpleVT().SimpleTy) { 2017 default: 2018 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2019 case MVT::i32: 2020 RC = &PPC::GPRCRegClass; 2021 break; 2022 case MVT::f32: 2023 RC = &PPC::F4RCRegClass; 2024 break; 2025 case MVT::f64: 2026 RC = &PPC::F8RCRegClass; 2027 break; 2028 case MVT::v16i8: 2029 case MVT::v8i16: 2030 case MVT::v4i32: 2031 case MVT::v4f32: 2032 RC = &PPC::VRRCRegClass; 2033 break; 2034 } 2035 2036 // Transform the arguments stored in physical registers into virtual ones. 2037 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2038 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); 2039 2040 InVals.push_back(ArgValue); 2041 } else { 2042 // Argument stored in memory. 2043 assert(VA.isMemLoc()); 2044 2045 unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; 2046 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2047 isImmutable); 2048 2049 // Create load nodes to retrieve arguments from the stack. 2050 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2051 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2052 MachinePointerInfo(), 2053 false, false, false, 0)); 2054 } 2055 } 2056 2057 // Assign locations to all of the incoming aggregate by value arguments. 2058 // Aggregates passed by value are stored in the local variable space of the 2059 // caller's stack frame, right above the parameter list area. 2060 SmallVector<CCValAssign, 16> ByValArgLocs; 2061 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2062 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 2063 2064 // Reserve stack space for the allocations in CCInfo. 2065 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2066 2067 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2068 2069 // Area that is at least reserved in the caller of this function. 2070 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2071 2072 // Set the size that is at least reserved in caller of this function. Tail 2073 // call optimized function's reserved stack space needs to be aligned so that 2074 // taking the difference between two stack areas will result in an aligned 2075 // stack. 2076 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2077 2078 MinReservedArea = 2079 std::max(MinReservedArea, 2080 PPCFrameLowering::getMinCallFrameSize(false, false)); 2081 2082 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2083 getStackAlignment(); 2084 unsigned AlignMask = TargetAlign-1; 2085 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2086 2087 FI->setMinReservedArea(MinReservedArea); 2088 2089 SmallVector<SDValue, 8> MemOps; 2090 2091 // If the function takes variable number of arguments, make a frame index for 2092 // the start of the first vararg value... for expansion of llvm.va_start. 
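  // These slots back the 32-bit SVR4 va_list: VarArgsFrameIndex becomes the reg_save_area pointer and VarArgsStackOffset the overflow_arg_area pointer that LowerVASTART stores above.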
2093 if (isVarArg) { 2094 static const uint16_t GPArgRegs[] = { 2095 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2096 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2097 }; 2098 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2099 2100 static const uint16_t FPArgRegs[] = { 2101 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2102 PPC::F8 2103 }; 2104 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2105 2106 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 2107 NumGPArgRegs)); 2108 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 2109 NumFPArgRegs)); 2110 2111 // Make room for NumGPArgRegs and NumFPArgRegs. 2112 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2113 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 2114 2115 FuncInfo->setVarArgsStackOffset( 2116 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2117 CCInfo.getNextStackOffset(), true)); 2118 2119 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2120 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2121 2122 // The fixed integer arguments of a variadic function are stored to the 2123 // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing 2124 // the result of va_next. 2125 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2126 // Get an existing live-in vreg, or add a new one. 2127 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2128 if (!VReg) 2129 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2130 2131 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2132 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2133 MachinePointerInfo(), false, false, 0); 2134 MemOps.push_back(Store); 2135 // Increment the address by four for the next argument to store 2136 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2137 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2138 } 2139 2140 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2141 // is set. 2142 // The double arguments are stored to the VarArgsFrameIndex 2143 // on the stack. 2144 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2145 // Get an existing live-in vreg, or add a new one. 2146 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 2147 if (!VReg) 2148 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 2149 2150 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 2151 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2152 MachinePointerInfo(), false, false, 0); 2153 MemOps.push_back(Store); 2154 // Increment the address by eight for the next argument to store 2155 SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, 2156 PtrVT); 2157 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2158 } 2159 } 2160 2161 if (!MemOps.empty()) 2162 Chain = DAG.getNode(ISD::TokenFactor, dl, 2163 MVT::Other, &MemOps[0], MemOps.size()); 2164 2165 return Chain; 2166} 2167 2168// PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2169// value to MVT::i64 and then truncate to the correct register size.
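// AssertSext/AssertZext generate no code; they only record that the producer has already extended the value, so later re-extensions of the truncated result can fold away.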
2170SDValue 2171PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2172 SelectionDAG &DAG, SDValue ArgVal, 2173 DebugLoc dl) const { 2174 if (Flags.isSExt()) 2175 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2176 DAG.getValueType(ObjectVT)); 2177 else if (Flags.isZExt()) 2178 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2179 DAG.getValueType(ObjectVT)); 2180 2181 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 2182} 2183 2184// Set the size that is at least reserved in caller of this function. Tail 2185// call optimized functions' reserved stack space needs to be aligned so that 2186// taking the difference between two stack areas will result in an aligned 2187// stack. 2188void 2189PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, 2190 unsigned nAltivecParamsAtEnd, 2191 unsigned MinReservedArea, 2192 bool isPPC64) const { 2193 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2194 // Add the Altivec parameters at the end, if needed. 2195 if (nAltivecParamsAtEnd) { 2196 MinReservedArea = ((MinReservedArea+15)/16)*16; 2197 MinReservedArea += 16*nAltivecParamsAtEnd; 2198 } 2199 MinReservedArea = 2200 std::max(MinReservedArea, 2201 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2202 unsigned TargetAlign 2203 = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2204 getStackAlignment(); 2205 unsigned AlignMask = TargetAlign-1; 2206 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2207 FI->setMinReservedArea(MinReservedArea); 2208} 2209 2210SDValue 2211PPCTargetLowering::LowerFormalArguments_64SVR4( 2212 SDValue Chain, 2213 CallingConv::ID CallConv, bool isVarArg, 2214 const SmallVectorImpl<ISD::InputArg> 2215 &Ins, 2216 DebugLoc dl, SelectionDAG &DAG, 2217 SmallVectorImpl<SDValue> &InVals) const { 2218 // TODO: add description of PPC stack frame format, or at least some docs. 2219 // 2220 MachineFunction &MF = DAG.getMachineFunction(); 2221 MachineFrameInfo *MFI = MF.getFrameInfo(); 2222 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2223 2224 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2225 // Potential tail calls could cause overwriting of argument stack slots. 2226 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2227 (CallConv == CallingConv::Fast)); 2228 unsigned PtrByteSize = 8; 2229 2230 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 2231 // Area that is at least reserved in caller of this function. 2232 unsigned MinReservedArea = ArgOffset; 2233 2234 static const uint16_t GPR[] = { 2235 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2236 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2237 }; 2238 2239 static const uint16_t *FPR = GetFPR(); 2240 2241 static const uint16_t VR[] = { 2242 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2243 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2244 }; 2245 2246 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2247 const unsigned Num_FPR_Regs = 13; 2248 const unsigned Num_VR_Regs = array_lengthof(VR); 2249 2250 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2251 2252 // Add DAG nodes to load the arguments or copy them out of registers. On 2253 // entry to a function on PPC, the arguments start after the linkage area, 2254 // although the first ones are often in registers. 
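  // Note that under the 64-bit SVR4 ABI every non-vector argument owns a doubleword slot in the caller's parameter save area whether or not it arrives in a register; ArgOffset tracks that slot.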
2255 2256 SmallVector<SDValue, 8> MemOps; 2257 unsigned nAltivecParamsAtEnd = 0; 2258 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2259 unsigned CurArgIdx = 0; 2260 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2261 SDValue ArgVal; 2262 bool needsLoad = false; 2263 EVT ObjectVT = Ins[ArgNo].VT; 2264 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2265 unsigned ArgSize = ObjSize; 2266 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2267 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2268 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2269 2270 unsigned CurArgOffset = ArgOffset; 2271 2272 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2273 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2274 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2275 if (isVarArg) { 2276 MinReservedArea = ((MinReservedArea+15)/16)*16; 2277 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2278 Flags, 2279 PtrByteSize); 2280 } else 2281 nAltivecParamsAtEnd++; 2282 } else 2283 // Calculate min reserved area. 2284 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2285 Flags, 2286 PtrByteSize); 2287 2288 // FIXME the codegen can be much improved in some cases. 2289 // We do not have to keep everything in memory. 2290 if (Flags.isByVal()) { 2291 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2292 ObjSize = Flags.getByValSize(); 2293 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2294 // Empty aggregate parameters do not take up registers. Examples: 2295 // struct { } a; 2296 // union { } b; 2297 // int c[0]; 2298 // etc. However, we have to provide a place-holder in InVals, so 2299 // pretend we have an 8-byte item at the current address for that 2300 // purpose. 2301 if (!ObjSize) { 2302 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2303 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2304 InVals.push_back(FIN); 2305 continue; 2306 } 2307 // All aggregates smaller than 8 bytes must be passed right-justified. 2308 if (ObjSize < PtrByteSize) 2309 CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); 2310 // The value of the object is its address. 2311 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2312 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2313 InVals.push_back(FIN); 2314 2315 if (ObjSize < 8) { 2316 if (GPR_idx != Num_GPR_Regs) { 2317 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2318 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2319 SDValue Store; 2320 2321 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 2322 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 2323 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 2324 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2325 MachinePointerInfo(FuncArg, CurArgOffset), 2326 ObjType, false, false, 0); 2327 } else { 2328 // For sizes that don't fit a truncating store (3, 5, 6, 7), 2329 // store the whole register as-is to the parameter save area 2330 // slot. The address of the parameter was already calculated 2331 // above (InVals.push_back(FIN)) to be the right-justified 2332 // offset within the slot. For this store, we need a new 2333 // frame index that points at the beginning of the slot. 
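          // (E.g. a 3-byte aggregate was given the right-justified address ArgOffset + 5 above; storing the whole doubleword at the start of the slot puts the payload bytes at exactly that address on this big-endian target.)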
2334 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2335 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2336 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2337 MachinePointerInfo(FuncArg, ArgOffset), 2338 false, false, 0); 2339 } 2340 2341 MemOps.push_back(Store); 2342 ++GPR_idx; 2343 } 2344 // Whether we copied from a register or not, advance the offset 2345 // into the parameter save area by a full doubleword. 2346 ArgOffset += PtrByteSize; 2347 continue; 2348 } 2349 2350 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2351 // Store whatever pieces of the object are in registers 2352 // to memory. ArgOffset will be the address of the beginning 2353 // of the object. 2354 if (GPR_idx != Num_GPR_Regs) { 2355 unsigned VReg; 2356 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2357 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2358 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2359 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2360 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2361 MachinePointerInfo(FuncArg, ArgOffset), 2362 false, false, 0); 2363 MemOps.push_back(Store); 2364 ++GPR_idx; 2365 ArgOffset += PtrByteSize; 2366 } else { 2367 ArgOffset += ArgSize - j; 2368 break; 2369 } 2370 } 2371 continue; 2372 } 2373 2374 switch (ObjectVT.getSimpleVT().SimpleTy) { 2375 default: llvm_unreachable("Unhandled argument type!"); 2376 case MVT::i32: 2377 case MVT::i64: 2378 if (GPR_idx != Num_GPR_Regs) { 2379 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2380 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2381 2382 if (ObjectVT == MVT::i32) 2383 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2384 // value to MVT::i64 and then truncate to the correct register size. 2385 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2386 2387 ++GPR_idx; 2388 } else { 2389 needsLoad = true; 2390 ArgSize = PtrByteSize; 2391 } 2392 ArgOffset += 8; 2393 break; 2394 2395 case MVT::f32: 2396 case MVT::f64: 2397 // Every 8 bytes of argument space consumes one of the GPRs available for 2398 // argument passing. 2399 if (GPR_idx != Num_GPR_Regs) { 2400 ++GPR_idx; 2401 } 2402 if (FPR_idx != Num_FPR_Regs) { 2403 unsigned VReg; 2404 2405 if (ObjectVT == MVT::f32) 2406 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2407 else 2408 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2409 2410 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2411 ++FPR_idx; 2412 } else { 2413 needsLoad = true; 2414 ArgSize = PtrByteSize; 2415 } 2416 2417 ArgOffset += 8; 2418 break; 2419 case MVT::v4f32: 2420 case MVT::v4i32: 2421 case MVT::v8i16: 2422 case MVT::v16i8: 2423 // Note that vector arguments in registers don't reserve stack space, 2424 // except in varargs functions. 2425 if (VR_idx != Num_VR_Regs) { 2426 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2427 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2428 if (isVarArg) { 2429 while ((ArgOffset % 16) != 0) { 2430 ArgOffset += PtrByteSize; 2431 if (GPR_idx != Num_GPR_Regs) 2432 GPR_idx++; 2433 } 2434 ArgOffset += 16; 2435 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2436 } 2437 ++VR_idx; 2438 } else { 2439 // Vectors are aligned. 
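        // Round the running offset up to the next 16-byte boundary; ((x+15)/16)*16 is equivalent to (x + 15) & ~15.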
2440 ArgOffset = ((ArgOffset+15)/16)*16; 2441 CurArgOffset = ArgOffset; 2442 ArgOffset += 16; 2443 needsLoad = true; 2444 } 2445 break; 2446 } 2447 2448 // We need to load the argument to a virtual register if we determined 2449 // above that we ran out of physical registers of the appropriate type. 2450 if (needsLoad) { 2451 int FI = MFI->CreateFixedObject(ObjSize, 2452 CurArgOffset + (ArgSize - ObjSize), 2453 isImmutable); 2454 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2455 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2456 false, false, false, 0); 2457 } 2458 2459 InVals.push_back(ArgVal); 2460 } 2461 2462 // Set the size that is at least reserved in caller of this function. Tail 2463 // call optimized functions' reserved stack space needs to be aligned so that 2464 // taking the difference between two stack areas will result in an aligned 2465 // stack. 2466 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); 2467 2468 // If the function takes variable number of arguments, make a frame index for 2469 // the start of the first vararg value... for expansion of llvm.va_start. 2470 if (isVarArg) { 2471 int Depth = ArgOffset; 2472 2473 FuncInfo->setVarArgsFrameIndex( 2474 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 2475 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2476 2477 // If this function is vararg, store any remaining integer argument regs 2478 // to their spots on the stack so that they may be loaded by dereferencing the 2479 // result of va_next. 2480 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2481 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2482 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2483 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2484 MachinePointerInfo(), false, false, 0); 2485 MemOps.push_back(Store); 2486 // Increment the address by four for the next argument to store 2487 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 2488 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2489 } 2490 } 2491 2492 if (!MemOps.empty()) 2493 Chain = DAG.getNode(ISD::TokenFactor, dl, 2494 MVT::Other, &MemOps[0], MemOps.size()); 2495 2496 return Chain; 2497} 2498 2499SDValue 2500PPCTargetLowering::LowerFormalArguments_Darwin( 2501 SDValue Chain, 2502 CallingConv::ID CallConv, bool isVarArg, 2503 const SmallVectorImpl<ISD::InputArg> 2504 &Ins, 2505 DebugLoc dl, SelectionDAG &DAG, 2506 SmallVectorImpl<SDValue> &InVals) const { 2507 // TODO: add description of PPC stack frame format, or at least some docs. 2508 // 2509 MachineFunction &MF = DAG.getMachineFunction(); 2510 MachineFrameInfo *MFI = MF.getFrameInfo(); 2511 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2512 2513 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2514 bool isPPC64 = PtrVT == MVT::i64; 2515 // Potential tail calls could cause overwriting of argument stack slots. 2516 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2517 (CallConv == CallingConv::Fast)); 2518 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2519 2520 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 2521 // Area that is at least reserved in caller of this function. 2522 unsigned MinReservedArea = ArgOffset; 2523 2524 static const uint16_t GPR_32[] = { // 32-bit registers. 2525 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2526 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2527 }; 2528 static const uint16_t GPR_64[] = { // 64-bit registers.
2529 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2530 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2531 }; 2532 2533 static const uint16_t *FPR = GetFPR(); 2534 2535 static const uint16_t VR[] = { 2536 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2537 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2538 }; 2539 2540 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2541 const unsigned Num_FPR_Regs = 13; 2542 const unsigned Num_VR_Regs = array_lengthof( VR); 2543 2544 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2545 2546 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 2547 2548 // In 32-bit non-varargs functions, the stack space for vectors is after the 2549 // stack space for non-vectors. We do not use this space unless we have 2550 // too many vectors to fit in registers, something that only occurs in 2551 // constructed examples:), but we have to walk the arglist to figure 2552 // that out...for the pathological case, compute VecArgOffset as the 2553 // start of the vector parameter area. Computing VecArgOffset is the 2554 // entire point of the following loop. 2555 unsigned VecArgOffset = ArgOffset; 2556 if (!isVarArg && !isPPC64) { 2557 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 2558 ++ArgNo) { 2559 EVT ObjectVT = Ins[ArgNo].VT; 2560 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2561 2562 if (Flags.isByVal()) { 2563 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 2564 unsigned ObjSize = Flags.getByValSize(); 2565 unsigned ArgSize = 2566 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2567 VecArgOffset += ArgSize; 2568 continue; 2569 } 2570 2571 switch(ObjectVT.getSimpleVT().SimpleTy) { 2572 default: llvm_unreachable("Unhandled argument type!"); 2573 case MVT::i32: 2574 case MVT::f32: 2575 VecArgOffset += 4; 2576 break; 2577 case MVT::i64: // PPC64 2578 case MVT::f64: 2579 // FIXME: We are guaranteed to be !isPPC64 at this point. 2580 // Does MVT::i64 apply? 2581 VecArgOffset += 8; 2582 break; 2583 case MVT::v4f32: 2584 case MVT::v4i32: 2585 case MVT::v8i16: 2586 case MVT::v16i8: 2587 // Nothing to do, we're only looking at Nonvector args here. 2588 break; 2589 } 2590 } 2591 } 2592 // We've found where the vector parameter area in memory is. Skip the 2593 // first 12 parameters; these don't use that memory. 2594 VecArgOffset = ((VecArgOffset+15)/16)*16; 2595 VecArgOffset += 12*16; 2596 2597 // Add DAG nodes to load the arguments or copy them out of registers. On 2598 // entry to a function on PPC, the arguments start after the linkage area, 2599 // although the first ones are often in registers. 2600 2601 SmallVector<SDValue, 8> MemOps; 2602 unsigned nAltivecParamsAtEnd = 0; 2603 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2604 unsigned CurArgIdx = 0; 2605 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2606 SDValue ArgVal; 2607 bool needsLoad = false; 2608 EVT ObjectVT = Ins[ArgNo].VT; 2609 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2610 unsigned ArgSize = ObjSize; 2611 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2612 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2613 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2614 2615 unsigned CurArgOffset = ArgOffset; 2616 2617 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
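    // (In 32-bit non-varargs functions the vector instead lands in the trailing vector area computed as VecArgOffset above; it is counted in nAltivecParamsAtEnd and folded into the reserved area by setMinReservedArea.)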
2618 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2619 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2620 if (isVarArg || isPPC64) { 2621 MinReservedArea = ((MinReservedArea+15)/16)*16; 2622 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2623 Flags, 2624 PtrByteSize); 2625 } else nAltivecParamsAtEnd++; 2626 } else 2627 // Calculate min reserved area. 2628 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2629 Flags, 2630 PtrByteSize); 2631 2632 // FIXME the codegen can be much improved in some cases. 2633 // We do not have to keep everything in memory. 2634 if (Flags.isByVal()) { 2635 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2636 ObjSize = Flags.getByValSize(); 2637 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2638 // Objects of size 1 and 2 are right justified, everything else is 2639 // left justified. This means the memory address is adjusted forwards. 2640 if (ObjSize==1 || ObjSize==2) { 2641 CurArgOffset = CurArgOffset + (4 - ObjSize); 2642 } 2643 // The value of the object is its address. 2644 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2645 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2646 InVals.push_back(FIN); 2647 if (ObjSize==1 || ObjSize==2) { 2648 if (GPR_idx != Num_GPR_Regs) { 2649 unsigned VReg; 2650 if (isPPC64) 2651 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2652 else 2653 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2654 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2655 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 2656 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2657 MachinePointerInfo(FuncArg, 2658 CurArgOffset), 2659 ObjType, false, false, 0); 2660 MemOps.push_back(Store); 2661 ++GPR_idx; 2662 } 2663 2664 ArgOffset += PtrByteSize; 2665 2666 continue; 2667 } 2668 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2669 // Store whatever pieces of the object are in registers 2670 // to memory. ArgOffset will be the address of the beginning 2671 // of the object. 2672 if (GPR_idx != Num_GPR_Regs) { 2673 unsigned VReg; 2674 if (isPPC64) 2675 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2676 else 2677 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2678 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2679 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2680 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2681 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2682 MachinePointerInfo(FuncArg, ArgOffset), 2683 false, false, 0); 2684 MemOps.push_back(Store); 2685 ++GPR_idx; 2686 ArgOffset += PtrByteSize; 2687 } else { 2688 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2689 break; 2690 } 2691 } 2692 continue; 2693 } 2694 2695 switch (ObjectVT.getSimpleVT().SimpleTy) { 2696 default: llvm_unreachable("Unhandled argument type!"); 2697 case MVT::i32: 2698 if (!isPPC64) { 2699 if (GPR_idx != Num_GPR_Regs) { 2700 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2701 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2702 ++GPR_idx; 2703 } else { 2704 needsLoad = true; 2705 ArgSize = PtrByteSize; 2706 } 2707 // All int arguments reserve stack space in the Darwin ABI. 
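      // (The slot is reserved even when the value arrived in a GPR; needsLoad is set only once all eight argument GPRs are taken.)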
2708 ArgOffset += PtrByteSize; 2709 break; 2710 } 2711 // FALLTHROUGH 2712 case MVT::i64: // PPC64 2713 if (GPR_idx != Num_GPR_Regs) { 2714 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2715 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2716 2717 if (ObjectVT == MVT::i32) 2718 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2719 // value to MVT::i64 and then truncate to the correct register size. 2720 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2721 2722 ++GPR_idx; 2723 } else { 2724 needsLoad = true; 2725 ArgSize = PtrByteSize; 2726 } 2727 // All int arguments reserve stack space in the Darwin ABI. 2728 ArgOffset += 8; 2729 break; 2730 2731 case MVT::f32: 2732 case MVT::f64: 2733 // Every 4 bytes of argument space consumes one of the GPRs available for 2734 // argument passing. 2735 if (GPR_idx != Num_GPR_Regs) { 2736 ++GPR_idx; 2737 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2738 ++GPR_idx; 2739 } 2740 if (FPR_idx != Num_FPR_Regs) { 2741 unsigned VReg; 2742 2743 if (ObjectVT == MVT::f32) 2744 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2745 else 2746 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2747 2748 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2749 ++FPR_idx; 2750 } else { 2751 needsLoad = true; 2752 } 2753 2754 // All FP arguments reserve stack space in the Darwin ABI. 2755 ArgOffset += isPPC64 ? 8 : ObjSize; 2756 break; 2757 case MVT::v4f32: 2758 case MVT::v4i32: 2759 case MVT::v8i16: 2760 case MVT::v16i8: 2761 // Note that vector arguments in registers don't reserve stack space, 2762 // except in varargs functions. 2763 if (VR_idx != Num_VR_Regs) { 2764 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2765 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2766 if (isVarArg) { 2767 while ((ArgOffset % 16) != 0) { 2768 ArgOffset += PtrByteSize; 2769 if (GPR_idx != Num_GPR_Regs) 2770 GPR_idx++; 2771 } 2772 ArgOffset += 16; 2773 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2774 } 2775 ++VR_idx; 2776 } else { 2777 if (!isVarArg && !isPPC64) { 2778 // Vectors go after all the nonvectors. 2779 CurArgOffset = VecArgOffset; 2780 VecArgOffset += 16; 2781 } else { 2782 // Vectors are aligned. 2783 ArgOffset = ((ArgOffset+15)/16)*16; 2784 CurArgOffset = ArgOffset; 2785 ArgOffset += 16; 2786 } 2787 needsLoad = true; 2788 } 2789 break; 2790 } 2791 2792 // We need to load the argument to a virtual register if we determined above 2793 // that we ran out of physical registers of the appropriate type. 2794 if (needsLoad) { 2795 int FI = MFI->CreateFixedObject(ObjSize, 2796 CurArgOffset + (ArgSize - ObjSize), 2797 isImmutable); 2798 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2799 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2800 false, false, false, 0); 2801 } 2802 2803 InVals.push_back(ArgVal); 2804 } 2805 2806 // Set the size that is at least reserved in caller of this function. Tail 2807 // call optimized functions' reserved stack space needs to be aligned so that 2808 // taking the difference between two stack areas will result in an aligned 2809 // stack. 2810 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); 2811 2812 // If the function takes variable number of arguments, make a frame index for 2813 // the start of the first vararg value... for expansion of llvm.va_start. 
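  // On Darwin (and PPC64 SVR4) va_list is a plain pointer, so LowerVASTART above simply stores this frame index into the va_list object.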
2814 if (isVarArg) { 2815 int Depth = ArgOffset; 2816 2817 FuncInfo->setVarArgsFrameIndex( 2818 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2819 Depth, true)); 2820 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2821 2822 // If this function is vararg, store any remaining integer argument regs 2823 // to their spots on the stack so that they may be loaded by dereferencing the 2824 // result of va_next. 2825 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2826 unsigned VReg; 2827 2828 if (isPPC64) 2829 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2830 else 2831 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2832 2833 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2834 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2835 MachinePointerInfo(), false, false, 0); 2836 MemOps.push_back(Store); 2837 // Increment the address by four for the next argument to store 2838 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2839 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2840 } 2841 } 2842 2843 if (!MemOps.empty()) 2844 Chain = DAG.getNode(ISD::TokenFactor, dl, 2845 MVT::Other, &MemOps[0], MemOps.size()); 2846 2847 return Chain; 2848} 2849 2850/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 2851/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. 2852static unsigned 2853CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2854 bool isPPC64, 2855 bool isVarArg, 2856 unsigned CC, 2857 const SmallVectorImpl<ISD::OutputArg> 2858 &Outs, 2859 const SmallVectorImpl<SDValue> &OutVals, 2860 unsigned &nAltivecParamsAtEnd) { 2861 // Count how many bytes are to be pushed on the stack, including the linkage 2862 // area, and parameter passing area. We start with 24/48 bytes, which is 2863 // prereserved space for [SP][CR][LR][3 x unused]. 2864 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2865 unsigned NumOps = Outs.size(); 2866 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2867 2868 // Add up all the space actually used. 2869 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2870 // they all go in registers, but we must reserve stack space for them for 2871 // possible use by the caller. In varargs or 64-bit calls, parameters are 2872 // assigned stack space in order, with padding so Altivec parameters are 2873 // 16-byte aligned. 2874 nAltivecParamsAtEnd = 0; 2875 for (unsigned i = 0; i != NumOps; ++i) { 2876 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2877 EVT ArgVT = Outs[i].VT; 2878 // Varargs Altivec parameters are padded to a 16 byte boundary. 2879 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2880 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 2881 if (!isVarArg && !isPPC64) { 2882 // Non-varargs Altivec parameters go after all the non-Altivec 2883 // parameters; handle those later so we know how much padding we need. 2884 nAltivecParamsAtEnd++; 2885 continue; 2886 } 2887 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 2888 NumBytes = ((NumBytes+15)/16)*16; 2889 } 2890 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2891 } 2892 2893 // Allow for Altivec parameters at the end, if needed. 2894 if (nAltivecParamsAtEnd) { 2895 NumBytes = ((NumBytes+15)/16)*16; 2896 NumBytes += 16*nAltivecParamsAtEnd; 2897 } 2898 2899 // The prolog code of the callee may store up to 8 GPR argument registers to 2900 // the stack, allowing va_start to index over them in memory if it's varargs.
2901 // Because we cannot tell if this is needed on the caller side, we have to 2902 // conservatively assume that it is needed. As such, make sure we have at 2903 // least enough stack space for the caller to store the 8 GPRs. 2904 NumBytes = std::max(NumBytes, 2905 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2906 2907 // Tail call needs the stack to be aligned. 2908 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 2909 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 2910 getFrameLowering()->getStackAlignment(); 2911 unsigned AlignMask = TargetAlign-1; 2912 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2913 } 2914 2915 return NumBytes; 2916} 2917 2918/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 2919/// adjusted to accommodate the arguments for the tailcall. 2920static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 2921 unsigned ParamSize) { 2922 2923 if (!isTailCall) return 0; 2924 2925 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 2926 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 2927 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 2928 // Remember only if the new adjustment is bigger. 2929 if (SPDiff < FI->getTailCallSPDelta()) 2930 FI->setTailCallSPDelta(SPDiff); 2931 2932 return SPDiff; 2933} 2934 2935/// IsEligibleForTailCallOptimization - Check whether the call is eligible 2936/// for tail call optimization. Targets which want to do tail call 2937/// optimization should implement this function. 2938bool 2939PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2940 CallingConv::ID CalleeCC, 2941 bool isVarArg, 2942 const SmallVectorImpl<ISD::InputArg> &Ins, 2943 SelectionDAG& DAG) const { 2944 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 2945 return false; 2946 2947 // Variable argument functions are not supported. 2948 if (isVarArg) 2949 return false; 2950 2951 MachineFunction &MF = DAG.getMachineFunction(); 2952 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 2953 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 2954 // Functions containing by val parameters are not supported. 2955 for (unsigned i = 0; i != Ins.size(); i++) { 2956 ISD::ArgFlagsTy Flags = Ins[i].Flags; 2957 if (Flags.isByVal()) return false; 2958 } 2959 2960 // Non PIC/GOT tail calls are supported. 2961 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 2962 return true; 2963 2964 // At the moment we can only do local tail calls (in same module, hidden 2965 // or protected) if we are generating PIC. 2966 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2967 return G->getGlobal()->hasHiddenVisibility() 2968 || G->getGlobal()->hasProtectedVisibility(); 2969 } 2970 2971 return false; 2972} 2973 2974/// isBLACompatibleAddress - Return the immediate to use if the specified 2975/// 32-bit value is representable in the immediate field of a BxA instruction. 2976static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 2977 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 2978 if (!C) return 0; 2979 2980 int Addr = C->getZExtValue(); 2981 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 2982 SignExtend32<26>(Addr) != Addr) 2983 return 0; // Top 6 bits have to be sext of immediate.
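  // The LI field of an absolute branch holds a 24-bit word offset that the hardware shifts left by 2 and sign-extends, hence the checks above; return the immediate already converted to words.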

  return DAG.getConstant((int)C->getZExtValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).getNode();
}

namespace {

struct TailCallArgumentInfo {
  SDValue Arg;
  SDValue FrameIdxOp;
  int FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};

}

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDValue Chain,
                   const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                   SmallVector<SDValue, 8> &MemOpChains,
                   DebugLoc dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to the frame pointer.
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN,
                                       MachinePointerInfo::getFixedStack(FI),
                                       false, false, 0));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                             MachineFunction &MF,
                                             SDValue Chain,
                                             SDValue OldRetAddr,
                                             SDValue OldFP,
                                             int SPDiff,
                                             bool isPPC64,
                                             bool isDarwinABI,
                                             DebugLoc dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64,
                                                                   isDarwinABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(NewRetAddr),
                         false, false, 0);

    // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      int NewFPLoc =
        SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
                                                             isDarwinABI);
      int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
                                                          true);
      SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
      Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
                           MachinePointerInfo::getFixedStack(NewFPIdx),
                           false, false, 0);
    }
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember the argument for later processing, and
/// calculate its position.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                      SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTailCallLoadFPAndRetAddr - Emit loads of the frame pointer and return
/// address stack slots.  Returns the chain as result and the loaded values in
/// FPOpOut/LROpOut.  Used when tail calling.
SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                        int SPDiff,
                                                        SDValue Chain,
                                                        SDValue &LROpOut,
                                                        SDValue &FPOpOut,
                                                        bool isDarwinABI,
                                                        DebugLoc dl) const {
  if (SPDiff) {
    // Load the LR and FP stack slot for later adjusting.
    EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(),
                          false, false, false, 0);
    Chain = SDValue(LROpOut.getNode(), 1);

    // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack
    // slot as the FP is never overwritten.
    if (isDarwinABI) {
      FPOpOut = getFramePointerFrameIndex(DAG);
      FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(),
                            false, false, false, 0);
      Chain = SDValue(FPOpOut.getNode(), 1);
    }
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to the address "Dst" of size "Size".  Alignment
/// information is specified by the specific parameter attribute.  The copy
/// will be passed as a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDValue
CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          DebugLoc dl) {
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32);
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
                       false, false, MachinePointerInfo(0),
                       MachinePointerInfo(0));
}

/// LowerMemOpCallTo - Store the argument to the stack, or remember it in case
/// of tail calls.
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain,
                 SDValue Arg, SDValue PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVector<SDValue, 8> &MemOpChains,
                 SmallVector<TailCallArgumentInfo, 8> &TailCallArguments,
                 DebugLoc dl) {
  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      SDValue StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0));
  // Calculate and remember argument location.
  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                                  TailCallArguments);
}

static
void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain,
                     DebugLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes,
                     SDValue LROp, SDValue FPOp, bool isDarwinABI,
                     SmallVector<TailCallArgumentInfo, 8> &TailCallArguments) {
  MachineFunction &MF = DAG.getMachineFunction();

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains2[0], MemOpChains2.size());

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                        isPPC64, isDarwinABI, dl);

  // Emit callseq_end just before the tail call node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(0, true), InFlag);
  InFlag = Chain.getValue(1);
}

static
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
                     SDValue &Chain, DebugLoc dl, int SPDiff, bool isTailCall,
                     SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                     SmallVector<SDValue, 8> &Ops, std::vector<EVT> &NodeTys,
                     const PPCSubtarget &PPCSubTarget) {

  bool isPPC64 = PPCSubTarget.isPPC64();
  bool isSVR4ABI = PPCSubTarget.isSVR4ABI();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  NodeTys.push_back(MVT::Other);   // Returns a chain.
  NodeTys.push_back(MVT::Glue);    // Returns a flag for retval copy to use.

  unsigned CallOpc = PPCISD::CALL;

  bool needIndirectCall = true;
  if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) {
    // If this is an absolute destination address, use the munged value.
    Callee = SDValue(Dest, 0);
    needIndirectCall = false;
  }

  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    // XXX Workaround for http://llvm.org/bugs/show_bug.cgi?id=5201
    // Use indirect calls for ALL function calls in JIT mode, since the
    // far-call stubs may be outside relocation limits for a BL instruction.
    if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) {
      unsigned OpFlags = 0;
      if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
          (PPCSubTarget.getTargetTriple().isMacOSX() &&
           PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) &&
          (G->getGlobal()->isDeclaration() ||
           G->getGlobal()->isWeakForLinker())) {
        // PC-relative references to external symbols should go through $stub,
        // unless we're building with the Leopard linker or later, which
        // automatically synthesizes these stubs.
        OpFlags = PPCII::MO_DARWIN_STUB;
      }

      // If the callee is a GlobalAddress/ExternalSymbol node (quite common,
      // every direct call is) turn it into a TargetGlobalAddress /
      // TargetExternalSymbol node so that legalize doesn't hack it.
      Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl,
                                          Callee.getValueType(),
                                          0, OpFlags);
      needIndirectCall = false;
    }
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    unsigned char OpFlags = 0;

    if (DAG.getTarget().getRelocationModel() != Reloc::Static &&
        (PPCSubTarget.getTargetTriple().isMacOSX() &&
         PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) {
      // PC-relative references to external symbols should go through $stub,
      // unless we're building with the Leopard linker or later, which
      // automatically synthesizes these stubs.
      OpFlags = PPCII::MO_DARWIN_STUB;
    }

    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(),
                                         OpFlags);
    needIndirectCall = false;
  }

  if (needIndirectCall) {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    SDValue MTCTROps[] = {Chain, Callee, InFlag};

    if (isSVR4ABI && isPPC64) {
      // Function pointers in the 64-bit SVR4 ABI do not point to the function
      // entry point, but to the function descriptor (the function entry point
      // address is part of the function descriptor though).
      // The function descriptor is a three doubleword structure with the
      // following fields: function entry point, TOC base address and
      // environment pointer.
      // Thus for a call through a function pointer, the following actions need
      // to be performed:
      //   1. Save the TOC of the caller in the TOC save area of its stack
      //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
      //   2. Load the address of the function entry point from the function
      //      descriptor.
      //   3. Load the TOC of the callee from the function descriptor into r2.
      //   4. Load the environment pointer from the function descriptor into
      //      r11.
      //   5. Branch to the function entry point address.
      //   6. On return of the callee, the TOC of the caller needs to be
      //      restored (this is done in FinishCall()).
      //
      // All those operations are flagged together to ensure that no other
      // operations can be scheduled in between.  E.g. without flagging the
      // operations together, a TOC access in the caller could be scheduled
      // between the load of the callee TOC and the branch to the callee, which
      // results in the TOC access going through the TOC of the callee instead
      // of going through the TOC of the caller, which leads to incorrect code.

      // Load the address of the function entry point from the function
      // descriptor.
      SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
      SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps,
                                        InFlag.getNode() ? 3 : 2);
      Chain = LoadFuncPtr.getValue(1);
      InFlag = LoadFuncPtr.getValue(2);

      // Load environment pointer into r11.
      // Offset of the environment pointer within the function descriptor.
      SDValue PtrOff = DAG.getIntPtrConstant(16);

      SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
      SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
                                       InFlag);
      Chain = LoadEnvPtr.getValue(1);
      InFlag = LoadEnvPtr.getValue(2);

      SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
                                        InFlag);
      Chain = EnvVal.getValue(0);
      InFlag = EnvVal.getValue(1);

      // Load the TOC of the callee into r2.  We are using a target-specific
      // load with r2 hard coded, because the result of a target-independent
      // load would never go directly into r2, since r2 is a reserved register
      // (which prevents the register allocator from allocating it), resulting
      // in an additional register being allocated and an unnecessary move
      // instruction being generated.
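      // For reference, a sketch of the descriptor layout described above
      // (illustrative only; this struct is not used by the lowering code):
      //
      //   struct FunctionDescriptor {    // 64-bit SVR4 function descriptor
      //     uint64_t EntryPoint;         // offset 0:  loaded into CTR below
      //     uint64_t TOCBase;            // offset 8:  loaded into r2
      //     uint64_t EnvironmentPtr;     // offset 16: loaded into r11
      //   };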
      VTs = DAG.getVTList(MVT::Other, MVT::Glue);
      SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
                                       Callee, InFlag);
      Chain = LoadTOCPtr.getValue(0);
      InFlag = LoadTOCPtr.getValue(1);

      MTCTROps[0] = Chain;
      MTCTROps[1] = LoadFuncPtr;
      MTCTROps[2] = InFlag;
    }

    Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps,
                        2 + (InFlag.getNode() != 0));
    InFlag = Chain.getValue(1);

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Glue);
    Ops.push_back(Chain);
    CallOpc = PPCISD::BCTRL;
    Callee.setNode(0);
    // Add use of X11 (holding the environment pointer).
    if (isSVR4ABI && isPPC64)
      Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
    // Add the CTR register as callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.getNode()) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call, add the stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  return CallOpc;
}

static
bool isLocalCall(const SDValue &Callee)
{
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    return !G->getGlobal()->isDeclaration() &&
           !G->getGlobal()->isWeakForLinker();
  return false;
}

SDValue
PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                   CallingConv::ID CallConv, bool isVarArg,
                                   const SmallVectorImpl<ISD::InputArg> &Ins,
                                   DebugLoc dl, SelectionDAG &DAG,
                                   SmallVectorImpl<SDValue> &InVals) const {

  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                    getTargetMachine(), RVLocs, *DAG.getContext());
  CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
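  // An illustrative example (not original commentary): a sub-word result such
  // as an i8 is returned promoted to i32 in R3; depending on the extension
  // attributes its LocInfo is SExt, ZExt or AExt, and the cases below
  // re-assert the extension (AssertSext/AssertZext) and truncate the copied
  // i32 back down to the i8 the caller expects.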
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val = DAG.getCopyFromReg(Chain, dl,
                                     VA.getLocReg(), VA.getLocVT(), InFlag);
    Chain = Val.getValue(1);
    InFlag = Val.getValue(2);

    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}

SDValue
PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
                              bool isTailCall, bool isVarArg,
                              SelectionDAG &DAG,
                              SmallVector<std::pair<unsigned, SDValue>, 8>
                                &RegsToPass,
                              SDValue InFlag, SDValue Chain,
                              SDValue &Callee,
                              int SPDiff, unsigned NumBytes,
                              const SmallVectorImpl<ISD::InputArg> &Ins,
                              SmallVectorImpl<SDValue> &InVals) const {
  std::vector<EVT> NodeTys;
  SmallVector<SDValue, 8> Ops;
  unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
                                 isTailCall, RegsToPass, Ops, NodeTys,
                                 PPCSubTarget);

  // Add an implicit use of CR bit 6 for 32-bit SVR4 vararg calls.
  if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // When performing tail call optimization the callee pops its arguments off
  // the stack.  Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CallConv == CallingConv::Fast &&
     getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0;

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo();
  const uint32_t *Mask = TRI->getCallPreservedMask(CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  // Emit tail call.
  if (isTailCall) {
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
  }

  // Add a NOP immediately after the branch instruction when using the 64-bit
  // SVR4 ABI.  At link time, if caller and callee are in a different module
  // and thus have a different TOC, the call will be replaced with a call to
  // a stub function which saves the current TOC, loads the TOC of the callee
  // and branches to the callee.
  // The NOP will be replaced with a load instruction which restores the TOC
  // of the caller from the TOC save slot of the current stack frame.  If
  // caller and callee belong to the same module (and have the same TOC), the
  // NOP will remain unchanged.

  bool needsTOCRestore = false;
  if (!isTailCall && PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    if (CallOpc == PPCISD::BCTRL) {
      // This is a call through a function pointer.
      // Restore the caller's TOC from the save area into R2.
      // See PrepareCall() for more information about calls through function
      // pointers in the 64-bit SVR4 ABI.
      // We are using a target-specific load with r2 hard coded, because the
      // result of a target-independent load would never go directly into r2,
      // since r2 is a reserved register (which prevents the register allocator
      // from allocating it), resulting in an additional register being
      // allocated and an unnecessary move instruction being generated.
      needsTOCRestore = true;
    } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) {
      // Otherwise insert a NOP for non-local calls.
      CallOpc = PPCISD::CALL_NOP;
    }
  }

  Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  if (needsTOCRestore) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
    InFlag = Chain.getValue(1);
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
                             DAG.getIntPtrConstant(BytesCalleePops, true),
                             InFlag);
  if (!Ins.empty())
    InFlag = Chain.getValue(1);

  return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
                         Ins, dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  DebugLoc &dl = CLI.DL;
  SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
  SmallVector<SDValue, 32> &OutVals = CLI.OutVals;
  SmallVector<ISD::InputArg, 32> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;

  if (isTailCall)
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                   Ins, DAG);

  if (PPCSubTarget.isSVR4ABI()) {
    if (PPCSubTarget.isPPC64())
      return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
    else
      return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
                              isTailCall, Outs, OutVals, Ins,
                              dl, DAG, InVals);
  }

  return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
                          isTailCall, Outs, OutVals, Ins,
                          dl, DAG, InVals);
}

SDValue
PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg>
                                      &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {
  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
  // of the 32-bit SVR4 ABI stack frame layout.
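  // A rough sketch of the outgoing area built below (illustrative; the
  // authoritative layout lives in LowerFormalArguments_32SVR4() and
  // PPCFrameLowering, and the linkage area is assumed to be the usual back
  // chain plus LR save words):
  //
  //   SP --> +-------------------------------+
  //          | linkage area                  |  getLinkageSize(false, false)
  //          +-------------------------------+
  //          | parameter list area           |  CCInfo allocations
  //          +-------------------------------+
  //          | byval aggregate copies        |  CCByValInfo allocations
  //          +-------------------------------+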

  assert((CallConv == CallingConv::C ||
          CallConv == CallingConv::Fast) && "Unknown calling convention!");

  unsigned PtrByteSize = 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call.  As a consequence, the frame pointer will be used for dynamic
  // allocas and for restoring the caller's stack pointer in this function's
  // epilogue.  This is done because, by tail calling, the called function
  // might overwrite the value in this function's (MF) stack pointer stack
  // slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, the parameter list area, and the part of the local variable space
  // which contains copies of aggregates which are passed by value.

  // Assign locations to all of the outgoing arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                 getTargetMachine(), ArgLocs, *DAG.getContext());

  // Reserve space for the linkage area on the stack.
  CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false),
                       PtrByteSize);

  if (isVarArg) {
    // Handle fixed and variable vector arguments differently.
    // Fixed vector arguments go into registers as long as registers are
    // available.  Variable vector arguments always go into memory.
    unsigned NumArgs = Outs.size();

    for (unsigned i = 0; i != NumArgs; ++i) {
      MVT ArgVT = Outs[i].VT;
      ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
      bool Result;

      if (Outs[i].IsFixed) {
        Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
                               CCInfo);
      } else {
        Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
                                      ArgFlags, CCInfo);
      }

      if (Result) {
#ifndef NDEBUG
        errs() << "Call operand #" << i << " has unhandled type "
               << EVT(ArgVT).getEVTString() << "\n";
#endif
        llvm_unreachable(0);
      }
    }
  } else {
    // All arguments are treated the same.
    CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
  }

  // Assign locations to all of the outgoing aggregate by value arguments.
  SmallVector<CCValAssign, 16> ByValArgLocs;
  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
                      getTargetMachine(), ByValArgLocs, *DAG.getContext());

  // Reserve stack space for the allocations in CCInfo.
  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize);

  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);

  // Size of the linkage area, parameter list area and the part of the local
  // variable space where copies of aggregates which are passed by value are
  // stored.
  unsigned NumBytes = CCByValInfo.getNextStackOffset();

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false,
                                       dl);

  // Set up a copy of the stack pointer for use in loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
  SmallVector<SDValue, 8> MemOpChains;

  bool seenFloatArg = false;
  // Walk the register/memloc assignments, inserting copies/loads.
  for (unsigned i = 0, j = 0, e = ArgLocs.size();
       i != e;
       ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    if (Flags.isByVal()) {
      // Argument is an aggregate which is passed by value, thus we need to
      // create a copy of it in the local variable space of the current stack
      // frame (which is the stack frame of the caller) and pass the address of
      // this copy to the callee.
      assert((j < ByValArgLocs.size()) && "Index out of bounds!");
      CCValAssign &ByValVA = ByValArgLocs[j++];
      assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");

      // Memory reserved in the local variable space of the caller's stack
      // frame.
      unsigned LocMemOffset = ByValVA.getLocMemOffset();

      SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
      PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

      // Create a copy of the argument in the local area of the current
      // stack frame.
      SDValue MemcpyCall =
        CreateCopyOfByValArgument(Arg, PtrOff,
                                  CallSeqStart.getNode()->getOperand(0),
                                  Flags, DAG, dl);

      // This must go outside the CALLSEQ_START..END.
      SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.getNode()->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                             NewCallSeqStart.getNode());
      Chain = CallSeqStart = NewCallSeqStart;

      // Pass the address of the aggregate copy on the stack either in a
      // physical register or in the parameter list area of the current stack
      // frame to the callee.
      Arg = PtrOff;
    }

    if (VA.isRegLoc()) {
      seenFloatArg |= VA.getLocVT().isFloatingPoint();
      // Put argument in a physical register.
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    } else {
      // Put argument in the parameter list area of the current stack frame.
      assert(VA.isMemLoc());
      unsigned LocMemOffset = VA.getLocMemOffset();

      if (!isTailCall) {
        SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
        PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);

        MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff,
                                           MachinePointerInfo(),
                                           false, false, 0));
      } else {
        // Calculate and remember argument location.
        CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
                                 TailCallArguments);
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Set CR bit 6 to true if this is a vararg call with floating args passed
  // in registers.
  if (isVarArg) {
    SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
    SDValue Ops[] = { Chain, InFlag };

    Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
                        dl, VTs, Ops, InFlag.getNode() ? 2 : 1);

    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp,
                    FPOp, false, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

// Copy an argument into memory, being careful to do this outside the
// call sequence for the call to which the argument belongs.
SDValue
PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
                                              SDValue CallSeqStart,
                                              ISD::ArgFlagsTy Flags,
                                              SelectionDAG &DAG,
                                              DebugLoc dl) const {
  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                        CallSeqStart.getNode()->getOperand(0),
                        Flags, DAG, dl);
  // The MEMCPY must go outside the CALLSEQ_START..END.
  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                             CallSeqStart.getNode()->getOperand(1));
  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
                         NewCallSeqStart.getNode());
  return NewCallSeqStart;
}

SDValue
PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg>
                                      &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  unsigned PtrByteSize = 8;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call.  As a consequence, the frame pointer will be used for dynamic
  // allocas and for restoring the caller's stack pointer in this function's
  // epilogue.  This is done because, by tail calling, the called function
  // might overwrite the value in this function's (MF) stack pointer stack
  // slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with at least 48 bytes, which
  // is reserved space for [SP][CR][LR][3 x unused].
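  // For reference (not part of the original comment): in the 64-bit SVR4 ABI
  // the 48-byte linkage area is conventionally six doublewords -- back chain
  // at offset 0, CR save at 8, LR save at 16, two reserved doublewords, and
  // the TOC save slot at offset 40, which is the offset used when spilling r2
  // before the indirect calls handled further below.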
  // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result
  // of this call.
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv,
                                         Outs, OutVals, nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use in loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const uint16_t GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const uint16_t *FPR = GetFPR();

  static const uint16_t VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // Promote integers to 64-bit values.
    if (Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME: memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      // Note: Size includes alignment padding, so
      //   struct x { short a; char b; }
      // will have Size = 4.  With #pragma pack(1), it will have Size = 3.
      // These are the proper values we need for right-justifying the
      // aggregate in a parameter register.
      unsigned Size = Flags.getByValSize();

      // An empty aggregate parameter takes up no storage and no
      // registers.
      if (Size == 0)
        continue;

      // All aggregates smaller than 8 bytes must be passed right-justified.
      if (Size==1 || Size==2 || Size==4) {
        EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
          continue;
        }
      }

      if (GPR_idx == NumGPRs && Size < 8) {
        SDValue Const = DAG.getConstant(PtrByteSize - Size,
                                        PtrOff.getValueType());
        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);
        ArgOffset += PtrByteSize;
        continue;
      }
      // Copy the entire object into memory.  There are cases where
      // gcc-generated code assumes it is there, even if it could be put
      // entirely into registers.  (This is not what the doc says.)

      // FIXME: The above statement is likely due to a misunderstanding of the
      // documents.  All arguments must be copied into the parameter area BY
      // THE CALLEE in the event that the callee takes the address of any
      // formal argument.  That has not yet been implemented.  However, it is
      // reasonable to use the stack area as a staging area for the register
      // load.

      // Skip this for small aggregates, as we will use the same slot for a
      // right-justified copy, below.
      if (Size >= 8)
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

      // When a register is available, pass a small aggregate right-justified.
      if (Size < 8 && GPR_idx != NumGPRs) {
        // The easiest way to get this right-justified in a register
        // is to copy the structure into the rightmost portion of a
        // local variable slot, then load the whole slot into the
        // register.
        // FIXME: The memcpy seems to produce pretty awful code for
        // small aggregates, particularly for packed ones.
        // FIXME: It would be preferable to use the slot in the
        // parameter save area instead of a new local variable.
        SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType());
        SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
        Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                          CallSeqStart,
                                                          Flags, DAG, dl);

        // Load the slot into the register.
        SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff,
                                   MachinePointerInfo(),
                                   false, false, false, 0);
        MemOpChains.push_back(Load.getValue(1));
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

        // Done with this argument.
        ArgOffset += PtrByteSize;
        continue;
      }

      // For aggregates larger than PtrByteSize, copy the pieces of the
      // object that fit into registers from the parameter save area.
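      // A worked example of the loop below (illustrative, not from the
      // original comment): a 24-byte aggregate with PtrByteSize == 8 becomes
      // three doubleword loads at offsets 0, 8 and 16, assigned to three
      // consecutive GPRs; if the GPRs run out at piece j, the remaining bytes
      // stay in the stack copy made above and ArgOffset is advanced by the
      // rounded-up remainder.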
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // A single float or an aggregate containing only a single float
          // must be passed right-justified in the stack doubleword, and
          // in the GPR, if one is available.
          SDValue StoreOff;
          if (Arg.getValueType().getSimpleVT().SimpleTy == MVT::f32) {
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
          } else
            StoreOff = PtrOff;

          SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else if (GPR_idx != NumGPRs)
          // If we have any FPRs remaining, we may also have GPRs remaining.
          ++GPR_idx;
      } else {
        // Single-precision floating-point values are mapped to the
        // second (rightmost) word of the stack doubleword.
        if (Arg.getValueType() == MVT::f32) {
          SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
          PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
        }

        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
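        // An observation, not original commentary: after the alignment loop
        // above, a 16-byte vector vararg occupies one aligned stack slot and
        // is shadowed both in the next V register (if any) and in up to
        // 16/PtrByteSize == 2 GPRs via the loads below, so a callee reading
        // the ... through va_arg sees a consistent value either way.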
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         true, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Check if this is an indirect call (MTCTR/BCTRL).
  // See PrepareCall() for more information about calls through function
  // pointers in the 64-bit SVR4 ABI.
  if (!isTailCall &&
      !dyn_cast<GlobalAddressSDNode>(Callee) &&
      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG)) {
    // Load r2 into a virtual register and store it to the TOC save area.
    SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
    // TOC save area offset.
    SDValue PtrOff = DAG.getIntPtrConstant(40);
    SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
    Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
                         MachinePointerInfo(), false, false, 0);
    // R12 must contain the address of an indirect callee.  This does not
    // mean the MTCTR instruction must use R12; it's easier to model this
    // as an extra parameter, so do that.
    RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
  }

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
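  // A brief note on the pattern below (an explanatory aside, not original
  // commentary): each CopyToReg both consumes and produces the InFlag glue
  // value, so the register copies form one unbreakable sequence that the
  // scheduler cannot separate from the call node that finally consumes the
  // glue.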
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (isTailCall)
    PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp,
                    FPOp, true, TailCallArguments);

  return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
                    RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
                    Ins, InVals);
}

SDValue
PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    bool isTailCall,
                                    const SmallVectorImpl<ISD::OutputArg>
                                      &Outs,
                                    const SmallVectorImpl<SDValue> &OutVals,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    DebugLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals) const {

  unsigned NumOps = Outs.size();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // Mark this function as potentially containing a function that contains a
  // tail call.  As a consequence, the frame pointer will be used for dynamic
  // allocas and for restoring the caller's stack pointer in this function's
  // epilogue.  This is done because, by tail calling, the called function
  // might overwrite the value in this function's (MF) stack pointer stack
  // slot 0(SP).
  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
      CallConv == CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv,
                                         Outs, OutVals,
                                         nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of tail
  // call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // To protect arguments on the stack from being clobbered in a tail call,
  // force all the loads to happen before doing any other lowering.
  if (isTailCall)
    Chain = DAG.getStackArgumentTokenFactor(Chain);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
  SDValue CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
  SDValue LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true,
                                       dl);

  // Set up a copy of the stack pointer for use in loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDValue StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const uint16_t GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const uint16_t GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const uint16_t *FPR = GetFPR();

  static const uint16_t VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs  = array_lengthof(VR);

  const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32;

  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDValue, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue Arg = OutVals[i];
    ISD::ArgFlagsTy Flags = Outs[i].Flags;

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDValue PtrOff;

    PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
    }

    // FIXME: memcpy is used way more than necessary.  Correctness first.
    // Note: "by value" is code for passing a structure by value, not
    // basic types.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      // Very small objects are passed right-justified.  Everything else is
      // passed left-justified.
      if (Size==1 || Size==2) {
        EVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
                                        MachinePointerInfo(), VT,
                                        false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));

          ArgOffset += PtrByteSize;
        } else {
          SDValue Const = DAG.getConstant(PtrByteSize - Size,
                                          PtrOff.getValueType());
          SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
          Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
                                                            CallSeqStart,
                                                            Flags, DAG, dl);
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy the entire object into memory.  There are cases where
      // gcc-generated code assumes it is there, even if it could be put
      // entirely into registers.  (This is not what the doc says.)
      Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
                                                        CallSeqStart,
                                                        Flags, DAG, dl);

      // For small aggregates (Darwin only) and aggregates >= PtrByteSize,
      // copy the pieces of the object that fit into registers from the
      // parameter save area.
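      // A worked example (illustrative, not from the original comment): a
      // 12-byte aggregate with PtrByteSize == 4 becomes three word loads at
      // offsets 0, 4 and 8 into three consecutive GPRs; unlike the 64-bit
      // SVR4 path, the pieces stay left-justified, matching the full copy
      // made just above.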
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDValue Const = DAG.getConstant(j, PtrOff.getValueType());
        SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      }
      ArgOffset += PtrByteSize;
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                       MachinePointerInfo(), false, false, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(), false, false,
                                       false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
            SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff,
                                       MachinePointerInfo(),
                                       false, false, false, 0);
            MemOpChains.push_back(Load.getValue(1));
            RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (GPR_idx != NumGPRs)
            ++GPR_idx;
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64)   // PPC64 has 64-bit GPRs, obviously. :)
            ++GPR_idx;
        }
      } else
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments, dl);
      if (isPPC64)
        ArgOffset += 8;
      else
        ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff,
                                     MachinePointerInfo(), false, false, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
                                   DAG.getConstant(i, PtrVT));
          SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix,
                                     MachinePointerInfo(),
                                     false, false, false, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments, dl);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do, they get
  // stack space following the non-Altivec parameters.  We don't track this
  // here because nobody below needs it.  If there are more Altivec parameters
  // than fit in registers, emit the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // The offset is aligned; skip the first 12 params, which go in
    // V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDValue Arg = OutVals[i];
      EVT ArgType = Outs[i].VT;
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDValue PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments, dl);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // On Darwin, R12 must contain the address of an indirect callee.  This does
  // not mean the MTCTR instruction must use R12; it's easier to model this as
  // an extra parameter, so do that.
  if (!isTailCall &&
      !dyn_cast<GlobalAddressSDNode>(Callee) &&
      !dyn_cast<ExternalSymbolSDNode>(Callee) &&
      !isBLACompatibleAddress(Callee, DAG))
    RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 :
                                                   PPC::R12), Callee));

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
4473 SDValue InFlag; 4474 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4475 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4476 RegsToPass[i].second, InFlag); 4477 InFlag = Chain.getValue(1); 4478 } 4479 4480 if (isTailCall) 4481 PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp, 4482 FPOp, true, TailCallArguments); 4483 4484 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4485 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4486 Ins, InVals); 4487} 4488 4489bool 4490PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv, 4491 MachineFunction &MF, bool isVarArg, 4492 const SmallVectorImpl<ISD::OutputArg> &Outs, 4493 LLVMContext &Context) const { 4494 SmallVector<CCValAssign, 16> RVLocs; 4495 CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), 4496 RVLocs, Context); 4497 return CCInfo.CheckReturn(Outs, RetCC_PPC); 4498} 4499 4500SDValue 4501PPCTargetLowering::LowerReturn(SDValue Chain, 4502 CallingConv::ID CallConv, bool isVarArg, 4503 const SmallVectorImpl<ISD::OutputArg> &Outs, 4504 const SmallVectorImpl<SDValue> &OutVals, 4505 DebugLoc dl, SelectionDAG &DAG) const { 4506 4507 SmallVector<CCValAssign, 16> RVLocs; 4508 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 4509 getTargetMachine(), RVLocs, *DAG.getContext()); 4510 CCInfo.AnalyzeReturn(Outs, RetCC_PPC); 4511 4512 SDValue Flag; 4513 SmallVector<SDValue, 4> RetOps(1, Chain); 4514 4515 // Copy the result values into the output registers. 4516 for (unsigned i = 0; i != RVLocs.size(); ++i) { 4517 CCValAssign &VA = RVLocs[i]; 4518 assert(VA.isRegLoc() && "Can only return in registers!"); 4519 4520 SDValue Arg = OutVals[i]; 4521 4522 switch (VA.getLocInfo()) { 4523 default: llvm_unreachable("Unknown loc info!"); 4524 case CCValAssign::Full: break; 4525 case CCValAssign::AExt: 4526 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); 4527 break; 4528 case CCValAssign::ZExt: 4529 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); 4530 break; 4531 case CCValAssign::SExt: 4532 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); 4533 break; 4534 } 4535 4536 Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag); 4537 Flag = Chain.getValue(1); 4538 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT())); 4539 } 4540 4541 RetOps[0] = Chain; // Update chain. 4542 4543 // Add the flag if we have it. 4544 if (Flag.getNode()) 4545 RetOps.push_back(Flag); 4546 4547 return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, 4548 &RetOps[0], RetOps.size()); 4549} 4550 4551SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, 4552 const PPCSubtarget &Subtarget) const { 4553 // When we pop the dynamic allocation we need to restore the SP link. 4554 DebugLoc dl = Op.getDebugLoc(); 4555 4556 // Get the correct type for pointers. 4557 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4558 4559 // Construct the stack pointer operand. 4560 bool isPPC64 = Subtarget.isPPC64(); 4561 unsigned SP = isPPC64 ? PPC::X1 : PPC::R1; 4562 SDValue StackPtr = DAG.getRegister(SP, PtrVT); 4563 4564 // Get the operands for the STACKRESTORE. 4565 SDValue Chain = Op.getOperand(0); 4566 SDValue SaveSP = Op.getOperand(1); 4567 4568 // Load the old link SP. 4569 SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, 4570 MachinePointerInfo(), 4571 false, false, false, 0); 4572 4573 // Restore the stack pointer. 4574 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); 4575 4576 // Store the old link SP.
4577 return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(), 4578 false, false, 0); 4579} 4580 4581 4582 4583SDValue 4584PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 4585 MachineFunction &MF = DAG.getMachineFunction(); 4586 bool isPPC64 = PPCSubTarget.isPPC64(); 4587 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 4588 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4589 4590 // Get the current return address save index. The users of this index will 4591 // be primarily the RETURNADDR lowering code. 4592 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 4593 int RASI = FI->getReturnAddrSaveIndex(); 4594 4595 // If the return address save index hasn't been defined yet. 4596 if (!RASI) { 4597 // Find out the fixed offset of the return address save area. 4598 int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI); 4599 // Allocate the frame index for the return address save area. 4600 RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true); 4601 // Save the result. 4602 FI->setReturnAddrSaveIndex(RASI); 4603 } 4604 return DAG.getFrameIndex(RASI, PtrVT); 4605} 4606 4607SDValue 4608PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 4609 MachineFunction &MF = DAG.getMachineFunction(); 4610 bool isPPC64 = PPCSubTarget.isPPC64(); 4611 bool isDarwinABI = PPCSubTarget.isDarwinABI(); 4612 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4613 4614 // Get the current frame pointer save index. The users of this index will be 4615 // primarily DYNALLOC instructions. 4616 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 4617 int FPSI = FI->getFramePointerSaveIndex(); 4618 4619 // If the frame pointer save index hasn't been defined yet. 4620 if (!FPSI) { 4621 // Find out the fixed offset of the frame pointer save area. 4622 int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64, 4623 isDarwinABI); 4624 4625 // Allocate the frame index for the frame pointer save area. 4626 FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 4627 // Save the result. 4628 FI->setFramePointerSaveIndex(FPSI); 4629 } 4630 return DAG.getFrameIndex(FPSI, PtrVT); 4631} 4632 4633SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 4634 SelectionDAG &DAG, 4635 const PPCSubtarget &Subtarget) const { 4636 // Get the inputs. 4637 SDValue Chain = Op.getOperand(0); 4638 SDValue Size = Op.getOperand(1); 4639 DebugLoc dl = Op.getDebugLoc(); 4640 4641 // Get the correct type for pointers. 4642 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4643 // Negate the size. 4644 SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT, 4645 DAG.getConstant(0, PtrVT), Size); 4646 // Construct a node for the frame pointer save index. 4647 SDValue FPSIdx = getFramePointerFrameIndex(DAG); 4648 // Build a DYNALLOC node.
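// Shape of the node built below (sketch): DYNALLOC takes the incoming chain,
// the negated allocation size, and the frame index of the stack-pointer save
// slot, and produces the address of the new allocation plus an output chain:
//   {SpaceAddr, OutChain} = DYNALLOC Chain, NegSize, FPSIdx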
4649 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 4650 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 4651 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); 4652} 4653 4654SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 4655 SelectionDAG &DAG) const { 4656 DebugLoc DL = Op.getDebugLoc(); 4657 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 4658 DAG.getVTList(MVT::i32, MVT::Other), 4659 Op.getOperand(0), Op.getOperand(1)); 4660} 4661 4662SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 4663 SelectionDAG &DAG) const { 4664 DebugLoc DL = Op.getDebugLoc(); 4665 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 4666 Op.getOperand(0), Op.getOperand(1)); 4667} 4668 4669/// LowerSELECT_CC - Lower floating-point select_cc's into an fsel instruction 4670/// when possible. 4671SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 4672 // Not FP? Not an fsel. 4673 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 4674 !Op.getOperand(2).getValueType().isFloatingPoint()) 4675 return Op; 4676 4677 // We might be able to do better than this under some circumstances, but in 4678 // general, fsel-based lowering of select is a finite-math-only optimization. 4679 // For more information, see section F.3 of the 2.06 ISA specification. 4680 if (!DAG.getTarget().Options.NoInfsFPMath || 4681 !DAG.getTarget().Options.NoNaNsFPMath) 4682 return Op; 4683 4684 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 4685 4686 EVT ResVT = Op.getValueType(); 4687 EVT CmpVT = Op.getOperand(0).getValueType(); 4688 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4689 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 4690 DebugLoc dl = Op.getDebugLoc(); 4691 4692 // If the RHS of the comparison is a 0.0, we don't need to do the 4693 // subtraction at all. 4694 SDValue Sel1; 4695 if (isFloatingPointZero(RHS)) 4696 switch (CC) { 4697 default: break; // SETUO etc aren't handled by fsel. 4698 case ISD::SETNE: 4699 std::swap(TV, FV); 4700 case ISD::SETEQ: 4701 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4702 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4703 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4704 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4705 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4706 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4707 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 4708 case ISD::SETULT: 4709 case ISD::SETLT: 4710 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 4711 case ISD::SETOGE: 4712 case ISD::SETGE: 4713 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4714 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4715 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4716 case ISD::SETUGT: 4717 case ISD::SETGT: 4718 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt 4719 case ISD::SETOLE: 4720 case ISD::SETLE: 4721 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4722 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4723 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4724 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 4725 } 4726 4727 SDValue Cmp; 4728 switch (CC) { 4729 default: break; // SETUO etc aren't handled by fsel.
4730 case ISD::SETNE: 4731 std::swap(TV, FV); 4732 case ISD::SETEQ: 4733 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4734 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4735 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4736 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4737 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4738 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4739 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4740 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 4741 case ISD::SETULT: 4742 case ISD::SETLT: 4743 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4744 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4745 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4746 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4747 case ISD::SETOGE: 4748 case ISD::SETGE: 4749 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4750 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4751 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4752 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4753 case ISD::SETUGT: 4754 case ISD::SETGT: 4755 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4756 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4757 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4758 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4759 case ISD::SETOLE: 4760 case ISD::SETLE: 4761 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4762 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4763 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4764 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4765 } 4766 return Op; 4767} 4768 4769// FIXME: Split this code up when LegalizeDAGTypes lands. 4770SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 4771 DebugLoc dl) const { 4772 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 4773 SDValue Src = Op.getOperand(0); 4774 if (Src.getValueType() == MVT::f32) 4775 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 4776 4777 SDValue Tmp; 4778 switch (Op.getValueType().getSimpleVT().SimpleTy) { 4779 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 4780 case MVT::i32: 4781 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 4782 (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : 4783 PPCISD::FCTIDZ), 4784 dl, MVT::f64, Src); 4785 break; 4786 case MVT::i64: 4787 assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && 4788 "i64 FP_TO_UINT is supported only with FPCVT"); 4789 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 4790 PPCISD::FCTIDUZ, 4791 dl, MVT::f64, Src); 4792 break; 4793 } 4794 4795 // Convert the FP value to an int value through memory. 4796 bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && 4797 (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); 4798 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 4799 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 4800 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 4801 4802 // Emit a store to the stack slot. 
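// Two forms are emitted below (sketch): when STFIWX is usable, the 32-bit
// integer image of the conversion result is stored directly into a 4-byte
// slot; otherwise the whole f64 is stored into an 8-byte slot and, on this
// big-endian target, the i32 result is later reloaded from byte offset 4.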
4803 SDValue Chain; 4804 if (i32Stack) { 4805 MachineFunction &MF = DAG.getMachineFunction(); 4806 MachineMemOperand *MMO = 4807 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 4808 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 4809 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 4810 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 4811 MVT::i32, MMO); 4812 } else 4813 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 4814 MPI, false, false, 0); 4815 4816 // Result is a load from the stack slot. If loading 4 bytes, make sure to 4817 // add in a bias. 4818 if (Op.getValueType() == MVT::i32 && !i32Stack) { 4819 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 4820 DAG.getConstant(4, FIPtr.getValueType())); 4821 MPI = MachinePointerInfo(); 4822 } 4823 4824 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, 4825 false, false, false, 0); 4826} 4827 4828SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 4829 SelectionDAG &DAG) const { 4830 DebugLoc dl = Op.getDebugLoc(); 4831 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 4832 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 4833 return SDValue(); 4834 4835 assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && 4836 "UINT_TO_FP is supported only with FPCVT"); 4837 4838 // If we have FCFIDS, then use it when converting to single-precision. 4839 // Otherwise, convert to double-precision and then round. 4840 unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4841 (Op.getOpcode() == ISD::UINT_TO_FP ? 4842 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 4843 (Op.getOpcode() == ISD::UINT_TO_FP ? 4844 PPCISD::FCFIDU : PPCISD::FCFID); 4845 MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4846 MVT::f32 : MVT::f64; 4847 4848 if (Op.getOperand(0).getValueType() == MVT::i64) { 4849 SDValue SINT = Op.getOperand(0); 4850 // When converting to single-precision, we actually need to convert 4851 // to double-precision first and then round to single-precision. 4852 // To avoid double-rounding effects during that operation, we have 4853 // to prepare the input operand. Bits that might be truncated when 4854 // converting to double-precision are replaced by a bit that won't 4855 // be lost at this stage, but is below the single-precision rounding 4856 // position. 4857 // 4858 // However, if -enable-unsafe-fp-math is in effect, accept double 4859 // rounding to avoid the extra overhead. 4860 if (Op.getValueType() == MVT::f32 && 4861 !PPCSubTarget.hasFPCVT() && 4862 !DAG.getTarget().Options.UnsafeFPMath) { 4863 4864 // Twiddle input to make sure the low 11 bits are zero. (If this 4865 // is the case, we are guaranteed the value will fit into the 53 bit 4866 // mantissa of an IEEE double-precision value without rounding.) 4867 // If any of those low 11 bits were not zero originally, make sure 4868 // bit 12 (value 2048) is set instead, so that the final rounding 4869 // to single-precision gets the correct result. 
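// Worked example (illustrative): SINT = 0x4000_0000_0000_0403.  The low 11
// bits (0x403) are nonzero, so the AND/ADD/OR/AND sequence below clears them
// and sets the 2048 bit, giving 0x4000_0000_0000_0800.  Both values round to
// the same f32, but only the adjusted one converts to f64 without rounding.
// When the low 11 bits are already zero, the sequence leaves SINT unchanged.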
4870 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4871 SINT, DAG.getConstant(2047, MVT::i64)); 4872 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 4873 Round, DAG.getConstant(2047, MVT::i64)); 4874 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 4875 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4876 Round, DAG.getConstant(-2048, MVT::i64)); 4877 4878 // However, we cannot use that value unconditionally: if the magnitude 4879 // of the input value is small, the bit-twiddling we did above might 4880 // end up visibly changing the output. Fortunately, in that case, we 4881 // don't need to twiddle bits since the original input will convert 4882 // exactly to double-precision floating-point already. Therefore, 4883 // construct a conditional to use the original value if the top 11 4884 // bits are all sign-bit copies, and use the rounded value computed 4885 // above otherwise. 4886 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 4887 SINT, DAG.getConstant(53, MVT::i32)); 4888 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 4889 Cond, DAG.getConstant(1, MVT::i64)); 4890 Cond = DAG.getSetCC(dl, MVT::i32, 4891 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 4892 4893 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 4894 } 4895 4896 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 4897 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 4898 4899 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4900 FP = DAG.getNode(ISD::FP_ROUND, dl, 4901 MVT::f32, FP, DAG.getIntPtrConstant(0)); 4902 return FP; 4903 } 4904 4905 assert(Op.getOperand(0).getValueType() == MVT::i32 && 4906 "Unhandled INT_TO_FP type in custom expander!"); 4907 // Since we only generate this in 64-bit mode, we can take advantage of 4908 // 64-bit registers. In particular, sign extend the input value into the 4909 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 4910 // then lfd it and fcfid it. 4911 MachineFunction &MF = DAG.getMachineFunction(); 4912 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 4913 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4914 4915 SDValue Ld; 4916 if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { 4917 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 4918 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4919 4920 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 4921 MachinePointerInfo::getFixedStack(FrameIdx), 4922 false, false, 0); 4923 4924 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 4925 "Expected an i32 store"); 4926 MachineMemOperand *MMO = 4927 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 4928 MachineMemOperand::MOLoad, 4, 4); 4929 SDValue Ops[] = { Store, FIdx }; 4930 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 4931 PPCISD::LFIWZX : PPCISD::LFIWAX, 4932 dl, DAG.getVTList(MVT::f64, MVT::Other), 4933 Ops, 2, MVT::i32, MMO); 4934 } else { 4935 assert(PPCSubTarget.isPPC64() && 4936 "i32->FP without LFIWAX supported only on PPC64"); 4937 4938 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 4939 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4940 4941 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 4942 Op.getOperand(0)); 4943 4944 // STD the extended value into the stack slot. 4945 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 4946 MachinePointerInfo::getFixedStack(FrameIdx), 4947 false, false, 0); 4948 4949 // Load the value as a double. 
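// (Sketch: the LFD below only moves the 64-bit bit pattern into an FPR; it
// is the FCFID-family node emitted afterwards that interprets that pattern
// as a 64-bit integer and converts it to floating point.)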
4950 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 4951 MachinePointerInfo::getFixedStack(FrameIdx), 4952 false, false, false, 0); 4953 } 4954 4955 // FCFID it and return it. 4956 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 4957 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4958 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 4959 return FP; 4960} 4961 4962SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 4963 SelectionDAG &DAG) const { 4964 DebugLoc dl = Op.getDebugLoc(); 4965 /* 4966 The rounding mode is in bits 30:31 of FPSCR, and has the following 4967 settings: 4968 00 Round to nearest 4969 01 Round to 0 4970 10 Round to +inf 4971 11 Round to -inf 4972 4973 FLT_ROUNDS, on the other hand, expects the following: 4974 -1 Undefined 4975 0 Round to 0 4976 1 Round to nearest 4977 2 Round to +inf 4978 3 Round to -inf 4979 4980 To perform the conversion, we do: 4981 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 4982 */ 4983 4984 MachineFunction &MF = DAG.getMachineFunction(); 4985 EVT VT = Op.getValueType(); 4986 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4987 SDValue MFFSreg, InFlag; 4988 4989 // Save FP Control Word to register 4990 EVT NodeTys[] = { 4991 MVT::f64, // return register 4992 MVT::Glue // unused in this context 4993 }; 4994 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 4995 4996 // Save FP register to stack slot 4997 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 4998 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 4999 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 5000 StackSlot, MachinePointerInfo(), false, false,0); 5001 5002 // Load FP Control Word from low 32 bits of stack slot. 5003 SDValue Four = DAG.getConstant(4, PtrVT); 5004 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 5005 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 5006 false, false, false, 0); 5007 5008 // Transform as necessary 5009 SDValue CWD1 = 5010 DAG.getNode(ISD::AND, dl, MVT::i32, 5011 CWD, DAG.getConstant(3, MVT::i32)); 5012 SDValue CWD2 = 5013 DAG.getNode(ISD::SRL, dl, MVT::i32, 5014 DAG.getNode(ISD::AND, dl, MVT::i32, 5015 DAG.getNode(ISD::XOR, dl, MVT::i32, 5016 CWD, DAG.getConstant(3, MVT::i32)), 5017 DAG.getConstant(3, MVT::i32)), 5018 DAG.getConstant(1, MVT::i32)); 5019 5020 SDValue RetVal = 5021 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 5022 5023 return DAG.getNode((VT.getSizeInBits() < 16 ? 5024 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 5025} 5026 5027SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5028 EVT VT = Op.getValueType(); 5029 unsigned BitWidth = VT.getSizeInBits(); 5030 DebugLoc dl = Op.getDebugLoc(); 5031 assert(Op.getNumOperands() == 3 && 5032 VT == Op.getOperand(1).getValueType() && 5033 "Unexpected SHL!"); 5034 5035 // Expand into a bunch of logical ops. Note that these ops 5036 // depend on the PPC behavior for oversized shift amounts.
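// Illustrative check for the 32-bit case (BitWidth == 32): for Amt in [1,31],
// Tmp5 = Amt - 32 is an oversized shift amount, so Tmp6 is 0 and
// OutHi = (Hi << Amt) | (Lo >> (32 - Amt)); for Amt in [32,63], Tmp2 and Tmp3
// are 0 instead and OutHi = Lo << (Amt - 32), as required.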
5037 SDValue Lo = Op.getOperand(0); 5038 SDValue Hi = Op.getOperand(1); 5039 SDValue Amt = Op.getOperand(2); 5040 EVT AmtVT = Amt.getValueType(); 5041 5042 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5043 DAG.getConstant(BitWidth, AmtVT), Amt); 5044 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 5045 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 5046 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 5047 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5048 DAG.getConstant(-BitWidth, AmtVT)); 5049 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 5050 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5051 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 5052 SDValue OutOps[] = { OutLo, OutHi }; 5053 return DAG.getMergeValues(OutOps, 2, dl); 5054} 5055 5056SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5057 EVT VT = Op.getValueType(); 5058 DebugLoc dl = Op.getDebugLoc(); 5059 unsigned BitWidth = VT.getSizeInBits(); 5060 assert(Op.getNumOperands() == 3 && 5061 VT == Op.getOperand(1).getValueType() && 5062 "Unexpected SRL!"); 5063 5064 // Expand into a bunch of logical ops. Note that these ops 5065 // depend on the PPC behavior for oversized shift amounts. 5066 SDValue Lo = Op.getOperand(0); 5067 SDValue Hi = Op.getOperand(1); 5068 SDValue Amt = Op.getOperand(2); 5069 EVT AmtVT = Amt.getValueType(); 5070 5071 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5072 DAG.getConstant(BitWidth, AmtVT), Amt); 5073 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5074 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5075 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5076 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5077 DAG.getConstant(-BitWidth, AmtVT)); 5078 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 5079 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5080 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 5081 SDValue OutOps[] = { OutLo, OutHi }; 5082 return DAG.getMergeValues(OutOps, 2, dl); 5083} 5084 5085SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5086 DebugLoc dl = Op.getDebugLoc(); 5087 EVT VT = Op.getValueType(); 5088 unsigned BitWidth = VT.getSizeInBits(); 5089 assert(Op.getNumOperands() == 3 && 5090 VT == Op.getOperand(1).getValueType() && 5091 "Unexpected SRA!"); 5092 5093 // Expand into a bunch of logical ops, followed by a select_cc. 5094 SDValue Lo = Op.getOperand(0); 5095 SDValue Hi = Op.getOperand(1); 5096 SDValue Amt = Op.getOperand(2); 5097 EVT AmtVT = Amt.getValueType(); 5098 5099 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5100 DAG.getConstant(BitWidth, AmtVT), Amt); 5101 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5102 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5103 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5104 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5105 DAG.getConstant(-BitWidth, AmtVT)); 5106 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5107 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5108 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5109 Tmp4, Tmp6, ISD::SETLE); 5110 SDValue OutOps[] = { OutLo, OutHi }; 5111 return DAG.getMergeValues(OutOps, 2, dl); 5112} 5113 5114//===----------------------------------------------------------------------===// 5115// Vector related lowering. 
5116// 5117 5118/// BuildSplatI - Build a canonical splati of Val with an element size of 5119/// SplatSize. Cast the result to VT. 5120static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5121 SelectionDAG &DAG, DebugLoc dl) { 5122 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5123 5124 static const EVT VTys[] = { // canonical VT to use for each size. 5125 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5126 }; 5127 5128 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5129 5130 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5131 if (Val == -1) 5132 SplatSize = 1; 5133 5134 EVT CanonicalVT = VTys[SplatSize-1]; 5135 5136 // Build a canonical splat for this value. 5137 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5138 SmallVector<SDValue, 8> Ops; 5139 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5140 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 5141 &Ops[0], Ops.size()); 5142 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5143} 5144 5145/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5146/// specified intrinsic ID. 5147static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5148 SelectionDAG &DAG, DebugLoc dl, 5149 EVT DestVT = MVT::Other) { 5150 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5151 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5152 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5153} 5154 5155/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5156/// specified intrinsic ID. 5157static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 5158 SDValue Op2, SelectionDAG &DAG, 5159 DebugLoc dl, EVT DestVT = MVT::Other) { 5160 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 5161 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5162 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 5163} 5164 5165 5166/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 5167/// amount. The result has the specified value type. 5168static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 5169 EVT VT, SelectionDAG &DAG, DebugLoc dl) { 5170 // Force LHS/RHS to be the right type. 5171 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 5172 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 5173 5174 int Ops[16]; 5175 for (unsigned i = 0; i != 16; ++i) 5176 Ops[i] = i + Amt; 5177 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 5178 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5179} 5180 5181// If this is a case we can't handle, return null and let the default 5182// expansion code take care of it. If we CAN select this case, and if it 5183// selects to a single instruction, return Op. Otherwise, if we can codegen 5184// this case more efficiently than a constant pool load, lower it to the 5185// sequence of ops that should be used. 5186SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 5187 SelectionDAG &DAG) const { 5188 DebugLoc dl = Op.getDebugLoc(); 5189 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5190 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 5191 5192 // Check if this is a splat of a constant value. 5193 APInt APSplatBits, APSplatUndef; 5194 unsigned SplatBitSize; 5195 bool HasAnyUndefs; 5196 if (! 
BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 5197 HasAnyUndefs, 0, true) || SplatBitSize > 32) 5198 return SDValue(); 5199 5200 unsigned SplatBits = APSplatBits.getZExtValue(); 5201 unsigned SplatUndef = APSplatUndef.getZExtValue(); 5202 unsigned SplatSize = SplatBitSize / 8; 5203 5204 // First, handle single instruction cases. 5205 5206 // All zeros? 5207 if (SplatBits == 0) { 5208 // Canonicalize all zero vectors to be v4i32. 5209 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 5210 SDValue Z = DAG.getConstant(0, MVT::i32); 5211 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 5212 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 5213 } 5214 return Op; 5215 } 5216 5217 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 5218 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 5219 (32-SplatBitSize)); 5220 if (SextVal >= -16 && SextVal <= 15) 5221 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 5222 5223 5224 // Two instruction sequences. 5225 5226 // If this value is in the range [-32,30] and is even, use: 5227 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 5228 // If this value is in the range [17,31] and is odd, use: 5229 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 5230 // If this value is in the range [-31,-17] and is odd, use: 5231 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 5232 // Note the last two are three-instruction sequences. 5233 if (SextVal >= -32 && SextVal <= 31) { 5234 // To avoid having these optimizations undone by constant folding, 5235 // we convert to a pseudo that will be expanded later into one of 5236 // the above forms. 5237 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 5238 EVT VT = Op.getValueType(); 5239 int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); 5240 SDValue EltSize = DAG.getConstant(Size, MVT::i32); 5241 return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 5242 } 5243 5244 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 5245 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 5246 // for fneg/fabs. 5247 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 5248 // Make -1 and vspltisw -1: 5249 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 5250 5251 // Make the VSLW intrinsic, computing 0x8000_0000. 5252 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 5253 OnesV, DAG, dl); 5254 5255 // xor by OnesV to invert it. 5256 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 5257 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5258 } 5259 5260 // Check to see if this is one of a wide variety of 'vsplti* + binop self' cases. 5261 static const signed char SplatCsts[] = { 5262 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 5263 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 5264 }; 5265 5266 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 5267 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 5268 // cases which are ambiguous (e.g. formation of 0x8000_0000); 'vsplti -1' is preferred because an all-ones splat is identical for every element size. 5269 int i = SplatCsts[idx]; 5270 5271 // Figure out what shift amount will be used by altivec if shifted by i in 5272 // this splat size. 5273 unsigned TypeShiftAmt = i & (SplatBitSize-1); 5274 5275 // vsplti + shl self. 5276 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 5277 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5278 static const unsigned IIDs[] = { // Intrinsic to use for each size.
5279 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 5280 Intrinsic::ppc_altivec_vslw 5281 }; 5282 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5283 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5284 } 5285 5286 // vsplti + srl self. 5287 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5288 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5289 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5290 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5291 Intrinsic::ppc_altivec_vsrw 5292 }; 5293 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5294 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5295 } 5296 5297 // vsplti + sra self. Unlike the srl case above, this must use an arithmetic (signed) shift so that negative splat values can match. 5298 if (SextVal == (int)(i >> TypeShiftAmt)) { 5299 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5300 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5301 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5302 Intrinsic::ppc_altivec_vsraw 5303 }; 5304 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5305 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5306 } 5307 5308 // vsplti + rol self. 5309 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5310 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5311 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5312 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5313 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5314 Intrinsic::ppc_altivec_vrlw 5315 }; 5316 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5317 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5318 } 5319 5320 // t = vsplti c, result = vsldoi t, t, 1 5321 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 5322 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5323 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5324 } 5325 // t = vsplti c, result = vsldoi t, t, 2 5326 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5327 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5328 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5329 } 5330 // t = vsplti c, result = vsldoi t, t, 3 5331 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5332 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5333 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5334 } 5335 } 5336 5337 return SDValue(); 5338} 5339 5340/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5341/// the specified operations to build the shuffle.
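// Layout of a PFEntry, as unpacked at the top of the function below (sketch):
// bits 31:30 hold the cost, bits 29:26 the operation, and two 13-bit fields
// identify the operands.  Each operand ID encodes four 4-byte elements as
// base-9 digits, with 8 meaning "undef"; e.g. the identity <0,1,2,3> is
// ((1*9+2)*9+3), with a leading zero digit.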
5342static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5343 SDValue RHS, SelectionDAG &DAG, 5344 DebugLoc dl) { 5345 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5346 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5347 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5348 5349 enum { 5350 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5351 OP_VMRGHW, 5352 OP_VMRGLW, 5353 OP_VSPLTISW0, 5354 OP_VSPLTISW1, 5355 OP_VSPLTISW2, 5356 OP_VSPLTISW3, 5357 OP_VSLDOI4, 5358 OP_VSLDOI8, 5359 OP_VSLDOI12 5360 }; 5361 5362 if (OpNum == OP_COPY) { 5363 if (LHSID == (1*9+2)*9+3) return LHS; 5364 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5365 return RHS; 5366 } 5367 5368 SDValue OpLHS, OpRHS; 5369 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5370 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5371 5372 int ShufIdxs[16]; 5373 switch (OpNum) { 5374 default: llvm_unreachable("Unknown i32 permute!"); 5375 case OP_VMRGHW: 5376 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 5377 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 5378 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 5379 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 5380 break; 5381 case OP_VMRGLW: 5382 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 5383 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 5384 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 5385 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 5386 break; 5387 case OP_VSPLTISW0: 5388 for (unsigned i = 0; i != 16; ++i) 5389 ShufIdxs[i] = (i&3)+0; 5390 break; 5391 case OP_VSPLTISW1: 5392 for (unsigned i = 0; i != 16; ++i) 5393 ShufIdxs[i] = (i&3)+4; 5394 break; 5395 case OP_VSPLTISW2: 5396 for (unsigned i = 0; i != 16; ++i) 5397 ShufIdxs[i] = (i&3)+8; 5398 break; 5399 case OP_VSPLTISW3: 5400 for (unsigned i = 0; i != 16; ++i) 5401 ShufIdxs[i] = (i&3)+12; 5402 break; 5403 case OP_VSLDOI4: 5404 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 5405 case OP_VSLDOI8: 5406 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 5407 case OP_VSLDOI12: 5408 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 5409 } 5410 EVT VT = OpLHS.getValueType(); 5411 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 5412 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 5413 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 5414 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5415} 5416 5417/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 5418/// is a shuffle we can handle in a single instruction, return it. Otherwise, 5419/// return the code it can be lowered into. Worst case, it can always be 5420/// lowered into a vperm. 5421SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 5422 SelectionDAG &DAG) const { 5423 DebugLoc dl = Op.getDebugLoc(); 5424 SDValue V1 = Op.getOperand(0); 5425 SDValue V2 = Op.getOperand(1); 5426 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 5427 EVT VT = Op.getValueType(); 5428 5429 // Cases that are handled by instructions that take permute immediates 5430 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 5431 // selected by the instruction selector. 
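// For example (illustrative): a v16i8 mask consisting of sixteen 3s is
// matched by isSplatShuffleMask(SVOp, 1) and selects to a single vspltb, so
// it is returned unchanged below.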
5432 if (V2.getOpcode() == ISD::UNDEF) { 5433 if (PPC::isSplatShuffleMask(SVOp, 1) || 5434 PPC::isSplatShuffleMask(SVOp, 2) || 5435 PPC::isSplatShuffleMask(SVOp, 4) || 5436 PPC::isVPKUWUMShuffleMask(SVOp, true) || 5437 PPC::isVPKUHUMShuffleMask(SVOp, true) || 5438 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 5439 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 5440 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 5441 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 5442 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 5443 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 5444 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 5445 return Op; 5446 } 5447 } 5448 5449 // Altivec has a variety of "shuffle immediates" that take two vector inputs 5450 // and produce a fixed permutation. If any of these match, do not lower to 5451 // VPERM. 5452 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 5453 PPC::isVPKUHUMShuffleMask(SVOp, false) || 5454 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 5455 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 5456 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 5457 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 5458 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 5459 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 5460 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 5461 return Op; 5462 5463 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 5464 // perfect shuffle table to emit an optimal matching sequence. 5465 ArrayRef<int> PermMask = SVOp->getMask(); 5466 5467 unsigned PFIndexes[4]; 5468 bool isFourElementShuffle = true; 5469 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 5470 unsigned EltNo = 8; // Start out undef. 5471 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 5472 if (PermMask[i*4+j] < 0) 5473 continue; // Undef, ignore it. 5474 5475 unsigned ByteSource = PermMask[i*4+j]; 5476 if ((ByteSource & 3) != j) { 5477 isFourElementShuffle = false; 5478 break; 5479 } 5480 5481 if (EltNo == 8) { 5482 EltNo = ByteSource/4; 5483 } else if (EltNo != ByteSource/4) { 5484 isFourElementShuffle = false; 5485 break; 5486 } 5487 } 5488 PFIndexes[i] = EltNo; 5489 } 5490 5491 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 5492 // perfect shuffle vector to determine if it is cost effective to do this as 5493 // discrete instructions, or whether we should use a vperm. 5494 if (isFourElementShuffle) { 5495 // Compute the index in the perfect shuffle table. 5496 unsigned PFTableIndex = 5497 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5498 5499 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5500 unsigned Cost = (PFEntry >> 30); 5501 5502 // Determining when to avoid vperm is tricky. Many things affect the cost 5503 // of vperm, particularly how many times the perm mask needs to be computed. 5504 // For example, if the perm mask can be hoisted out of a loop or is already 5505 // used (perhaps because there are multiple permutes with the same shuffle 5506 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 5507 // the loop requires an extra register. 5508 // 5509 // As a compromise, we only emit discrete instructions if the shuffle can be 5510 // generated in 3 or fewer operations. When we have loop information 5511 // available, if this block is within a loop, we should avoid using vperm 5512 // for 3-operation perms and use a constant pool load instead. 
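// (The cost occupies the top two bits of the table entry, so it ranges over
// 0-3; entries with cost 3 fall through to the vperm path below.)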
5513 if (Cost < 3) 5514 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5515 } 5516 5517 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 5518 // vector that will get spilled to the constant pool. 5519 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 5520 5521 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 5522 // that it is in input element units, not in bytes. Convert now. 5523 EVT EltVT = V1.getValueType().getVectorElementType(); 5524 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 5525 5526 SmallVector<SDValue, 16> ResultMask; 5527 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 5528 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 5529 5530 for (unsigned j = 0; j != BytesPerElement; ++j) 5531 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 5532 MVT::i32)); 5533 } 5534 5535 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 5536 &ResultMask[0], ResultMask.size()); 5537 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 5538} 5539 5540/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 5541/// altivec comparison. If it is, return true and fill in Opc/isDot with 5542/// information about the intrinsic. 5543static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 5544 bool &isDot) { 5545 unsigned IntrinsicID = 5546 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 5547 CompareOpc = -1; 5548 isDot = false; 5549 switch (IntrinsicID) { 5550 default: return false; 5551 // Comparison predicates. 5552 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 5553 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 5554 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 5555 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 5556 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 5557 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 5558 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 5559 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 5560 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 5561 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 5562 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 5563 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 5564 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 5565 5566 // Normal Comparisons. 
5567 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 5568 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 5569 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 5570 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 5571 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 5572 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 5573 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 5574 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 5575 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 5576 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 5577 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 5578 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 5579 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 5580 } 5581 return true; 5582} 5583 5584/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 5585/// lower, do it, otherwise return null. 5586SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5587 SelectionDAG &DAG) const { 5588 // If this is a lowered altivec predicate compare, CompareOpc is set to the 5589 // opcode number of the comparison. 5590 DebugLoc dl = Op.getDebugLoc(); 5591 int CompareOpc; 5592 bool isDot; 5593 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 5594 return SDValue(); // Don't custom lower most intrinsics. 5595 5596 // If this is a non-dot comparison, make the VCMP node and we are done. 5597 if (!isDot) { 5598 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 5599 Op.getOperand(1), Op.getOperand(2), 5600 DAG.getConstant(CompareOpc, MVT::i32)); 5601 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 5602 } 5603 5604 // Create the PPCISD altivec 'dot' comparison node. 5605 SDValue Ops[] = { 5606 Op.getOperand(2), // LHS 5607 Op.getOperand(3), // RHS 5608 DAG.getConstant(CompareOpc, MVT::i32) 5609 }; 5610 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 5611 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5612 5613 // Now that we have the comparison, emit a copy from the CR to a GPR. 5614 // This is flagged to the above dot comparison. 5615 SDValue Flags = DAG.getNode(PPCISD::MFCR, dl, MVT::i32, 5616 DAG.getRegister(PPC::CR6, MVT::i32), 5617 CompNode.getValue(1)); 5618 5619 // Unpack the result based on how the target uses it. 5620 unsigned BitNo; // Bit # of CR6. 5621 bool InvertBit; // Invert result? 5622 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 5623 default: // Can't happen, don't crash on invalid number though. 5624 case 0: // Return the value of the EQ bit of CR6. 5625 BitNo = 0; InvertBit = false; 5626 break; 5627 case 1: // Return the inverted value of the EQ bit of CR6. 5628 BitNo = 0; InvertBit = true; 5629 break; 5630 case 2: // Return the value of the LT bit of CR6. 5631 BitNo = 2; InvertBit = false; 5632 break; 5633 case 3: // Return the inverted value of the LT bit of CR6. 5634 BitNo = 2; InvertBit = true; 5635 break; 5636 } 5637 5638 // Shift the bit into the low position. 5639 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 5640 DAG.getConstant(8-(3-BitNo), MVT::i32)); 5641 // Isolate the bit. 
5642 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 5643 DAG.getConstant(1, MVT::i32)); 5644 5645 // If we are supposed to, toggle the bit. 5646 if (InvertBit) 5647 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 5648 DAG.getConstant(1, MVT::i32)); 5649 return Flags; 5650} 5651 5652SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 5653 SelectionDAG &DAG) const { 5654 DebugLoc dl = Op.getDebugLoc(); 5655 // Create a stack slot that is 16-byte aligned. 5656 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 5657 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 5658 EVT PtrVT = getPointerTy(); 5659 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5660 5661 // Store the input value into Value#0 of the stack slot. 5662 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 5663 Op.getOperand(0), FIdx, MachinePointerInfo(), 5664 false, false, 0); 5665 // Load it out. 5666 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 5667 false, false, false, 0); 5668} 5669 5670SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 5671 DebugLoc dl = Op.getDebugLoc(); 5672 if (Op.getValueType() == MVT::v4i32) { 5673 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5674 5675 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 5676 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 5677 5678 SDValue RHSSwap = // = vrlw RHS, 16 5679 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 5680 5681 // Shrinkify inputs to v8i16. 5682 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 5683 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 5684 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 5685 5686 // Low parts multiplied together, generating 32-bit results (we ignore the 5687 // top parts). 5688 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 5689 LHS, RHS, DAG, dl, MVT::v4i32); 5690 5691 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 5692 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 5693 // Shift the high parts up 16 bits. 5694 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 5695 Neg16, DAG, dl); 5696 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 5697 } else if (Op.getValueType() == MVT::v8i16) { 5698 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5699 5700 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 5701 5702 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 5703 LHS, RHS, Zero, DAG, dl); 5704 } else if (Op.getValueType() == MVT::v16i8) { 5705 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5706 5707 // Multiply the even 8-bit parts, producing 16-bit sums. 5708 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 5709 LHS, RHS, DAG, dl, MVT::v8i16); 5710 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 5711 5712 // Multiply the odd 8-bit parts, producing 16-bit sums. 5713 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 5714 LHS, RHS, DAG, dl, MVT::v8i16); 5715 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 5716 5717 // Merge the results together. 
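// Sketch of the interleave built below: on this big-endian target the low
// byte of each 16-bit product sits at the odd byte index, so the mask picks
// bytes 1,3,5,... of EvenParts interleaved with bytes 17,19,21,... of
// OddParts (mask indices of 16 and up select from the second shuffle
// operand).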
5718 int Ops[16]; 5719 for (unsigned i = 0; i != 8; ++i) { 5720 Ops[i*2 ] = 2*i+1; 5721 Ops[i*2+1] = 2*i+1+16; 5722 } 5723 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 5724 } else { 5725 llvm_unreachable("Unknown mul to lower!"); 5726 } 5727} 5728 5729/// LowerOperation - Provide custom lowering hooks for some operations. 5730/// 5731SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5732 switch (Op.getOpcode()) { 5733 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 5734 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5735 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5736 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5737 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5738 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5739 case ISD::SETCC: return LowerSETCC(Op, DAG); 5740 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 5741 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 5742 case ISD::VASTART: 5743 return LowerVASTART(Op, DAG, PPCSubTarget); 5744 5745 case ISD::VAARG: 5746 return LowerVAARG(Op, DAG, PPCSubTarget); 5747 5748 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 5749 case ISD::DYNAMIC_STACKALLOC: 5750 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 5751 5752 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 5753 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 5754 5755 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5756 case ISD::FP_TO_UINT: 5757 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 5758 Op.getDebugLoc()); 5759 case ISD::UINT_TO_FP: 5760 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5761 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5762 5763 // Lower 64-bit shifts. 5764 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 5765 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 5766 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 5767 5768 // Vector-related lowering. 5769 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5770 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5771 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5772 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5773 case ISD::MUL: return LowerMUL(Op, DAG); 5774 5775 // Frame & Return address. 
5776 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5777 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5778 } 5779} 5780 5781void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 5782 SmallVectorImpl<SDValue>&Results, 5783 SelectionDAG &DAG) const { 5784 const TargetMachine &TM = getTargetMachine(); 5785 DebugLoc dl = N->getDebugLoc(); 5786 switch (N->getOpcode()) { 5787 default: 5788 llvm_unreachable("Do not know how to custom type legalize this operation!"); 5789 case ISD::VAARG: { 5790 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 5791 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 5792 return; 5793 5794 EVT VT = N->getValueType(0); 5795 5796 if (VT == MVT::i64) { 5797 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); 5798 5799 Results.push_back(NewNode); 5800 Results.push_back(NewNode.getValue(1)); 5801 } 5802 return; 5803 } 5804 case ISD::FP_ROUND_INREG: { 5805 assert(N->getValueType(0) == MVT::ppcf128); 5806 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 5807 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5808 MVT::f64, N->getOperand(0), 5809 DAG.getIntPtrConstant(0)); 5810 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5811 MVT::f64, N->getOperand(0), 5812 DAG.getIntPtrConstant(1)); 5813 5814 // Add the two halves of the long double in round-to-zero mode. 5815 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 5816 5817 // We know the low half is about to be thrown away, so just use something 5818 // convenient. 5819 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 5820 FPreg, FPreg)); 5821 return; 5822 } 5823 case ISD::FP_TO_SINT: 5824 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 5825 return; 5826 } 5827} 5828 5829 5830//===----------------------------------------------------------------------===// 5831// Other Lowering Code 5832//===----------------------------------------------------------------------===// 5833 5834MachineBasicBlock * 5835PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5836 bool is64bit, unsigned BinOpcode) const { 5837 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5838 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5839 5840 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5841 MachineFunction *F = BB->getParent(); 5842 MachineFunction::iterator It = BB; 5843 ++It; 5844 5845 unsigned dest = MI->getOperand(0).getReg(); 5846 unsigned ptrA = MI->getOperand(1).getReg(); 5847 unsigned ptrB = MI->getOperand(2).getReg(); 5848 unsigned incr = MI->getOperand(3).getReg(); 5849 DebugLoc dl = MI->getDebugLoc(); 5850 5851 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 5852 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5853 F->insert(It, loopMBB); 5854 F->insert(It, exitMBB); 5855 exitMBB->splice(exitMBB->begin(), BB, 5856 llvm::next(MachineBasicBlock::iterator(MI)), 5857 BB->end()); 5858 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5859 5860 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5861 unsigned TmpReg = (!BinOpcode) ? incr : 5862 RegInfo.createVirtualRegister( 5863 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 5864 (const TargetRegisterClass *) &PPC::GPRCRegClass); 5865 5866 // thisMBB: 5867 // ... 5868 // fallthrough --> loopMBB 5869 BB->addSuccessor(loopMBB); 5870 5871 // loopMBB: 5872 // l[wd]arx dest, ptr 5873 // add r0, dest, incr 5874 // st[wd]cx. 
  //  loopMBB:
  //   l[wd]arx dest, ptr
  //   add r0, dest, incr
  //   st[wd]cx. r0, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  BB = loopMBB;
  BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
    .addReg(ptrA).addReg(ptrB);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
    .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
  BB = exitMBB;
  return BB;
}

MachineBasicBlock *
PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI,
                                            MachineBasicBlock *BB,
                                            bool is8bit,    // operation
                                            unsigned BinOpcode) const {
  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  // In 64 bit mode we have to use 64 bits for addresses, even though the
  // lwarx/stwcx are 32 bits.  With the 32-bit atomics we can use address
  // registers without caring whether they're 32 or 64, but here we're
  // doing actual arithmetic on the addresses.
  bool is64bit = PPCSubTarget.isPPC64();
  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;

  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction *F = BB->getParent();
  MachineFunction::iterator It = BB;
  ++It;

  unsigned dest = MI->getOperand(0).getReg();
  unsigned ptrA = MI->getOperand(1).getReg();
  unsigned ptrB = MI->getOperand(2).getReg();
  unsigned incr = MI->getOperand(3).getReg();
  DebugLoc dl = MI->getDebugLoc();

  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
  F->insert(It, loopMBB);
  F->insert(It, exitMBB);
  exitMBB->splice(exitMBB->begin(), BB,
                  llvm::next(MachineBasicBlock::iterator(MI)),
                  BB->end());
  exitMBB->transferSuccessorsAndUpdatePHIs(BB);

  MachineRegisterInfo &RegInfo = F->getRegInfo();
  const TargetRegisterClass *RC =
    is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
              (const TargetRegisterClass *) &PPC::GPRCRegClass;
  unsigned PtrReg = RegInfo.createVirtualRegister(RC);
  unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
  unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
  unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
  unsigned MaskReg = RegInfo.createVirtualRegister(RC);
  unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC);
  unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
  unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
  unsigned Ptr1Reg;
  unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC);

  //  thisMBB:
  //   ...
  //   fallthrough --> loopMBB
  BB->addSuccessor(loopMBB);

  // The 4-byte load must be aligned, while a char or short may be
  // anywhere in the word.  Hence all this nasty bookkeeping code.
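  // For example (big-endian layout, the only one this code targets): a byte
  // at offset 1 within its aligned word gives shift1 = (ptr1 & 3) << 3 = 8
  // and shift = 8 ^ 24 = 16, since that byte occupies bits 16-23 of the
  // 32-bit word counting from the LSB. The mask is then 0xFF << 16, and ptr
  // is ptr1 with its low two bits cleared.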
  //   add  ptr1, ptrA, ptrB [copy if ptrA==0]
  //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
  //   xori shift, shift1, 24 [16]
  //   rlwinm ptr, ptr1, 0, 0, 29
  //   slw incr2, incr, shift
  //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
  //   slw mask, mask2, shift
  //  loopMBB:
  //   lwarx tmpDest, ptr
  //   add tmp, tmpDest, incr2
  //   andc tmp2, tmpDest, mask
  //   and tmp3, tmp, mask
  //   or tmp4, tmp3, tmp2
  //   stwcx. tmp4, ptr
  //   bne- loopMBB
  //   fallthrough --> exitMBB
  //   srw dest, tmpDest, shift
  if (ptrA != ZeroReg) {
    Ptr1Reg = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
      .addReg(ptrA).addReg(ptrB);
  } else {
    Ptr1Reg = ptrB;
  }
  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
      .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
      .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
  if (is64bit)
    BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(61);
  else
    BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
      .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg)
      .addReg(incr).addReg(ShiftReg);
  if (is8bit)
    BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
  else {
    BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
    BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
      .addReg(Mask3Reg).addImm(65535);
  }
  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
      .addReg(Mask2Reg).addReg(ShiftReg);

  BB = loopMBB;
  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
    .addReg(ZeroReg).addReg(PtrReg);
  if (BinOpcode)
    BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
      .addReg(Incr2Reg).addReg(TmpDestReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg)
    .addReg(TmpDestReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg)
    .addReg(TmpReg).addReg(MaskReg);
  BuildMI(BB, dl, TII->get(is64bit ? PPC::OR8 : PPC::OR), Tmp4Reg)
    .addReg(Tmp3Reg).addReg(Tmp2Reg);
  BuildMI(BB, dl, TII->get(PPC::STWCX))
    .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg);
  BuildMI(BB, dl, TII->get(PPC::BCC))
    .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
  BB->addSuccessor(loopMBB);
  BB->addSuccessor(exitMBB);

  //  exitMBB:
  //   ...
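  // The loop updated the containing aligned word; the final srw shifts the
  // loaded (pre-update) byte or halfword back down to bit 0 so that 'dest'
  // holds the value the atomic read-modify-write returns.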
  BB = exitMBB;
  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg)
    .addReg(ShiftReg);
  return BB;
}

llvm::MachineBasicBlock*
PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI,
                                    MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = MBB;
  ++I;

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  unsigned DstReg = MI->getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(RC->hasType(MVT::i32) && "Invalid destination!");
  unsigned mainDstReg = MRI.createVirtualRegister(RC);
  unsigned restoreDstReg = MRI.createVirtualRegister(RC);

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");
  // For v = setjmp(buf), we generate
  //
  // thisMBB:
  //  SjLjSetup mainMBB
  //  bl mainMBB
  //  v_restore = 1
  //  b sinkMBB
  //
  // mainMBB:
  //  buf[LabelOffset] = LR
  //  v_main = 0
  //
  // sinkMBB:
  //  v = phi(main, restore)
  //

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);

  MachineInstrBuilder MIB;

  // Transfer the remainder of BB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  llvm::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Note that the structure of the jmp_buf used here is not compatible
  // with that used by libc, and is not designed to be. Specifically, it
  // stores only those 'reserved' registers that LLVM does not otherwise
  // understand how to spill. Also, by convention, by the time this
  // intrinsic is called, Clang has already stored the frame address in the
  // first slot of the buffer and stack address in the third. Following the
  // X86 target code, we'll store the jump address in the second slot. We also
  // need to save the TOC pointer (R2) to handle jumps between shared
  // libraries, and that will be stored in the fourth slot. The thread
  // identifier (R13) is not affected.

  // thisMBB:
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();
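  // With a 64-bit pointer type these are byte offsets 8 (jump address) and
  // 24 (TOC) into the buffer. The STD instructions below pass the offset
  // divided by 4 because, in this version of the backend, the immediate of
  // a DS-form memory operand (STD/LD) is modeled as a word offset that the
  // printer scales back up by 4.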

  // Prepare the IP (the address of the label in mainMBB) in a register.
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  unsigned LabelReg = MRI.createVirtualRegister(PtrRC);
  unsigned BufReg = MI->getOperand(1).getReg();

  if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) {
    MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
            .addReg(PPC::X2)
            .addImm(TOCOffset / 4)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Setup
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
  MIB.addRegMask(PPCRegInfo->getNoPreservedMask());

  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);

  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
          .addMBB(mainMBB);
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);

  thisMBB->addSuccessor(mainMBB, /* weight */ 0);
  thisMBB->addSuccessor(sinkMBB, /* weight */ 1);

  // mainMBB:
  //  mainDstReg = 0
  MIB = BuildMI(mainMBB, DL,
    TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);

  // Store IP
  if (PPCSubTarget.isPPC64()) {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
            .addReg(LabelReg)
            .addImm(LabelOffset / 4)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
            .addReg(LabelReg)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }

  MIB.setMemRefs(MMOBegin, MMOEnd);

  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
  mainMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(PPC::PHI), DstReg)
    .addReg(mainDstReg).addMBB(mainMBB)
    .addReg(restoreDstReg).addMBB(thisMBB);

  MI->eraseFromParent();
  return sinkMBB;
}

MachineBasicBlock *
PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI,
                                     MachineBasicBlock *MBB) const {
  DebugLoc DL = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory Reference
  MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
  MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end();

  MVT PVT = getPointerTy();
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
    (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
  unsigned Tmp = MRI.createVirtualRegister(RC);
  // Since FP is only updated here but NOT referenced, it's treated as GPR.
  unsigned FP  = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
  unsigned SP  = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;

  MachineInstrBuilder MIB;

  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset    = 2 * PVT.getStoreSize();
  const int64_t TOCOffset   = 3 * PVT.getStoreSize();

  unsigned BufReg = MI->getOperand(0).getReg();

  // Reload FP (the jumped-to function may not have had a
  // frame pointer, and if so, then its r31 will be restored
  // as necessary).
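  // The reloads below walk the same buffer layout the setjmp side wrote:
  // slot 0 holds the frame pointer, slot 1 the jump address, slot 2 the
  // stack pointer, and slot 3 the TOC pointer. Everything is pulled back
  // out of the buffer before control transfers through the CTR.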
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
            .addImm(0)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
            .addImm(0)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload IP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
            .addImm(LabelOffset / 4)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
            .addImm(LabelOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // Reload SP
  if (PVT == MVT::i64) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
            .addImm(SPOffset / 4)
            .addReg(BufReg);
  } else {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
            .addImm(SPOffset)
            .addReg(BufReg);
  }
  MIB.setMemRefs(MMOBegin, MMOEnd);

  // FIXME: When we also support base pointers, that register must also be
  // restored here.

  // Reload TOC
  if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) {
    MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
            .addImm(TOCOffset / 4)
            .addReg(BufReg);

    MIB.setMemRefs(MMOBegin, MMOEnd);
  }

  // Jump
  BuildMI(*MBB, MI, DL,
          TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));

  MI->eraseFromParent();
  return MBB;
}

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) const {
  if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 ||
      MI->getOpcode() == PPC::EH_SjLj_SetJmp64) {
    return emitEHSjLjSetJmp(MI, BB);
  } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 ||
             MI->getOpcode() == PPC::EH_SjLj_LongJmp64) {
    return emitEHSjLjLongJmp(MI, BB);
  }

  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();

  // To "insert" these instructions we actually have to insert their
  // control-flow patterns.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  MachineFunction *F = BB->getParent();

  if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 ||
                                 MI->getOpcode() == PPC::SELECT_CC_I8)) {
    SmallVector<MachineOperand, 2> Cond;
    Cond.push_back(MI->getOperand(4));
    Cond.push_back(MI->getOperand(1));

    DebugLoc dl = MI->getDebugLoc();
    PPCII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), Cond,
                        MI->getOperand(2).getReg(), MI->getOperand(3).getReg());
  } else if (MI->getOpcode() == PPC::SELECT_CC_I4 ||
             MI->getOpcode() == PPC::SELECT_CC_I8 ||
             MI->getOpcode() == PPC::SELECT_CC_F4 ||
             MI->getOpcode() == PPC::SELECT_CC_F8 ||
             MI->getOpcode() == PPC::SELECT_CC_VRRC) {


    // The incoming instruction knows the destination vreg to set, the
    // condition code register to branch on, the true/false values to
    // select between, and a branch opcode to use.

    // thisMBB:
    // ...
    //  TrueVal = ...
    //  cmpTY ccX, r1, r2
    //  bCC copy1MBB
    //  fallthrough --> copy0MBB
    MachineBasicBlock *thisMBB = BB;
    MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
    unsigned SelectPred = MI->getOperand(4).getImm();
    DebugLoc dl = MI->getDebugLoc();
    F->insert(It, copy0MBB);
    F->insert(It, sinkMBB);

    // Transfer the remainder of BB and its successor edges to sinkMBB.
    sinkMBB->splice(sinkMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    sinkMBB->transferSuccessorsAndUpdatePHIs(BB);

    // Next, add the true and fallthrough blocks as its successors.
    BB->addSuccessor(copy0MBB);
    BB->addSuccessor(sinkMBB);

    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);

    //  copy0MBB:
    //   %FalseValue = ...
    //   # fallthrough to sinkMBB
    BB = copy0MBB;

    // Update machine-CFG edges
    BB->addSuccessor(sinkMBB);

    //  sinkMBB:
    //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
    //  ...
    BB = sinkMBB;
    BuildMI(*BB, BB->begin(), dl,
            TII->get(PPC::PHI), MI->getOperand(0).getReg())
      .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
      .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
  }
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::AND);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::AND8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::OR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::OR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::XOR);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8);

  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
    BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF);
  else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
    BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8);

  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8)
    BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16)
    BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32)
    BB = EmitAtomicBinary(MI, BB, false, 0);
  else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64)
    BB = EmitAtomicBinary(MI, BB, true, 0);

  else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
           MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64) {
    bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // loop1MBB:
    //   l[wd]arx dest, ptr
    //   cmp[wd] dest, oldval
    //   bne- midMBB
    // loop2MBB:
    //   st[wd]cx. newval, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   st[wd]cx. dest, ptr
    // exitMBB:
    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest)
      .addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
      .addReg(oldval).addReg(dest);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(newval).addReg(ptrA).addReg(ptrB);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX))
      .addReg(dest).addReg(ptrA).addReg(ptrB);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
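    // The conditional store in midMBB writes back the value just loaded; it
    // exists only to cancel the outstanding reservation on the comparison-
    // failure path, and whether it succeeds is irrelevant. 'dest' already
    // holds the loaded value that the cmpxchg returns.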
    BB = exitMBB;
  } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
             MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
    // We must use 64-bit registers for addresses when targeting 64-bit,
    // since we're actually doing arithmetic on them.  Other registers
    // can be 32-bit.
    bool is64bit = PPCSubTarget.isPPC64();
    bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;

    unsigned dest   = MI->getOperand(0).getReg();
    unsigned ptrA   = MI->getOperand(1).getReg();
    unsigned ptrB   = MI->getOperand(2).getReg();
    unsigned oldval = MI->getOperand(3).getReg();
    unsigned newval = MI->getOperand(4).getReg();
    DebugLoc dl     = MI->getDebugLoc();

    MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, loop1MBB);
    F->insert(It, loop2MBB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    const TargetRegisterClass *RC =
      is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass :
                (const TargetRegisterClass *) &PPC::GPRCRegClass;
    unsigned PtrReg = RegInfo.createVirtualRegister(RC);
    unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
    unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
    unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC);
    unsigned MaskReg = RegInfo.createVirtualRegister(RC);
    unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Mask3Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC);
    unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC);
    unsigned TmpDestReg = RegInfo.createVirtualRegister(RC);
    unsigned Ptr1Reg;
    unsigned TmpReg = RegInfo.createVirtualRegister(RC);
    unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
    //  thisMBB:
    //   ...
    //   fallthrough --> loop1MBB
    BB->addSuccessor(loop1MBB);

    // The 4-byte load must be aligned, while a char or short may be
    // anywhere in the word.  Hence all this nasty bookkeeping code.
    //   add  ptr1, ptrA, ptrB [copy if ptrA==0]
    //   rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
    //   xori shift, shift1, 24 [16]
    //   rlwinm ptr, ptr1, 0, 0, 29
    //   slw newval2, newval, shift
    //   slw oldval2, oldval, shift
    //   li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
    //   slw mask, mask2, shift
    //   and newval3, newval2, mask
    //   and oldval3, oldval2, mask
    // loop1MBB:
    //   lwarx tmpDest, ptr
    //   and tmp, tmpDest, mask
    //   cmpw tmp, oldval3
    //   bne- midMBB
    // loop2MBB:
    //   andc tmp2, tmpDest, mask
    //   or tmp4, tmp2, newval3
    //   stwcx. tmp4, ptr
    //   bne- loop1MBB
    //   b exitMBB
    // midMBB:
    //   stwcx. tmpDest, ptr
    // exitMBB:
    //   srw dest, tmpDest, shift
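    // For instance, a halfword at offset 2 within its aligned word gives
    // shift1 = (ptr1 & 2) << 3 = 16 and shift = 16 ^ 16 = 0: on a big-endian
    // machine that halfword is the low 16 bits of the word, so mask becomes
    // 0xFFFF << 0 and the compare/exchange touches only those bits.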
    if (ptrA != ZeroReg) {
      Ptr1Reg = RegInfo.createVirtualRegister(RC);
      BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
        .addReg(ptrA).addReg(ptrB);
    } else {
      Ptr1Reg = ptrB;
    }
    BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
        .addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
    BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
        .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
    if (is64bit)
      BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(61);
    else
      BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
        .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29);
    BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
        .addReg(newval).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
        .addReg(oldval).addReg(ShiftReg);
    if (is8bit)
      BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
    else {
      BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
      BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
        .addReg(Mask3Reg).addImm(65535);
    }
    BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
        .addReg(Mask2Reg).addReg(ShiftReg);
    BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
        .addReg(NewVal2Reg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
        .addReg(OldVal2Reg).addReg(MaskReg);

    BB = loop1MBB;
    BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
      .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
      .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
      .addReg(TmpReg).addReg(OldVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB);
    BB->addSuccessor(loop2MBB);
    BB->addSuccessor(midMBB);

    BB = loop2MBB;
    BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
      .addReg(TmpDestReg).addReg(MaskReg);
    BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
      .addReg(Tmp2Reg).addReg(NewVal3Reg);
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg)
      .addReg(ZeroReg).addReg(PtrReg);
    BuildMI(BB, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
    BB->addSuccessor(loop1MBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;
    BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg)
      .addReg(ZeroReg).addReg(PtrReg);
    BB->addSuccessor(exitMBB);

    //  exitMBB:
    //   ...
    BB = exitMBB;
    BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpReg)
      .addReg(ShiftReg);
  } else if (MI->getOpcode() == PPC::FADDrtz) {
    // This pseudo performs an FADD with rounding mode temporarily forced
    // to round-to-zero.  We emit this via custom inserter since the FPSCR
    // is not modeled at the SelectionDAG level.
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src1 = MI->getOperand(1).getReg();
    unsigned Src2 = MI->getOperand(2).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

    // Save FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);

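    // The FPSCR rounding-control field RN occupies its two lowest-order bits
    // (bits 30-31 in IBM numbering), and RN = 0b01 selects round toward
    // zero; setting bit 31 and clearing bit 30 below forces that mode.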
    // Set rounding mode to round-to-zero.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31);
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30);

    // Perform addition.
    BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);

    // Restore FPSCR value.
    BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg);
  } else if (MI->getOpcode() == PPC::FRINDrint ||
             MI->getOpcode() == PPC::FRINSrint) {
    bool isf32 = MI->getOpcode() == PPC::FRINSrint;
    unsigned Dest = MI->getOperand(0).getReg();
    unsigned Src = MI->getOperand(1).getReg();
    DebugLoc dl   = MI->getDebugLoc();

    MachineRegisterInfo &RegInfo = F->getRegInfo();
    unsigned CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);

    // Perform the rounding.
    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FRINS : PPC::FRIND), Dest)
      .addReg(Src);

    // Compare the results.
    BuildMI(*BB, MI, dl, TII->get(isf32 ? PPC::FCMPUS : PPC::FCMPUD), CRReg)
      .addReg(Dest).addReg(Src);

    // If the results were not equal, then set the FPSCR XX bit.
    MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
    F->insert(It, midMBB);
    F->insert(It, exitMBB);
    exitMBB->splice(exitMBB->begin(), BB,
                    llvm::next(MachineBasicBlock::iterator(MI)),
                    BB->end());
    exitMBB->transferSuccessorsAndUpdatePHIs(BB);

    BuildMI(*BB, MI, dl, TII->get(PPC::BCC))
      .addImm(PPC::PRED_EQ).addReg(CRReg).addMBB(exitMBB);

    BB->addSuccessor(midMBB);
    BB->addSuccessor(exitMBB);

    BB = midMBB;

    // Set the FPSCR XX bit (FE_INEXACT). Note that we cannot just set
    // the FI bit here because that will not automatically set XX also,
    // and XX is what libm interprets as the FE_INEXACT flag.
    BuildMI(BB, dl, TII->get(PPC::MTFSB1)).addImm(/* 38 - 32 = */ 6);
    BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);

    BB->addSuccessor(exitMBB);

    BB = exitMBB;
  } else {
    llvm_unreachable("Unexpected instr type to insert");
  }

  MI->eraseFromParent();   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op,
                                               DAGCombinerInfo &DCI) const {
  if (DCI.isAfterLegalizeVectorOps())
    return SDValue();

  EVT VT = Op.getValueType();

  if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) ||
      (VT == MVT::f64 && PPCSubTarget.hasFRE()) ||
      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {

    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal, we need to find the zero of the function:
    //   F(X) = A X - 1 [which has a zero at X = 1/A]
    //     =>
    //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
    //     does not require additional intermediate precision]

    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. The minimum architected relative
    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
    // 23 digits and double has 52 digits.
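    // A quick check on the iteration counts chosen below: starting from 5
    // correct bits, three doublings give 5 -> 10 -> 20 -> 40 bits (enough
    // for f32's 24-bit significand after 3 iterations), and a fourth gives
    // 80 bits (enough for f64's 53). With the more precise 2^-14 estimate,
    // one doubling reaches 28 bits and two reach 56.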
    int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      ++Iterations;

    SelectionDAG &DAG = DCI.DAG;
    DebugLoc dl = Op.getDebugLoc();

    SDValue FPOne =
      DAG.getConstantFP(1.0, VT.getScalarType());
    if (VT.isVector()) {
      assert(VT.getVectorNumElements() == 4 &&
             "Unknown vector type");
      FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                          FPOne, FPOne, FPOne, FPOne);
    }

    SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op);
    DCI.AddToWorklist(Est.getNode());

    // Newton iterations: Est = Est + Est (1 - Arg * Est)
    for (int i = 0; i < Iterations; ++i) {
      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst);
      DCI.AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}

SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op,
                                                    DAGCombinerInfo &DCI) const {
  if (DCI.isAfterLegalizeVectorOps())
    return SDValue();

  EVT VT = Op.getValueType();

  if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) ||
      (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) ||
      (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) {

    // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
    // For the reciprocal sqrt, we need to find the zero of the function:
    //   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
    //     =>
    //   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
    // As a result, we precompute A/2 prior to the iteration loop.

    // Convergence is quadratic, so we essentially double the number of digits
    // correct after every iteration. The minimum architected relative
    // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has
    // 23 digits and double has 52 digits.
    int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3;
    if (VT.getScalarType() == MVT::f64)
      ++Iterations;

    SelectionDAG &DAG = DCI.DAG;
    DebugLoc dl = Op.getDebugLoc();

    SDValue FPThreeHalves =
      DAG.getConstantFP(1.5, VT.getScalarType());
    if (VT.isVector()) {
      assert(VT.getVectorNumElements() == 4 &&
             "Unknown vector type");
      FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT,
                                  FPThreeHalves, FPThreeHalves,
                                  FPThreeHalves, FPThreeHalves);
    }

    SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op);
    DCI.AddToWorklist(Est.getNode());

    // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that
    // this entire sequence requires only one FP constant.
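    // Deriving the update from the Newton step: with F(X) = 1/X^2 - A we
    // have F'(X) = -2/X^3, so
    //   X - F(X)/F'(X) = X + (1/X^2 - A) X^3/2 = X (1.5 - A X^2 / 2),
    // which is the form computed in the loop below using HalfArg = A/2.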
    SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op);
    DCI.AddToWorklist(HalfArg.getNode());

    HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op);
    DCI.AddToWorklist(HalfArg.getNode());

    // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
    for (int i = 0; i < Iterations; ++i) {
      SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst);
      DCI.AddToWorklist(NewEst.getNode());

      Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst);
      DCI.AddToWorklist(Est.getNode());
    }

    return Est;
  }

  return SDValue();
}

SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  const TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  DebugLoc dl = N->getDebugLoc();
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue())   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->isNullValue() ||   //  0 >>s V -> 0.
          C->isAllOnesValue())  // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;
  case ISD::FDIV: {
    assert(TM.Options.UnsafeFPMath &&
           "Reciprocal estimates require UnsafeFPMath");

    if (N->getOperand(1).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI);
      if (RV.getNode() != 0) {
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND &&
               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                 DCI);
      if (RV.getNode() != 0) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAG.getNode(ISD::FP_EXTEND, N->getOperand(1).getDebugLoc(),
                         N->getValueType(0), RV);
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND &&
               N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) {
      SDValue RV =
        DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0),
                                 DCI);
      if (RV.getNode() != 0) {
        DCI.AddToWorklist(RV.getNode());
        RV = DAG.getNode(ISD::FP_ROUND, N->getOperand(1).getDebugLoc(),
                         N->getValueType(0), RV,
                         N->getOperand(1).getOperand(1));
        DCI.AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                           N->getOperand(0), RV);
      }
    }

    SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI);
    if (RV.getNode() != 0) {
      DCI.AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, dl, N->getValueType(0),
                         N->getOperand(0), RV);
    }

    }
    break;
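  // Both the FDIV and FSQRT combines rewrite exact operations into Newton-
  // iteration estimates, which changes rounding behavior; the asserts above
  // and below reflect that these combines are only expected to fire when
  // the target registered them under unsafe FP math.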
  case ISD::FSQRT: {
    assert(TM.Options.UnsafeFPMath &&
           "Reciprocal estimates require UnsafeFPMath");

    // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the
    // reciprocal sqrt.
    SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI);
    if (RV.getNode() != 0) {
      DCI.AddToWorklist(RV.getNode());
      RV = DAGCombineFastRecip(RV, DCI);
      if (RV.getNode() != 0)
        return RV;
    }

    }
    break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDValue Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
            DCI.AddToWorklist(Val.getNode());
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val);
          DCI.AddToWorklist(Val.getNode());
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.getNode());
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDValue Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
        DCI.AddToWorklist(Val.getNode());
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
      DCI.AddToWorklist(Val.getNode());

      SDValue Ops[] = {
        N->getOperand(0), Val, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };

      Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
              DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops),
              cast<StoreSDNode>(N)->getMemoryVT(),
              cast<StoreSDNode>(N)->getMemOperand());
      DCI.AddToWorklist(Val.getNode());
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (cast<StoreSDNode>(N)->isUnindexed() &&
        N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).getNode()->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getOperand(1).getValueType() == MVT::i64))) {
      SDValue BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);

      SDValue Ops[] = {
        N->getOperand(0), BSwapOp, N->getOperand(2),
        DAG.getValueType(N->getOperand(1).getValueType())
      };
      return
        DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other),
                                Ops, array_lengthof(Ops),
                                cast<StoreSDNode>(N)->getMemoryVT(),
                                cast<StoreSDNode>(N)->getMemOperand());
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
         (TM.getSubtarget<PPCSubtarget>().hasLDBRX() &&
          TM.getSubtarget<PPCSubtarget>().isPPC64() &&
          N->getValueType(0) == MVT::i64))) {
      SDValue Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      SDValue Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDValue BSLoad =
        DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
                                DAG.getVTList(N->getValueType(0) == MVT::i64 ?
                                              MVT::i64 : MVT::i32, MVT::Other),
                                Ops, 3, LD->getMemoryVT(), LD->getMemOperand());

      // If this is an i16 load, insert the truncate.
      SDValue ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDValue(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).getNode();
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if (UI->getOpcode() == PPCISD::VCMPo &&
            UI->getOperand(1) == N->getOperand(1) &&
            UI->getOperand(2) == N->getOperand(2) &&
            UI->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has no uses, don't
      // transform this (hasNUsesOfValue(0, 1) checks that result #1, the
      // flag, is unused).
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDValue(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, dl, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      SDValue Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
      SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDValue();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (CallOperandVal == NULL)
    return CW_Default;
  Type *type = CallOperandVal->getType();
  // Look at the constraint type.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b':
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f':
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd':
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v':
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y':
    weight = CW_Register;
    break;
  case 'Z':
    weight = CW_Memory;
    break;
  }
  return weight;
}

std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                EVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r':   // R0-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    case 'f':
      if (VT == MVT::f32 || VT == MVT::i32)
        return std::make_pair(0U, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return std::make_pair(0U, &PPC::F8RCRegClass);
      break;
    case 'v':
      return std::make_pair(0U, &PPC::VRRCRegClass);
    case 'y':   // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}

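// As an illustrative example (not taken from this file) of how these letters
// are used from C, "b" requests a base register (any GPR except r0, which
// reads as zero in an address) while "r" allows any GPR; GCC-style asm such
// as
//   asm("lwzx %0, %1, %2" : "=r"(val) : "b"(base), "r"(offset));
// would be register-allocated through the classes returned above.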

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result(0,0);

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    unsigned Value = CST->getZExtValue();
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if ((short)Value == (int)Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r and r+i addressing.
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r  or  2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
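// Concretely: plain "i", "r+i", and "r+r" are accepted; a scale of 2 with no
// base register or offset is folded as 2*r = r+r; and r+r+i or any larger
// scale is rejected.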
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
                     RetAddrFI, MachinePointerInfo(), false, false, false, 0);
}

SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
                                          SelectionDAG &DAG) const {
  DebugLoc dl = Op.getDebugLoc();
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MFI->setFrameAddressIsTaken(true);

  // Naked functions never have a frame pointer, and so we use r1. For all
  // other functions, this decision must be delayed until prologue/epilogue
  // insertion (PEI).
  unsigned FrameReg;
  if (MF.getFunction()->getAttributes().hasAttribute(
        AttributeSet::FunctionIndex, Attribute::Naked))
    FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
  else
    FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;

  // Each frame's back chain points at the previous frame, so walking up
  // Depth levels is a chain of loads.
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
                                         PtrVT);
  while (Depth--)
    FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                            FrameAddr, MachinePointerInfo(), false, false,
                            false, 0);
  return FrameAddr;
}

bool
PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  // The PowerPC target isn't yet aware of offsets.
  return false;
}

/// getOptimalMemOpType - Returns the target specific optimal type for load
/// and store operations as a result of memset, memcpy, and memmove
/// lowering. If DstAlign is zero, the destination can be assumed to satisfy
/// any alignment constraint. Similarly, if SrcAlign is zero, there is no
/// need to check it against an alignment requirement, probably because the
/// source does not need to be loaded. If 'IsMemset' is true, we are
/// expanding a memset; if 'ZeroMemset' is also true, it is a memset of zero.
/// 'MemcpyStrSrc' indicates that the memcpy source is constant, so it does
/// not need to be loaded.
/// It returns EVT::Other if the type should be determined using generic
/// target-independent logic.
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
                                           unsigned DstAlign, unsigned SrcAlign,
                                           bool IsMemset, bool ZeroMemset,
                                           bool MemcpyStrSrc,
                                           MachineFunction &MF) const {
  // Prefer the widest natural integer register width.
  return PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
}

bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
                                                      bool *Fast) const {
  if (DisablePPCUnaligned)
    return false;

  // PowerPC supports unaligned memory access for simple non-vector types.
  // Although accessing unaligned addresses is not as efficient as accessing
  // aligned addresses, it is generally more efficient than manual expansion,
  // and generally traps into software emulation only when crossing page
  // boundaries.

  if (!VT.isSimple())
    return false;

  if (VT.getSimpleVT().isVector())
    return false;

  if (VT == MVT::ppcf128)
    return false;

  if (Fast)
    *Fast = true;

  return true;
}

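// For example, a misaligned scalar access such as
//   %v = load i32* %p, align 1
// stays a single word load (e.g. lwz) rather than being expanded by the
// legalizer into byte loads, shifts, and ors.
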
/// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than
/// a pair of mul and add instructions. fmuladd intrinsics will be expanded to
/// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd
/// is expanded to mul + add.
bool PPCTargetLowering::isFMAFasterThanMulAndAdd(EVT VT) const {
  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
  case MVT::v4f32:
    return true;
  default:
    break;
  }

  return false;
}

Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const {
  if (DisableILPPref)
    return TargetLowering::getSchedulingPreference(N);

  return Sched::ILP;
}

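// Sched::ILP biases the pre-RA scheduler toward exposing instruction-level
// parallelism rather than minimizing register pressure, which suits the
// multi-issue pipelines of modern PPC cores; -disable-ppc-ilp-pref restores
// the target-independent default.
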