PPCISelLowering.cpp revision 98bae99266d8e527e7399c717a79c6dc9a073331
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPerfectShuffle.h"
#include "PPCTargetMachine.h"
#include "PPCTargetObjectFile.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
  cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);

static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
  cl::desc("disable setting the node scheduling preference to ILP on PPC"),
  cl::Hidden);

static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
  cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);

static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) {
  if (TM.getSubtargetImpl()->isDarwin())
    return new TargetLoweringObjectFileMachO();

  if (TM.getSubtargetImpl()->isSVR4ABI())
    return new PPC64LinuxTargetObjectFile();

  return new TargetLoweringObjectFileELF();
}

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) {
  const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>();

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
  // arguments are at least 4/8 bytes aligned.
  bool isPPC64 = Subtarget->isPPC64();
  setMinStackArgumentAlignment(isPPC64 ? 8 : 4);

  // Set up the register classes.
  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
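  // (These correspond to the update-form instructions such as lbzu/lhzu/lwzu/
  // ldu and stbu/sthu/stwu/stdu, which write the computed effective address
  // back into the base register.)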
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // This is used in the ppcf128->int sequence. Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // We do not currently implement these libm ops for PowerPC.
  setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
  setOperationAction(ISD::FCEIL, MVT::ppcf128, Expand);
  setOperationAction(ISD::FTRUNC, MVT::ppcf128, Expand);
  setOperationAction(ISD::FRINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand);
  setOperationAction(ISD::FREM, MVT::ppcf128, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN, MVT::f64, Expand);
  setOperationAction(ISD::FCOS, MVT::f64, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
  setOperationAction(ISD::FREM, MVT::f64, Expand);
  setOperationAction(ISD::FPOW, MVT::f64, Expand);
  setOperationAction(ISD::FMA, MVT::f64, Legal);
  setOperationAction(ISD::FSIN, MVT::f32, Expand);
  setOperationAction(ISD::FCOS, MVT::f32, Expand);
  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
  setOperationAction(ISD::FREM, MVT::f32, Expand);
  setOperationAction(ISD::FPOW, MVT::f32, Expand);
  setOperationAction(ISD::FMA, MVT::f32, Legal);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // If we're enabling GP optimizations, use hardware square root.
  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTE() && Subtarget->hasFRE()))
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);

  if (!Subtarget->hasFSQRT() &&
      !(TM.Options.UnsafeFPMath &&
        Subtarget->hasFRSQRTES() && Subtarget->hasFRES()))
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);

  if (Subtarget->hasFCPSGN()) {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal);
  } else {
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
  }

  if (Subtarget->hasFPRND()) {
    setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
    setOperationAction(ISD::FCEIL, MVT::f64, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
    setOperationAction(ISD::FROUND, MVT::f64, Legal);

    setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
    setOperationAction(ISD::FROUND, MVT::f32, Legal);
  }

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);

  if (Subtarget->hasPOPCNTD()) {
    setOperationAction(ISD::CTPOP, MVT::i32, Legal);
    setOperationAction(ISD::CTPOP, MVT::i64, Legal);
  } else {
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  }

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32, Expand);
  setOperationAction(ISD::ROTR, MVT::i64, Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BITCAST, MVT::f32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i32, Expand);
  setOperationAction(ISD::BITCAST, MVT::i64, Expand);
  setOperationAction(ISD::BITCAST, MVT::f64, Expand);

  // We cannot sextinreg(i1). Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
  // support continuations, user-level threading, etc. As a result, no
  // other SjLj exception interfaces are implemented; please don't build
  // your own exception handling based on them.
  // LLVM/Clang supports zero-cost DWARF exception handling.
  setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
  setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::BlockAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i64, Custom);
  setOperationAction(ISD::JumpTable, MVT::i64, Custom);

  // TRAP is legal.
  setOperationAction(ISD::TRAP, MVT::Other, Legal);

  // TRAMPOLINE is custom lowered.
  setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
  setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  if (Subtarget->isSVR4ABI()) {
    if (isPPC64) {
      // VAARG always uses double-word chunks, so promote anything smaller.
      setOperationAction(ISD::VAARG, MVT::i1, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i1, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i8, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i8, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i16, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i16, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::i32, Promote);
      AddPromotedToType(ISD::VAARG, MVT::i32, MVT::i64);
      setOperationAction(ISD::VAARG, MVT::Other, Expand);
    } else {
      // VAARG is custom lowered with the 32-bit SVR4 ABI.
      setOperationAction(ISD::VAARG, MVT::Other, Custom);
      setOperationAction(ISD::VAARG, MVT::i64, Custom);
    }
  } else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  if (Subtarget->isSVR4ABI() && !isPPC64)
    // VACOPY is custom lowered with the 32-bit SVR4 ABI.
    setOperationAction(ISD::VACOPY, MVT::Other, Custom);
  else
    setOperationAction(ISD::VACOPY, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  // To handle counter-based loop conditions.
  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom);

  // Comparisons that require checking two conditions.
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f64, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOGE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f64, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f64, Expand);

  if (Subtarget->has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
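    // (For example, fcfid converts i64->fp and fctidz performs the truncating
    // fp->i64 conversion; the hasFPCVT() block below enables the unsigned and
    // single-precision variants where available.)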
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // This is just the low 32 bits of a (signed) fp->i64 conversion.
    // We cannot do this with Promote because i64 is not a legal type.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);

    if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64())
      setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  // With the instructions enabled under FPCVT, we can do everything.
  if (PPCSubTarget.hasFPCVT()) {
    if (Subtarget->has64BitSupport()) {
      setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
      setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
      setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
      setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
    }

    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
  }

  if (Subtarget->use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (Subtarget->hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT::SimpleValueType VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, VT, Legal);
      setOperationAction(ISD::SUB, VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND, VT, Promote);
      AddPromotedToType(ISD::AND, VT, MVT::v4i32);
      setOperationAction(ISD::OR, VT, Promote);
      AddPromotedToType(ISD::OR, VT, MVT::v4i32);
      setOperationAction(ISD::XOR, VT, Promote);
      AddPromotedToType(ISD::XOR, VT, MVT::v4i32);
      setOperationAction(ISD::LOAD, VT, Promote);
      AddPromotedToType(ISD::LOAD, VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType(ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType(ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL, VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FREM, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::FSQRT, VT, Expand);
      setOperationAction(ISD::FLOG, VT, Expand);
      setOperationAction(ISD::FLOG10, VT, Expand);
      setOperationAction(ISD::FLOG2, VT, Expand);
      setOperationAction(ISD::FEXP, VT, Expand);
      setOperationAction(ISD::FEXP2, VT, Expand);
      setOperationAction(ISD::FSIN, VT, Expand);
      setOperationAction(ISD::FCOS, VT, Expand);
      setOperationAction(ISD::FABS, VT, Expand);
      setOperationAction(ISD::FPOWI, VT, Expand);
      setOperationAction(ISD::FFLOOR, VT, Expand);
      setOperationAction(ISD::FCEIL, VT, Expand);
      setOperationAction(ISD::FTRUNC, VT, Expand);
      setOperationAction(ISD::FRINT, VT, Expand);
      setOperationAction(ISD::FNEARBYINT, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
      setOperationAction(ISD::VSELECT, VT, Expand);
      setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);

      for (unsigned j = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
           j <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++j) {
        MVT::SimpleValueType InnerVT = (MVT::SimpleValueType)j;
        setTruncStoreAction(VT, InnerVT, Expand);
      }
      setLoadExtAction(ISD::SEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::ZEXTLOAD, VT, Expand);
      setLoadExtAction(ISD::EXTLOAD, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND, MVT::v4i32, Legal);
    setOperationAction(ISD::OR, MVT::v4i32, Legal);
    setOperationAction(ISD::XOR, MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD, MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
    setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
    setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
    setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal);
    setOperationAction(ISD::FCEIL, MVT::v4f32, Legal);
    setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal);
    setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);

    addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
    addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::FMA, MVT::v4f32, Legal);

    if (TM.Options.UnsafeFPMath) {
      setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
      setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
    }

    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);

    // Altivec does not contain unordered floating-point compare instructions.
    setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand);

    setCondCodeAction(ISD::SETO, MVT::v4f32, Expand);
    setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand);
  }

  if (Subtarget->has64BitSupport()) {
    setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
    setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal);
  }

  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Expand);

  setBooleanContents(ZeroOrOneBooleanContent);
  // Altivec instructions set fields to all zeros or all ones.
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  if (isPPC64) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::LOAD);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);
  setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);

  // Use reciprocal estimates.
  if (TM.Options.UnsafeFPMath) {
    setTargetDAGCombine(ISD::FDIV);
    setTargetDAGCombine(ISD::FSQRT);
  }

  // Darwin long double math library functions have $LDBL128 appended.
  if (Subtarget->isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
    setLibcallName(RTLIB::LOG_PPCF128, "logl$LDBL128");
    setLibcallName(RTLIB::LOG2_PPCF128, "log2l$LDBL128");
    setLibcallName(RTLIB::LOG10_PPCF128, "log10l$LDBL128");
    setLibcallName(RTLIB::EXP_PPCF128, "expl$LDBL128");
    setLibcallName(RTLIB::EXP2_PPCF128, "exp2l$LDBL128");
  }

  setMinFunctionAlignment(2);
  if (PPCSubTarget.isDarwin())
    setPrefFunctionAlignment(4);

  if (isPPC64 && Subtarget->isJITCodeModel())
    // Temporary workaround for the inability of PPC64 JIT to handle jump
    // tables.
    setSupportJumpTables(false);

  setInsertFencesForAtomic(true);

  if (Subtarget->enableMachineScheduler())
    setSchedulingPreference(Sched::Source);
  else
    setSchedulingPreference(Sched::Hybrid);

  computeRegisterProperties();

  // The Freescale cores do better with aggressive inlining of memcpy and
  // friends. GCC uses the same threshold of 128 bytes (= 32 word stores).
  if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc ||
      Subtarget->getDarwinDirective() == PPC::DIR_E5500) {
    MaxStoresPerMemset = 32;
    MaxStoresPerMemsetOptSize = 16;
    MaxStoresPerMemcpy = 32;
    MaxStoresPerMemcpyOptSize = 8;
    MaxStoresPerMemmove = 32;
    MaxStoresPerMemmoveOptSize = 8;

    setPrefFunctionAlignment(4);
  }
}

/// getMaxByValAlign - Helper for getByValTypeAlignment to determine
/// the desired ByVal argument alignment.
static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign,
                             unsigned MaxMaxAlign) {
  if (MaxAlign == MaxMaxAlign)
    return;
  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
    if (MaxMaxAlign >= 32 && VTy->getBitWidth() >= 256)
      MaxAlign = 32;
    else if (VTy->getBitWidth() >= 128 && MaxAlign < 16)
      MaxAlign = 16;
  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
    unsigned EltAlign = 0;
    getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
    if (EltAlign > MaxAlign)
      MaxAlign = EltAlign;
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
      unsigned EltAlign = 0;
      getMaxByValAlign(STy->getElementType(i), EltAlign, MaxMaxAlign);
      if (EltAlign > MaxAlign)
        MaxAlign = EltAlign;
      if (MaxAlign == MaxMaxAlign)
        break;
    }
  }
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const {
  // Darwin passes everything on a 4-byte boundary.
  if (PPCSubTarget.isDarwin())
    return 4;

  // 16-byte and wider vectors are passed on a 16-byte boundary.
  // The rest is 8 on PPC64 and 4 on PPC32.
  unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4;
  if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX())
    getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16);
  return Align;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::FRE: return "PPCISD::FRE";
  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
  case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE";
  case PPCISD::LOAD: return "PPCISD::LOAD";
  case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
  case PPCISD::MTCTR: return "PPCISD::MTCTR";
  case PPCISD::BCTRL: return "PPCISD::BCTRL";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  case PPCISD::LBRX: return "PPCISD::LBRX";
  case PPCISD::STBRX: return "PPCISD::STBRX";
  case PPCISD::LARX: return "PPCISD::LARX";
  case PPCISD::STCX: return "PPCISD::STCX";
  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
  case PPCISD::BDNZ: return "PPCISD::BDNZ";
  case PPCISD::BDZ: return "PPCISD::BDZ";
  case PPCISD::MFFS: return "PPCISD::MFFS";
  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
  case PPCISD::CR6SET: return "PPCISD::CR6SET";
  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
  case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
  case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
  case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
  case PPCISD::SC: return "PPCISD::SC";
  }
}

EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDValue Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  return Op < 0 || Op == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i  ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                             bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         "PPC only supports shuffles by bytes!");

  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);

  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = SVOp->getMaskElt(i);
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
        return -1;
  }
  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize) {
  assert(N->getValueType(0) == MVT::v16i8 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N);

  APInt APVal, APUndef;
  unsigned BitSize;
  bool HasAnyUndefs;

  if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();

  return false;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
  assert(isSplatShuffleMask(SVOp, EltSize));
  return SVOp->getMaskElt(0) / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (UniquedVals[i&(Multiple-1)].getNode() == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.
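    // (For example, with a v8i16 build_vector and ByteSize == 4, Multiple is
    // 2: all even-index and all odd-index i16 entries must each be uniform,
    // and the resulting pair is the 32-bit chunk a vspltisw would replicate.)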
    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].getNode() == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].getNode() == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.getNode() == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (OpVal.getNode() == 0) return SDValue();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal. Continue doing this until we
  // get to ByteSize. This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDValue();
  }

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDValue();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value. If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getZExtValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
}
static bool isIntS16Immediate(SDValue Op, short &Imm) {
  return isIntS16Immediate(Op.getNode(), Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation. Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDValue N, SDValue &Base,
                                            SDValue &Index,
                                            SelectionDAG &DAG) const {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

// If we happen to be doing an i64 load or store into a stack slot that has
// less than a 4-byte alignment, then the frame-index elimination may need to
// use an indexed load or store instruction (because the offset may not be a
// multiple of 4). The extra register needed to hold the offset comes from the
// register scavenger, and it is possible that the scavenger will need to use
// an emergency spill slot. As a result, we need to make sure that a spill slot
// is allocated when doing an i64 load/store into a less-than-4-byte-aligned
// stack slot.
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
  // FIXME: This does not handle the LWA case.
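  // (Only i64 accesses are checked here because the DS-form ld/std encodings
  // require the displacement to be a multiple of 4; narrower widths can use
  // the plain D-form with any byte offset.)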
  if (VT != MVT::i64)
    return;

  // NOTE: We'll exclude negative FIs here, which come from argument
  // lowering, because there are no known test cases triggering this problem
  // using packed structures (or similar). We can remove this exclusion if
  // we find such a test case. The reason why this is so test-case driven is
  // because this entire 'fixup' is only to prevent crashes (from the
  // register scavenger) on not-really-valid inputs. For example, if we have:
  //   %a = alloca i1
  //   %b = bitcast i1* %a to i64*
  //   store i64* a, i64 b
  // then the store should really be marked as 'align 1', but is not. If it
  // were marked as 'align 1' then the indexed form would have been
  // instruction-selected initially, and the problem this 'fixup' is preventing
  // won't happen regardless.
  if (FrameIdx < 0)
    return;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  unsigned Align = MFI->getObjectAlignment(FrameIdx);
  if (Align >= 4)
    return;

  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setHasNonRISpills();
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg. If Aligned is true, only accept displacements
/// suitable for STD and friends, i.e. multiples of 4.
bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp,
                                            SDValue &Base,
                                            SelectionDAG &DAG,
                                            bool Aligned) const {
  // FIXME dl should come from parent load or store, not from address.
  SDLoc dl(N);
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(imm, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetGlobalTLSAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!Aligned || (imm & 3) == 0)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0), LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(imm, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0".
    short Imm;
    if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!Aligned || (CN->getZExtValue() & 3) == 0)) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base,
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}

/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {

    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, false))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, true))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// GetLabelAccessInfo - Return true if we should reference labels using a
/// PICBase, set the HiOpFlags and LoOpFlags to the target MO flags.
static bool GetLabelAccessInfo(const TargetMachine &TM, unsigned &HiOpFlags,
                               unsigned &LoOpFlags, const GlobalValue *GV = 0) {
  HiOpFlags = PPCII::MO_HA;
  LoOpFlags = PPCII::MO_LO;

  // Don't use the pic base if not in PIC relocation model, or if we are on a
  // non-Darwin platform. We don't support PIC on other platforms yet.
  bool isPIC = TM.getRelocationModel() == Reloc::PIC_ &&
               TM.getSubtarget<PPCSubtarget>().isDarwin();
  if (isPIC) {
    HiOpFlags |= PPCII::MO_PIC_FLAG;
    LoOpFlags |= PPCII::MO_PIC_FLAG;
  }

  // If this is a reference to a global value that requires a non-lazy-ptr, make
  // sure that instruction lowering adds it.
  if (GV && TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV, TM)) {
    HiOpFlags |= PPCII::MO_NLP_FLAG;
    LoOpFlags |= PPCII::MO_NLP_FLAG;

    if (GV->hasHiddenVisibility()) {
      HiOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
      LoOpFlags |= PPCII::MO_NLP_HIDDEN_FLAG;
    }
  }

  return isPIC;
}

static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
                             SelectionDAG &DAG) {
  EVT PtrVT = HiPart.getValueType();
  SDValue Zero = DAG.getConstant(0, PtrVT);
  SDLoc DL(HiPart);

  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);

  // With PIC, the first instruction is actually "GR+hi(&G)".
  if (isPIC)
    Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);

  // Generate non-pic code that has direct accesses to the constant pool.
  // The address of the global is just (hi(&g)+lo(&g)).
  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
}

SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  const Constant *C = CP->getConstVal();

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) {
    SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0);
    return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA,
                       DAG.getRegister(PPC::X2, MVT::i64));
  }

  unsigned MOHiFlag, MOLoFlag;
  bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag);
  SDValue CPIHi =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOHiFlag);
  SDValue CPILo =
    DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0, MOLoFlag);
  return LowerLabelRef(CPIHi, CPILo, isPIC, DAG);
}

SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // 64-bit SVR4 ABI code is always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
1382 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1383 SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1384 return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, 1385 DAG.getRegister(PPC::X2, MVT::i64)); 1386 } 1387 1388 unsigned MOHiFlag, MOLoFlag; 1389 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1390 SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag); 1391 SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag); 1392 return LowerLabelRef(JTIHi, JTILo, isPIC, DAG); 1393} 1394 1395SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op, 1396 SelectionDAG &DAG) const { 1397 EVT PtrVT = Op.getValueType(); 1398 1399 const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); 1400 1401 unsigned MOHiFlag, MOLoFlag; 1402 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag); 1403 SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag); 1404 SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag); 1405 return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG); 1406} 1407 1408SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, 1409 SelectionDAG &DAG) const { 1410 1411 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); 1412 SDLoc dl(GA); 1413 const GlobalValue *GV = GA->getGlobal(); 1414 EVT PtrVT = getPointerTy(); 1415 bool is64bit = PPCSubTarget.isPPC64(); 1416 1417 TLSModel::Model Model = getTargetMachine().getTLSModel(GV); 1418 1419 if (Model == TLSModel::LocalExec) { 1420 SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1421 PPCII::MO_TPREL_HA); 1422 SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1423 PPCII::MO_TPREL_LO); 1424 SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2, 1425 is64bit ? MVT::i64 : MVT::i32); 1426 SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg); 1427 return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi); 1428 } 1429 1430 if (!is64bit) 1431 llvm_unreachable("only local-exec is currently supported for ppc32"); 1432 1433 if (Model == TLSModel::InitialExec) { 1434 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1435 SDValue TGATLS = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 1436 PPCII::MO_TLS); 1437 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1438 SDValue TPOffsetHi = DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, 1439 PtrVT, GOTReg, TGA); 1440 SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, 1441 PtrVT, TGA, TPOffsetHi); 1442 return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS); 1443 } 1444 1445 if (Model == TLSModel::GeneralDynamic) { 1446 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1447 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1448 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT, 1449 GOTReg, TGA); 1450 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT, 1451 GOTEntryHi, TGA); 1452 1453 // We need a chain node, and don't have one handy. The underlying 1454 // call has no side effects, so using the function entry node 1455 // suffices. 1456 SDValue Chain = DAG.getEntryNode(); 1457 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1458 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1459 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl, 1460 PtrVT, ParmReg, TGA); 1461 // The return value from GET_TLS_ADDR really is in X3 already, but 1462 // some hacks are needed here to tie everything together. 
The extra 1463 // copies dissolve during subsequent transforms. 1464 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1465 return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT); 1466 } 1467 1468 if (Model == TLSModel::LocalDynamic) { 1469 SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0); 1470 SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64); 1471 SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT, 1472 GOTReg, TGA); 1473 SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT, 1474 GOTEntryHi, TGA); 1475 1476 // We need a chain node, and don't have one handy. The underlying 1477 // call has no side effects, so using the function entry node 1478 // suffices. 1479 SDValue Chain = DAG.getEntryNode(); 1480 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry); 1481 SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64); 1482 SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl, 1483 PtrVT, ParmReg, TGA); 1484 // The return value from GET_TLSLD_ADDR really is in X3 already, but 1485 // some hacks are needed here to tie everything together. The extra 1486 // copies dissolve during subsequent transforms. 1487 Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr); 1488 SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT, 1489 Chain, ParmReg, TGA); 1490 return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA); 1491 } 1492 1493 llvm_unreachable("Unknown TLS model!"); 1494} 1495 1496SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, 1497 SelectionDAG &DAG) const { 1498 EVT PtrVT = Op.getValueType(); 1499 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1500 SDLoc DL(GSDN); 1501 const GlobalValue *GV = GSDN->getGlobal(); 1502 1503 // 64-bit SVR4 ABI code is always position-independent. 1504 // The actual address of the GlobalValue is stored in the TOC. 1505 if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { 1506 SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); 1507 return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, 1508 DAG.getRegister(PPC::X2, MVT::i64)); 1509 } 1510 1511 unsigned MOHiFlag, MOLoFlag; 1512 bool isPIC = GetLabelAccessInfo(DAG.getTarget(), MOHiFlag, MOLoFlag, GV); 1513 1514 SDValue GAHi = 1515 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag); 1516 SDValue GALo = 1517 DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag); 1518 1519 SDValue Ptr = LowerLabelRef(GAHi, GALo, isPIC, DAG); 1520 1521 // If the global reference is actually to a non-lazy-pointer, we have to do an 1522 // extra load to get the address of the global. 1523 if (MOHiFlag & PPCII::MO_NLP_FLAG) 1524 Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), 1525 false, false, false, 0); 1526 return Ptr; 1527} 1528 1529SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { 1530 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1531 SDLoc dl(Op); 1532 1533 // If we're comparing for equality to zero, expose the fact that this is 1534 // implemented as a ctlz/srl pair on ppc, so that the dag combiner can 1535 // fold the new nodes.
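// For a 32-bit operand this is roughly the usual PPC idiom:
//   cntlzw rD, rX     ; 32 when rX == 0, otherwise <= 31
//   srwi   rD, rD, 5  ; shift right by log2(32), leaving 1 iff rX was zero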
1536 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1537 if (C->isNullValue() && CC == ISD::SETEQ) { 1538 EVT VT = Op.getOperand(0).getValueType(); 1539 SDValue Zext = Op.getOperand(0); 1540 if (VT.bitsLT(MVT::i32)) { 1541 VT = MVT::i32; 1542 Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0)); 1543 } 1544 unsigned Log2b = Log2_32(VT.getSizeInBits()); 1545 SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext); 1546 SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz, 1547 DAG.getConstant(Log2b, MVT::i32)); 1548 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc); 1549 } 1550 // Leave comparisons against 0 and -1 alone for now, since they're usually 1551 // optimized. FIXME: revisit this when we can custom lower all setcc 1552 // optimizations. 1553 if (C->isAllOnesValue() || C->isNullValue()) 1554 return SDValue(); 1555 } 1556 1557 // If we have an integer seteq/setne, turn it into a compare against zero 1558 // by xor'ing the rhs with the lhs, which is faster than setting a 1559 // condition register, reading it back out, and masking the correct bit. The 1560 // normal approach here uses sub to do this instead of xor. Using xor exposes 1561 // the result to other bit-twiddling opportunities. 1562 EVT LHSVT = Op.getOperand(0).getValueType(); 1563 if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1564 EVT VT = Op.getValueType(); 1565 SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, Op.getOperand(0), 1566 Op.getOperand(1)); 1567 return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, LHSVT), CC); 1568 } 1569 return SDValue(); 1570} 1571 1572SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, 1573 const PPCSubtarget &Subtarget) const { 1574 SDNode *Node = Op.getNode(); 1575 EVT VT = Node->getValueType(0); 1576 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1577 SDValue InChain = Node->getOperand(0); 1578 SDValue VAListPtr = Node->getOperand(1); 1579 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue(); 1580 SDLoc dl(Node); 1581 1582 assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only"); 1583 1584 // gpr_index 1585 SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1586 VAListPtr, MachinePointerInfo(SV), MVT::i8, 1587 false, false, 0); 1588 InChain = GprIndex.getValue(1); 1589 1590 if (VT == MVT::i64) { 1591 // Check if GprIndex is even 1592 SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex, 1593 DAG.getConstant(1, MVT::i32)); 1594 SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd, 1595 DAG.getConstant(0, MVT::i32), ISD::SETNE); 1596 SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex, 1597 DAG.getConstant(1, MVT::i32)); 1598 // Align GprIndex to be even if it isn't 1599 GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne, 1600 GprIndex); 1601 } 1602 1603 // fpr index is 1 byte after gpr 1604 SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1605 DAG.getConstant(1, MVT::i32)); 1606 1607 // fpr 1608 SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain, 1609 FprPtr, MachinePointerInfo(SV), MVT::i8, 1610 false, false, 0); 1611 InChain = FprIndex.getValue(1); 1612 1613 SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1614 DAG.getConstant(8, MVT::i32)); 1615 1616 SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr, 1617 DAG.getConstant(4, MVT::i32)); 1618 1619 // areas 1620 SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, 1621 MachinePointerInfo(), 
false, false, 1622 false, 0); 1623 InChain = OverflowArea.getValue(1); 1624 1625 SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, 1626 MachinePointerInfo(), false, false, 1627 false, 0); 1628 InChain = RegSaveArea.getValue(1); 1629 1630 // select overflow_area if index > 8 1631 SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex, 1632 DAG.getConstant(8, MVT::i32), ISD::SETLT); 1633 1634 // adjustment constant gpr_index * 4/8 1635 SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32, 1636 VT.isInteger() ? GprIndex : FprIndex, 1637 DAG.getConstant(VT.isInteger() ? 4 : 8, 1638 MVT::i32)); 1639 1640 // OurReg = RegSaveArea + RegConstant 1641 SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea, 1642 RegConstant); 1643 1644 // Floating types are 32 bytes into RegSaveArea 1645 if (VT.isFloatingPoint()) 1646 OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg, 1647 DAG.getConstant(32, MVT::i32)); 1648 1649 // increase {f,g}pr_index by 1 (or 2 if VT is i64) 1650 SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32, 1651 VT.isInteger() ? GprIndex : FprIndex, 1652 DAG.getConstant(VT == MVT::i64 ? 2 : 1, 1653 MVT::i32)); 1654 1655 InChain = DAG.getTruncStore(InChain, dl, IndexPlus1, 1656 VT.isInteger() ? VAListPtr : FprPtr, 1657 MachinePointerInfo(SV), 1658 MVT::i8, false, false, 0); 1659 1660 // determine if we should load from reg_save_area or overflow_area 1661 SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea); 1662 1663 // increase overflow_area by 4/8 if gpr/fpr > 8 1664 SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea, 1665 DAG.getConstant(VT.isInteger() ? 4 : 8, 1666 MVT::i32)); 1667 1668 OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea, 1669 OverflowAreaPlusN); 1670 1671 InChain = DAG.getTruncStore(InChain, dl, OverflowArea, 1672 OverflowAreaPtr, 1673 MachinePointerInfo(), 1674 MVT::i32, false, false, 0); 1675 1676 return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), 1677 false, false, false, 0); 1678} 1679 1680SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG, 1681 const PPCSubtarget &Subtarget) const { 1682 assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only"); 1683 1684 // We have to copy the entire va_list struct: 1685 // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte 1686 return DAG.getMemcpy(Op.getOperand(0), Op, 1687 Op.getOperand(1), Op.getOperand(2), 1688 DAG.getConstant(12, MVT::i32), 8, false, true, 1689 MachinePointerInfo(), MachinePointerInfo()); 1690} 1691 1692SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, 1693 SelectionDAG &DAG) const { 1694 return Op.getOperand(0); 1695} 1696 1697SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, 1698 SelectionDAG &DAG) const { 1699 SDValue Chain = Op.getOperand(0); 1700 SDValue Trmp = Op.getOperand(1); // trampoline 1701 SDValue FPtr = Op.getOperand(2); // nested function 1702 SDValue Nest = Op.getOperand(3); // 'nest' parameter value 1703 SDLoc dl(Op); 1704 1705 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1706 bool isPPC64 = (PtrVT == MVT::i64); 1707 Type *IntPtrTy = 1708 DAG.getTargetLoweringInfo().getDataLayout()->getIntPtrType( 1709 *DAG.getContext()); 1710 1711 TargetLowering::ArgListTy Args; 1712 TargetLowering::ArgListEntry Entry; 1713 1714 Entry.Ty = IntPtrTy; 1715 Entry.Node = Trmp; Args.push_back(Entry); 1716 1717 // TrampSize == (isPPC64 ? 48 : 40); 1718 Entry.Node = DAG.getConstant(isPPC64 ? 
48 : 40, 1719 isPPC64 ? MVT::i64 : MVT::i32); 1720 Args.push_back(Entry); 1721 1722 Entry.Node = FPtr; Args.push_back(Entry); 1723 Entry.Node = Nest; Args.push_back(Entry); 1724 1725 // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg) 1726 TargetLowering::CallLoweringInfo CLI(Chain, 1727 Type::getVoidTy(*DAG.getContext()), 1728 false, false, false, false, 0, 1729 CallingConv::C, 1730 /*isTailCall=*/false, 1731 /*doesNotRet=*/false, 1732 /*isReturnValueUsed=*/true, 1733 DAG.getExternalSymbol("__trampoline_setup", PtrVT), 1734 Args, DAG, dl); 1735 std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); 1736 1737 return CallResult.second; 1738} 1739 1740SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, 1741 const PPCSubtarget &Subtarget) const { 1742 MachineFunction &MF = DAG.getMachineFunction(); 1743 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1744 1745 SDLoc dl(Op); 1746 1747 if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { 1748 // vastart just stores the address of the VarArgsFrameIndex slot into the 1749 // memory location argument. 1750 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1751 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 1752 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1753 return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), 1754 MachinePointerInfo(SV), 1755 false, false, 0); 1756 } 1757 1758 // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. 1759 // We suppose the given va_list is already allocated. 1760 // 1761 // typedef struct { 1762 // char gpr; /* index into the array of 8 GPRs 1763 // * stored in the register save area 1764 // * gpr=0 corresponds to r3, 1765 // * gpr=1 to r4, etc. 1766 // */ 1767 // char fpr; /* index into the array of 8 FPRs 1768 // * stored in the register save area 1769 // * fpr=0 corresponds to f1, 1770 // * fpr=1 to f2, etc. 
1771 // */ 1772 // char *overflow_arg_area; 1773 // /* location on stack that holds 1774 // * the next overflow argument 1775 // */ 1776 // char *reg_save_area; 1777 // /* where r3:r10 and f1:f8 (if saved) 1778 // * are stored 1779 // */ 1780 // } va_list[1]; 1781 1782 1783 SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), MVT::i32); 1784 SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), MVT::i32); 1785 1786 1787 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1788 1789 SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(), 1790 PtrVT); 1791 SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), 1792 PtrVT); 1793 1794 uint64_t FrameOffset = PtrVT.getSizeInBits()/8; 1795 SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT); 1796 1797 uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1; 1798 SDValue ConstStackOffset = DAG.getConstant(StackOffset, PtrVT); 1799 1800 uint64_t FPROffset = 1; 1801 SDValue ConstFPROffset = DAG.getConstant(FPROffset, PtrVT); 1802 1803 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue(); 1804 1805 // Store first byte : number of int regs 1806 SDValue firstStore = DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, 1807 Op.getOperand(1), 1808 MachinePointerInfo(SV), 1809 MVT::i8, false, false, 0); 1810 uint64_t nextOffset = FPROffset; 1811 SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1), 1812 ConstFPROffset); 1813 1814 // Store second byte : number of float regs 1815 SDValue secondStore = 1816 DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr, 1817 MachinePointerInfo(SV, nextOffset), MVT::i8, 1818 false, false, 0); 1819 nextOffset += StackOffset; 1820 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset); 1821 1822 // Store second word : arguments given on stack 1823 SDValue thirdStore = 1824 DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr, 1825 MachinePointerInfo(SV, nextOffset), 1826 false, false, 0); 1827 nextOffset += FrameOffset; 1828 nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset); 1829 1830 // Store third word : arguments given in registers 1831 return DAG.getStore(thirdStore, dl, FR, nextPtr, 1832 MachinePointerInfo(SV, nextOffset), 1833 false, false, 0); 1834 1835} 1836 1837#include "PPCGenCallingConv.inc" 1838 1839// Function whose sole purpose is to kill compiler warnings 1840// stemming from unused functions included from PPCGenCallingConv.inc. 1841CCAssignFn *PPCTargetLowering::useFastISelCCs(unsigned Flag) const { 1842 return Flag ? CC_PPC64_ELF_FIS : RetCC_PPC64_ELF_FIS; 1843} 1844 1845bool llvm::CC_PPC32_SVR4_Custom_Dummy(unsigned &ValNo, MVT &ValVT, MVT &LocVT, 1846 CCValAssign::LocInfo &LocInfo, 1847 ISD::ArgFlagsTy &ArgFlags, 1848 CCState &State) { 1849 return true; 1850} 1851 1852bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT, 1853 MVT &LocVT, 1854 CCValAssign::LocInfo &LocInfo, 1855 ISD::ArgFlagsTy &ArgFlags, 1856 CCState &State) { 1857 static const uint16_t ArgRegs[] = { 1858 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1859 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1860 }; 1861 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1862 1863 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1864 1865 // Skip one register if the first unallocated register has an even register 1866 // number and there are still argument registers available which have not been 1867 // allocated yet. 
RegNum is actually an index into ArgRegs, which means we 1868 // need to skip a register if RegNum is odd. 1869 if (RegNum != NumArgRegs && RegNum % 2 == 1) { 1870 State.AllocateReg(ArgRegs[RegNum]); 1871 } 1872 1873 // Always return false here, as this function only makes sure that the first 1874 // unallocated register has an odd register number and does not actually 1875 // allocate a register for the current argument. 1876 return false; 1877} 1878 1879bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT, 1880 MVT &LocVT, 1881 CCValAssign::LocInfo &LocInfo, 1882 ISD::ArgFlagsTy &ArgFlags, 1883 CCState &State) { 1884 static const uint16_t ArgRegs[] = { 1885 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1886 PPC::F8 1887 }; 1888 1889 const unsigned NumArgRegs = array_lengthof(ArgRegs); 1890 1891 unsigned RegNum = State.getFirstUnallocated(ArgRegs, NumArgRegs); 1892 1893 // If there is only one Floating-point register left we need to put both f64 1894 // values of a split ppc_fp128 value on the stack. 1895 if (RegNum != NumArgRegs && ArgRegs[RegNum] == PPC::F8) { 1896 State.AllocateReg(ArgRegs[RegNum]); 1897 } 1898 1899 // Always return false here, as this function only makes sure that the two f64 1900 // values a ppc_fp128 value is split into are both passed in registers or both 1901 // passed on the stack and does not actually allocate a register for the 1902 // current argument. 1903 return false; 1904} 1905 1906/// GetFPR - Get the set of FP registers that should be allocated for arguments, 1907/// on Darwin. 1908static const uint16_t *GetFPR() { 1909 static const uint16_t FPR[] = { 1910 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1911 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1912 }; 1913 1914 return FPR; 1915} 1916 1917/// CalculateStackSlotSize - Calculates the size reserved for this argument on 1918/// the stack. 
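/// For example, with an 8-byte pointer size a 3-byte byval argument still
/// rounds up to a full 8-byte stack slot.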
1919static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, 1920 unsigned PtrByteSize) { 1921 unsigned ArgSize = ArgVT.getSizeInBits()/8; 1922 if (Flags.isByVal()) 1923 ArgSize = Flags.getByValSize(); 1924 ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1925 1926 return ArgSize; 1927} 1928 1929SDValue 1930PPCTargetLowering::LowerFormalArguments(SDValue Chain, 1931 CallingConv::ID CallConv, bool isVarArg, 1932 const SmallVectorImpl<ISD::InputArg> 1933 &Ins, 1934 SDLoc dl, SelectionDAG &DAG, 1935 SmallVectorImpl<SDValue> &InVals) 1936 const { 1937 if (PPCSubTarget.isSVR4ABI()) { 1938 if (PPCSubTarget.isPPC64()) 1939 return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, 1940 dl, DAG, InVals); 1941 else 1942 return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, 1943 dl, DAG, InVals); 1944 } else { 1945 return LowerFormalArguments_Darwin(Chain, CallConv, isVarArg, Ins, 1946 dl, DAG, InVals); 1947 } 1948} 1949 1950SDValue 1951PPCTargetLowering::LowerFormalArguments_32SVR4( 1952 SDValue Chain, 1953 CallingConv::ID CallConv, bool isVarArg, 1954 const SmallVectorImpl<ISD::InputArg> 1955 &Ins, 1956 SDLoc dl, SelectionDAG &DAG, 1957 SmallVectorImpl<SDValue> &InVals) const { 1958 1959 // 32-bit SVR4 ABI Stack Frame Layout: 1960 // +-----------------------------------+ 1961 // +--> | Back chain | 1962 // | +-----------------------------------+ 1963 // | | Floating-point register save area | 1964 // | +-----------------------------------+ 1965 // | | General register save area | 1966 // | +-----------------------------------+ 1967 // | | CR save word | 1968 // | +-----------------------------------+ 1969 // | | VRSAVE save word | 1970 // | +-----------------------------------+ 1971 // | | Alignment padding | 1972 // | +-----------------------------------+ 1973 // | | Vector register save area | 1974 // | +-----------------------------------+ 1975 // | | Local variable space | 1976 // | +-----------------------------------+ 1977 // | | Parameter list area | 1978 // | +-----------------------------------+ 1979 // | | LR save word | 1980 // | +-----------------------------------+ 1981 // SP--> +--- | Back chain | 1982 // +-----------------------------------+ 1983 // 1984 // Specifications: 1985 // System V Application Binary Interface PowerPC Processor Supplement 1986 // AltiVec Technology Programming Interface Manual 1987 1988 MachineFunction &MF = DAG.getMachineFunction(); 1989 MachineFrameInfo *MFI = MF.getFrameInfo(); 1990 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 1991 1992 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1993 // Potential tail calls could cause overwriting of argument stack slots. 1994 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 1995 (CallConv == CallingConv::Fast)); 1996 unsigned PtrByteSize = 4; 1997 1998 // Assign locations to all of the incoming arguments. 1999 SmallVector<CCValAssign, 16> ArgLocs; 2000 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2001 getTargetMachine(), ArgLocs, *DAG.getContext()); 2002 2003 // Reserve space for the linkage area on the stack. 2004 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 2005 2006 CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); 2007 2008 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { 2009 CCValAssign &VA = ArgLocs[i]; 2010 2011 // Arguments stored in registers. 
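// (Under CC_PPC32_SVR4 this typically means the first eight integer arguments
//  in r3..r10 and the first eight f64 arguments in f1..f8; anything that did
//  not get a register arrives as a MemLoc and is handled in the else below.)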
2012 if (VA.isRegLoc()) { 2013 const TargetRegisterClass *RC; 2014 EVT ValVT = VA.getValVT(); 2015 2016 switch (ValVT.getSimpleVT().SimpleTy) { 2017 default: 2018 llvm_unreachable("ValVT not supported by formal arguments Lowering"); 2019 case MVT::i32: 2020 RC = &PPC::GPRCRegClass; 2021 break; 2022 case MVT::f32: 2023 RC = &PPC::F4RCRegClass; 2024 break; 2025 case MVT::f64: 2026 RC = &PPC::F8RCRegClass; 2027 break; 2028 case MVT::v16i8: 2029 case MVT::v8i16: 2030 case MVT::v4i32: 2031 case MVT::v4f32: 2032 RC = &PPC::VRRCRegClass; 2033 break; 2034 } 2035 2036 // Transform the arguments stored in physical registers into virtual ones. 2037 unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC); 2038 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, ValVT); 2039 2040 InVals.push_back(ArgValue); 2041 } else { 2042 // Argument stored in memory. 2043 assert(VA.isMemLoc()); 2044 2045 unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; 2046 int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(), 2047 isImmutable); 2048 2049 // Create load nodes to retrieve arguments from the stack. 2050 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2051 InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, 2052 MachinePointerInfo(), 2053 false, false, false, 0)); 2054 } 2055 } 2056 2057 // Assign locations to all of the incoming aggregate by value arguments. 2058 // Aggregates passed by value are stored in the local variable space of the 2059 // caller's stack frame, right above the parameter list area. 2060 SmallVector<CCValAssign, 16> ByValArgLocs; 2061 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 2062 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 2063 2064 // Reserve stack space for the allocations in CCInfo. 2065 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 2066 2067 CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal); 2068 2069 // Area that is at least reserved in the caller of this function. 2070 unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); 2071 2072 // Set the size that is at least reserved in caller of this function. Tail 2073 // call optimized function's reserved stack space needs to be aligned so that 2074 // taking the difference between two stack areas will result in an aligned 2075 // stack. 2076 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2077 2078 MinReservedArea = 2079 std::max(MinReservedArea, 2080 PPCFrameLowering::getMinCallFrameSize(false, false)); 2081 2082 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2083 getStackAlignment(); 2084 unsigned AlignMask = TargetAlign-1; 2085 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2086 2087 FI->setMinReservedArea(MinReservedArea); 2088 2089 SmallVector<SDValue, 8> MemOps; 2090 2091 // If the function takes variable number of arguments, make a frame index for 2092 // the start of the first vararg value... for expansion of llvm.va_start. 
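// (With all eight GPRs and eight FPRs spilled below, the register save area
//  created here takes 8*4 + 8*8 = 96 bytes.)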
2093 if (isVarArg) { 2094 static const uint16_t GPArgRegs[] = { 2095 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2096 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2097 }; 2098 const unsigned NumGPArgRegs = array_lengthof(GPArgRegs); 2099 2100 static const uint16_t FPArgRegs[] = { 2101 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 2102 PPC::F8 2103 }; 2104 const unsigned NumFPArgRegs = array_lengthof(FPArgRegs); 2105 2106 FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs, 2107 NumGPArgRegs)); 2108 FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs, 2109 NumFPArgRegs)); 2110 2111 // Make room for NumGPArgRegs and NumFPArgRegs. 2112 int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 + 2113 NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8; 2114 2115 FuncInfo->setVarArgsStackOffset( 2116 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2117 CCInfo.getNextStackOffset(), true)); 2118 2119 FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false)); 2120 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2121 2122 // The fixed integer arguments of a variadic function are stored to the 2123 // VarArgsFrameIndex on the stack so that they may be loaded by dereferencing 2124 // the result of va_next. 2125 for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) { 2126 // Get an existing live-in vreg, or add a new one. 2127 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]); 2128 if (!VReg) 2129 VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass); 2130 2131 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2132 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2133 MachinePointerInfo(), false, false, 0); 2134 MemOps.push_back(Store); 2135 // Increment the address by four for the next argument to store 2136 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2137 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2138 } 2139 2140 // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 2141 // is set. 2142 // The double arguments are stored to the VarArgsFrameIndex 2143 // on the stack. 2144 for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) { 2145 // Get an existing live-in vreg, or add a new one. 2146 unsigned VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]); 2147 if (!VReg) 2148 VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass); 2149 2150 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64); 2151 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2152 MachinePointerInfo(), false, false, 0); 2153 MemOps.push_back(Store); 2154 // Increment the address by eight for the next argument to store 2155 SDValue PtrOff = DAG.getConstant(EVT(MVT::f64).getSizeInBits()/8, 2156 PtrVT); 2157 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2158 } 2159 } 2160 2161 if (!MemOps.empty()) 2162 Chain = DAG.getNode(ISD::TokenFactor, dl, 2163 MVT::Other, &MemOps[0], MemOps.size()); 2164 2165 return Chain; 2166} 2167 2168// PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2169// value to MVT::i64 and then truncate to the correct register size.
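// For example, a sign-extended i32 arriving in X3 becomes roughly
//   (truncate (AssertSext (CopyFromReg X3, i64), i32))
// so later combines know the upper 32 bits already hold the extension.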
2170SDValue 2171PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, 2172 SelectionDAG &DAG, SDValue ArgVal, 2173 SDLoc dl) const { 2174 if (Flags.isSExt()) 2175 ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal, 2176 DAG.getValueType(ObjectVT)); 2177 else if (Flags.isZExt()) 2178 ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal, 2179 DAG.getValueType(ObjectVT)); 2180 2181 return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal); 2182} 2183 2184// Set the size that is at least reserved in caller of this function. Tail 2185// call optimized functions' reserved stack space needs to be aligned so that 2186// taking the difference between two stack areas will result in an aligned 2187// stack. 2188void 2189PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, 2190 unsigned nAltivecParamsAtEnd, 2191 unsigned MinReservedArea, 2192 bool isPPC64) const { 2193 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2194 // Add the Altivec parameters at the end, if needed. 2195 if (nAltivecParamsAtEnd) { 2196 MinReservedArea = ((MinReservedArea+15)/16)*16; 2197 MinReservedArea += 16*nAltivecParamsAtEnd; 2198 } 2199 MinReservedArea = 2200 std::max(MinReservedArea, 2201 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2202 unsigned TargetAlign 2203 = DAG.getMachineFunction().getTarget().getFrameLowering()-> 2204 getStackAlignment(); 2205 unsigned AlignMask = TargetAlign-1; 2206 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 2207 FI->setMinReservedArea(MinReservedArea); 2208} 2209 2210SDValue 2211PPCTargetLowering::LowerFormalArguments_64SVR4( 2212 SDValue Chain, 2213 CallingConv::ID CallConv, bool isVarArg, 2214 const SmallVectorImpl<ISD::InputArg> 2215 &Ins, 2216 SDLoc dl, SelectionDAG &DAG, 2217 SmallVectorImpl<SDValue> &InVals) const { 2218 // TODO: add description of PPC stack frame format, or at least some docs. 2219 // 2220 MachineFunction &MF = DAG.getMachineFunction(); 2221 MachineFrameInfo *MFI = MF.getFrameInfo(); 2222 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2223 2224 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2225 // Potential tail calls could cause overwriting of argument stack slots. 2226 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2227 (CallConv == CallingConv::Fast)); 2228 unsigned PtrByteSize = 8; 2229 2230 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 2231 // Area that is at least reserved in caller of this function. 2232 unsigned MinReservedArea = ArgOffset; 2233 2234 static const uint16_t GPR[] = { 2235 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2236 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2237 }; 2238 2239 static const uint16_t *FPR = GetFPR(); 2240 2241 static const uint16_t VR[] = { 2242 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2243 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2244 }; 2245 2246 const unsigned Num_GPR_Regs = array_lengthof(GPR); 2247 const unsigned Num_FPR_Regs = 13; 2248 const unsigned Num_VR_Regs = array_lengthof(VR); 2249 2250 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2251 2252 // Add DAG nodes to load the arguments or copy them out of registers. On 2253 // entry to a function on PPC, the arguments start after the linkage area, 2254 // although the first ones are often in registers. 
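// (For 64-bit SVR4 that linkage area is 48 bytes, so the first parameter save
//  slot sits at SP+48; each doubleword of argument space also consumes one of
//  X3..X10 while registers remain.)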
2255 2256 SmallVector<SDValue, 8> MemOps; 2257 unsigned nAltivecParamsAtEnd = 0; 2258 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2259 unsigned CurArgIdx = 0; 2260 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2261 SDValue ArgVal; 2262 bool needsLoad = false; 2263 EVT ObjectVT = Ins[ArgNo].VT; 2264 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2265 unsigned ArgSize = ObjSize; 2266 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2267 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2268 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2269 2270 unsigned CurArgOffset = ArgOffset; 2271 2272 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 2273 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2274 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2275 if (isVarArg) { 2276 MinReservedArea = ((MinReservedArea+15)/16)*16; 2277 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2278 Flags, 2279 PtrByteSize); 2280 } else 2281 nAltivecParamsAtEnd++; 2282 } else 2283 // Calculate min reserved area. 2284 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2285 Flags, 2286 PtrByteSize); 2287 2288 // FIXME the codegen can be much improved in some cases. 2289 // We do not have to keep everything in memory. 2290 if (Flags.isByVal()) { 2291 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2292 ObjSize = Flags.getByValSize(); 2293 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2294 // Empty aggregate parameters do not take up registers. Examples: 2295 // struct { } a; 2296 // union { } b; 2297 // int c[0]; 2298 // etc. However, we have to provide a place-holder in InVals, so 2299 // pretend we have an 8-byte item at the current address for that 2300 // purpose. 2301 if (!ObjSize) { 2302 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2303 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2304 InVals.push_back(FIN); 2305 continue; 2306 } 2307 2308 unsigned BVAlign = Flags.getByValAlign(); 2309 if (BVAlign > 8) { 2310 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; 2311 CurArgOffset = ArgOffset; 2312 } 2313 2314 // All aggregates smaller than 8 bytes must be passed right-justified. 2315 if (ObjSize < PtrByteSize) 2316 CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); 2317 // The value of the object is its address. 2318 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2319 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2320 InVals.push_back(FIN); 2321 2322 if (ObjSize < 8) { 2323 if (GPR_idx != Num_GPR_Regs) { 2324 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2325 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2326 SDValue Store; 2327 2328 if (ObjSize==1 || ObjSize==2 || ObjSize==4) { 2329 EVT ObjType = (ObjSize == 1 ? MVT::i8 : 2330 (ObjSize == 2 ? MVT::i16 : MVT::i32)); 2331 Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2332 MachinePointerInfo(FuncArg, CurArgOffset), 2333 ObjType, false, false, 0); 2334 } else { 2335 // For sizes that don't fit a truncating store (3, 5, 6, 7), 2336 // store the whole register as-is to the parameter save area 2337 // slot. The address of the parameter was already calculated 2338 // above (InVals.push_back(FIN)) to be the right-justified 2339 // offset within the slot. For this store, we need a new 2340 // frame index that points at the beginning of the slot. 
2341 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2342 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2343 Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2344 MachinePointerInfo(FuncArg, ArgOffset), 2345 false, false, 0); 2346 } 2347 2348 MemOps.push_back(Store); 2349 ++GPR_idx; 2350 } 2351 // Whether we copied from a register or not, advance the offset 2352 // into the parameter save area by a full doubleword. 2353 ArgOffset += PtrByteSize; 2354 continue; 2355 } 2356 2357 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2358 // Store whatever pieces of the object are in registers 2359 // to memory. ArgOffset will be the address of the beginning 2360 // of the object. 2361 if (GPR_idx != Num_GPR_Regs) { 2362 unsigned VReg; 2363 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2364 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2365 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2366 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2367 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2368 MachinePointerInfo(FuncArg, ArgOffset), 2369 false, false, 0); 2370 MemOps.push_back(Store); 2371 ++GPR_idx; 2372 ArgOffset += PtrByteSize; 2373 } else { 2374 ArgOffset += ArgSize - j; 2375 break; 2376 } 2377 } 2378 continue; 2379 } 2380 2381 switch (ObjectVT.getSimpleVT().SimpleTy) { 2382 default: llvm_unreachable("Unhandled argument type!"); 2383 case MVT::i32: 2384 case MVT::i64: 2385 if (GPR_idx != Num_GPR_Regs) { 2386 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2387 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2388 2389 if (ObjectVT == MVT::i32) 2390 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2391 // value to MVT::i64 and then truncate to the correct register size. 2392 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2393 2394 ++GPR_idx; 2395 } else { 2396 needsLoad = true; 2397 ArgSize = PtrByteSize; 2398 } 2399 ArgOffset += 8; 2400 break; 2401 2402 case MVT::f32: 2403 case MVT::f64: 2404 // Every 8 bytes of argument space consumes one of the GPRs available for 2405 // argument passing. 2406 if (GPR_idx != Num_GPR_Regs) { 2407 ++GPR_idx; 2408 } 2409 if (FPR_idx != Num_FPR_Regs) { 2410 unsigned VReg; 2411 2412 if (ObjectVT == MVT::f32) 2413 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2414 else 2415 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2416 2417 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2418 ++FPR_idx; 2419 } else { 2420 needsLoad = true; 2421 ArgSize = PtrByteSize; 2422 } 2423 2424 ArgOffset += 8; 2425 break; 2426 case MVT::v4f32: 2427 case MVT::v4i32: 2428 case MVT::v8i16: 2429 case MVT::v16i8: 2430 // Note that vector arguments in registers don't reserve stack space, 2431 // except in varargs functions. 2432 if (VR_idx != Num_VR_Regs) { 2433 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2434 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2435 if (isVarArg) { 2436 while ((ArgOffset % 16) != 0) { 2437 ArgOffset += PtrByteSize; 2438 if (GPR_idx != Num_GPR_Regs) 2439 GPR_idx++; 2440 } 2441 ArgOffset += 16; 2442 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2443 } 2444 ++VR_idx; 2445 } else { 2446 // Vectors are aligned. 
2447 ArgOffset = ((ArgOffset+15)/16)*16; 2448 CurArgOffset = ArgOffset; 2449 ArgOffset += 16; 2450 needsLoad = true; 2451 } 2452 break; 2453 } 2454 2455 // We need to load the argument to a virtual register if we determined 2456 // above that we ran out of physical registers of the appropriate type. 2457 if (needsLoad) { 2458 int FI = MFI->CreateFixedObject(ObjSize, 2459 CurArgOffset + (ArgSize - ObjSize), 2460 isImmutable); 2461 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2462 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2463 false, false, false, 0); 2464 } 2465 2466 InVals.push_back(ArgVal); 2467 } 2468 2469 // Set the size that is at least reserved in caller of this function. Tail 2470 // call optimized functions' reserved stack space needs to be aligned so that 2471 // taking the difference between two stack areas will result in an aligned 2472 // stack. 2473 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); 2474 2475 // If the function takes variable number of arguments, make a frame index for 2476 // the start of the first vararg value... for expansion of llvm.va_start. 2477 if (isVarArg) { 2478 int Depth = ArgOffset; 2479 2480 FuncInfo->setVarArgsFrameIndex( 2481 MFI->CreateFixedObject(PtrByteSize, Depth, true)); 2482 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2483 2484 // If this function is vararg, store any remaining integer argument regs 2485 // to their spots on the stack so that they may be loaded by dereferencing the 2486 // result of va_next. 2487 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2488 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2489 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2490 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2491 MachinePointerInfo(), false, false, 0); 2492 MemOps.push_back(Store); 2493 // Increment the address by four for the next argument to store 2494 SDValue PtrOff = DAG.getConstant(PtrByteSize, PtrVT); 2495 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2496 } 2497 } 2498 2499 if (!MemOps.empty()) 2500 Chain = DAG.getNode(ISD::TokenFactor, dl, 2501 MVT::Other, &MemOps[0], MemOps.size()); 2502 2503 return Chain; 2504} 2505 2506SDValue 2507PPCTargetLowering::LowerFormalArguments_Darwin( 2508 SDValue Chain, 2509 CallingConv::ID CallConv, bool isVarArg, 2510 const SmallVectorImpl<ISD::InputArg> 2511 &Ins, 2512 SDLoc dl, SelectionDAG &DAG, 2513 SmallVectorImpl<SDValue> &InVals) const { 2514 // TODO: add description of PPC stack frame format, or at least some docs. 2515 // 2516 MachineFunction &MF = DAG.getMachineFunction(); 2517 MachineFrameInfo *MFI = MF.getFrameInfo(); 2518 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 2519 2520 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2521 bool isPPC64 = PtrVT == MVT::i64; 2522 // Potential tail calls could cause overwriting of argument stack slots. 2523 bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt && 2524 (CallConv == CallingConv::Fast)); 2525 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2526 2527 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 2528 // Area that is at least reserved in caller of this function. 2529 unsigned MinReservedArea = ArgOffset; 2530 2531 static const uint16_t GPR_32[] = { // 32-bit registers. 2532 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2533 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2534 }; 2535 static const uint16_t GPR_64[] = { // 64-bit registers.
2536 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2537 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2538 }; 2539 2540 static const uint16_t *FPR = GetFPR(); 2541 2542 static const uint16_t VR[] = { 2543 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2544 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2545 }; 2546 2547 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 2548 const unsigned Num_FPR_Regs = 13; 2549 const unsigned Num_VR_Regs = array_lengthof( VR); 2550 2551 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2552 2553 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 2554 2555 // In 32-bit non-varargs functions, the stack space for vectors is after the 2556 // stack space for non-vectors. We do not use this space unless we have 2557 // too many vectors to fit in registers, something that only occurs in 2558 // constructed examples:), but we have to walk the arglist to figure 2559 // that out...for the pathological case, compute VecArgOffset as the 2560 // start of the vector parameter area. Computing VecArgOffset is the 2561 // entire point of the following loop. 2562 unsigned VecArgOffset = ArgOffset; 2563 if (!isVarArg && !isPPC64) { 2564 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; 2565 ++ArgNo) { 2566 EVT ObjectVT = Ins[ArgNo].VT; 2567 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2568 2569 if (Flags.isByVal()) { 2570 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 2571 unsigned ObjSize = Flags.getByValSize(); 2572 unsigned ArgSize = 2573 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2574 VecArgOffset += ArgSize; 2575 continue; 2576 } 2577 2578 switch(ObjectVT.getSimpleVT().SimpleTy) { 2579 default: llvm_unreachable("Unhandled argument type!"); 2580 case MVT::i32: 2581 case MVT::f32: 2582 VecArgOffset += 4; 2583 break; 2584 case MVT::i64: // PPC64 2585 case MVT::f64: 2586 // FIXME: We are guaranteed to be !isPPC64 at this point. 2587 // Does MVT::i64 apply? 2588 VecArgOffset += 8; 2589 break; 2590 case MVT::v4f32: 2591 case MVT::v4i32: 2592 case MVT::v8i16: 2593 case MVT::v16i8: 2594 // Nothing to do, we're only looking at Nonvector args here. 2595 break; 2596 } 2597 } 2598 } 2599 // We've found where the vector parameter area in memory is. Skip the 2600 // first 12 parameters; these don't use that memory. 2601 VecArgOffset = ((VecArgOffset+15)/16)*16; 2602 VecArgOffset += 12*16; 2603 2604 // Add DAG nodes to load the arguments or copy them out of registers. On 2605 // entry to a function on PPC, the arguments start after the linkage area, 2606 // although the first ones are often in registers. 2607 2608 SmallVector<SDValue, 8> MemOps; 2609 unsigned nAltivecParamsAtEnd = 0; 2610 Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); 2611 unsigned CurArgIdx = 0; 2612 for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { 2613 SDValue ArgVal; 2614 bool needsLoad = false; 2615 EVT ObjectVT = Ins[ArgNo].VT; 2616 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 2617 unsigned ArgSize = ObjSize; 2618 ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags; 2619 std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); 2620 CurArgIdx = Ins[ArgNo].OrigArgIndex; 2621 2622 unsigned CurArgOffset = ArgOffset; 2623 2624 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 
2625 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 2626 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 2627 if (isVarArg || isPPC64) { 2628 MinReservedArea = ((MinReservedArea+15)/16)*16; 2629 MinReservedArea += CalculateStackSlotSize(ObjectVT, 2630 Flags, 2631 PtrByteSize); 2632 } else nAltivecParamsAtEnd++; 2633 } else 2634 // Calculate min reserved area. 2635 MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, 2636 Flags, 2637 PtrByteSize); 2638 2639 // FIXME the codegen can be much improved in some cases. 2640 // We do not have to keep everything in memory. 2641 if (Flags.isByVal()) { 2642 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 2643 ObjSize = Flags.getByValSize(); 2644 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 2645 // Objects of size 1 and 2 are right justified, everything else is 2646 // left justified. This means the memory address is adjusted forwards. 2647 if (ObjSize==1 || ObjSize==2) { 2648 CurArgOffset = CurArgOffset + (4 - ObjSize); 2649 } 2650 // The value of the object is its address. 2651 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); 2652 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2653 InVals.push_back(FIN); 2654 if (ObjSize==1 || ObjSize==2) { 2655 if (GPR_idx != Num_GPR_Regs) { 2656 unsigned VReg; 2657 if (isPPC64) 2658 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2659 else 2660 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2661 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2662 EVT ObjType = ObjSize == 1 ? MVT::i8 : MVT::i16; 2663 SDValue Store = DAG.getTruncStore(Val.getValue(1), dl, Val, FIN, 2664 MachinePointerInfo(FuncArg, 2665 CurArgOffset), 2666 ObjType, false, false, 0); 2667 MemOps.push_back(Store); 2668 ++GPR_idx; 2669 } 2670 2671 ArgOffset += PtrByteSize; 2672 2673 continue; 2674 } 2675 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 2676 // Store whatever pieces of the object are in registers 2677 // to memory. ArgOffset will be the address of the beginning 2678 // of the object. 2679 if (GPR_idx != Num_GPR_Regs) { 2680 unsigned VReg; 2681 if (isPPC64) 2682 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2683 else 2684 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2685 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true); 2686 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2687 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2688 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2689 MachinePointerInfo(FuncArg, ArgOffset), 2690 false, false, 0); 2691 MemOps.push_back(Store); 2692 ++GPR_idx; 2693 ArgOffset += PtrByteSize; 2694 } else { 2695 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 2696 break; 2697 } 2698 } 2699 continue; 2700 } 2701 2702 switch (ObjectVT.getSimpleVT().SimpleTy) { 2703 default: llvm_unreachable("Unhandled argument type!"); 2704 case MVT::i32: 2705 if (!isPPC64) { 2706 if (GPR_idx != Num_GPR_Regs) { 2707 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2708 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i32); 2709 ++GPR_idx; 2710 } else { 2711 needsLoad = true; 2712 ArgSize = PtrByteSize; 2713 } 2714 // All int arguments reserve stack space in the Darwin ABI. 
2715 ArgOffset += PtrByteSize; 2716 break; 2717 } 2718 // FALLTHROUGH 2719 case MVT::i64: // PPC64 2720 if (GPR_idx != Num_GPR_Regs) { 2721 unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2722 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64); 2723 2724 if (ObjectVT == MVT::i32) 2725 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 2726 // value to MVT::i64 and then truncate to the correct register size. 2727 ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); 2728 2729 ++GPR_idx; 2730 } else { 2731 needsLoad = true; 2732 ArgSize = PtrByteSize; 2733 } 2734 // All int arguments reserve stack space in the Darwin ABI. 2735 ArgOffset += 8; 2736 break; 2737 2738 case MVT::f32: 2739 case MVT::f64: 2740 // Every 4 bytes of argument space consumes one of the GPRs available for 2741 // argument passing. 2742 if (GPR_idx != Num_GPR_Regs) { 2743 ++GPR_idx; 2744 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 2745 ++GPR_idx; 2746 } 2747 if (FPR_idx != Num_FPR_Regs) { 2748 unsigned VReg; 2749 2750 if (ObjectVT == MVT::f32) 2751 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); 2752 else 2753 VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F8RCRegClass); 2754 2755 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2756 ++FPR_idx; 2757 } else { 2758 needsLoad = true; 2759 } 2760 2761 // All FP arguments reserve stack space in the Darwin ABI. 2762 ArgOffset += isPPC64 ? 8 : ObjSize; 2763 break; 2764 case MVT::v4f32: 2765 case MVT::v4i32: 2766 case MVT::v8i16: 2767 case MVT::v16i8: 2768 // Note that vector arguments in registers don't reserve stack space, 2769 // except in varargs functions. 2770 if (VR_idx != Num_VR_Regs) { 2771 unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); 2772 ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); 2773 if (isVarArg) { 2774 while ((ArgOffset % 16) != 0) { 2775 ArgOffset += PtrByteSize; 2776 if (GPR_idx != Num_GPR_Regs) 2777 GPR_idx++; 2778 } 2779 ArgOffset += 16; 2780 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? 2781 } 2782 ++VR_idx; 2783 } else { 2784 if (!isVarArg && !isPPC64) { 2785 // Vectors go after all the nonvectors. 2786 CurArgOffset = VecArgOffset; 2787 VecArgOffset += 16; 2788 } else { 2789 // Vectors are aligned. 2790 ArgOffset = ((ArgOffset+15)/16)*16; 2791 CurArgOffset = ArgOffset; 2792 ArgOffset += 16; 2793 } 2794 needsLoad = true; 2795 } 2796 break; 2797 } 2798 2799 // We need to load the argument to a virtual register if we determined above 2800 // that we ran out of physical registers of the appropriate type. 2801 if (needsLoad) { 2802 int FI = MFI->CreateFixedObject(ObjSize, 2803 CurArgOffset + (ArgSize - ObjSize), 2804 isImmutable); 2805 SDValue FIN = DAG.getFrameIndex(FI, PtrVT); 2806 ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), 2807 false, false, false, 0); 2808 } 2809 2810 InVals.push_back(ArgVal); 2811 } 2812 2813 // Set the size that is at least reserved in caller of this function. Tail 2814 // call optimized functions' reserved stack space needs to be aligned so that 2815 // taking the difference between two stack areas will result in an aligned 2816 // stack. 2817 setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); 2818 2819 // If the function takes variable number of arguments, make a frame index for 2820 // the start of the first vararg value... for expansion of llvm.va_start. 
2821 if (isVarArg) { 2822 int Depth = ArgOffset; 2823 2824 FuncInfo->setVarArgsFrameIndex( 2825 MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 2826 Depth, true)); 2827 SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); 2828 2829 // If this function is vararg, store any remaining integer argument regs 2830 // to their spots on the stack so that they may be loaded by dereferencing the 2831 // result of va_next. 2832 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 2833 unsigned VReg; 2834 2835 if (isPPC64) 2836 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); 2837 else 2838 VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass); 2839 2840 SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); 2841 SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, 2842 MachinePointerInfo(), false, false, 0); 2843 MemOps.push_back(Store); 2844 // Increment the address by four for the next argument to store 2845 SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 2846 FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); 2847 } 2848 } 2849 2850 if (!MemOps.empty()) 2851 Chain = DAG.getNode(ISD::TokenFactor, dl, 2852 MVT::Other, &MemOps[0], MemOps.size()); 2853 2854 return Chain; 2855} 2856 2857/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 2858/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. 2859static unsigned 2860CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 2861 bool isPPC64, 2862 bool isVarArg, 2863 unsigned CC, 2864 const SmallVectorImpl<ISD::OutputArg> 2865 &Outs, 2866 const SmallVectorImpl<SDValue> &OutVals, 2867 unsigned &nAltivecParamsAtEnd) { 2868 // Count how many bytes are to be pushed on the stack, including the linkage 2869 // area, and parameter passing area. We start with 24/48 bytes, which is 2870 // prereserved space for [SP][CR][LR][3 x unused]. 2871 unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); 2872 unsigned NumOps = Outs.size(); 2873 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2874 2875 // Add up all the space actually used. 2876 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 2877 // they all go in registers, but we must reserve stack space for them for 2878 // possible use by the caller. In varargs or 64-bit calls, parameters are 2879 // assigned stack space in order, with padding so Altivec parameters are 2880 // 16-byte aligned. 2881 nAltivecParamsAtEnd = 0; 2882 for (unsigned i = 0; i != NumOps; ++i) { 2883 ISD::ArgFlagsTy Flags = Outs[i].Flags; 2884 EVT ArgVT = Outs[i].VT; 2885 // Varargs Altivec parameters are padded to a 16 byte boundary. 2886 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 2887 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 2888 if (!isVarArg && !isPPC64) { 2889 // Non-varargs Altivec parameters go after all the non-Altivec 2890 // parameters; handle those later so we know how much padding we need. 2891 nAltivecParamsAtEnd++; 2892 continue; 2893 } 2894 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 2895 NumBytes = ((NumBytes+15)/16)*16; 2896 } 2897 NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); 2898 } 2899 2900 // Allow for Altivec parameters at the end, if needed. 2901 if (nAltivecParamsAtEnd) { 2902 NumBytes = ((NumBytes+15)/16)*16; 2903 NumBytes += 16*nAltivecParamsAtEnd; 2904 } 2905 2906 // The prolog code of the callee may store up to 8 GPR argument registers to 2907 // the stack, allowing va_start to index over them in memory if it is varargs.
2908 // Because we cannot tell if this is needed on the caller side, we have to 2909 // conservatively assume that it is needed. As such, make sure we have at 2910 // least enough stack space for the caller to store the 8 GPRs. 2911 NumBytes = std::max(NumBytes, 2912 PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); 2913 2914 // Tail call needs the stack to be aligned. 2915 if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ 2916 unsigned TargetAlign = DAG.getMachineFunction().getTarget(). 2917 getFrameLowering()->getStackAlignment(); 2918 unsigned AlignMask = TargetAlign-1; 2919 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 2920 } 2921 2922 return NumBytes; 2923} 2924 2925/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 2926/// adjusted to accommodate the arguments for the tailcall. 2927static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, 2928 unsigned ParamSize) { 2929 2930 if (!isTailCall) return 0; 2931 2932 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 2933 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 2934 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 2935 // Remember only if the new adjustement is bigger. 2936 if (SPDiff < FI->getTailCallSPDelta()) 2937 FI->setTailCallSPDelta(SPDiff); 2938 2939 return SPDiff; 2940} 2941 2942/// IsEligibleForTailCallOptimization - Check whether the call is eligible 2943/// for tail call optimization. Targets which want to do tail call 2944/// optimization should implement this function. 2945bool 2946PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, 2947 CallingConv::ID CalleeCC, 2948 bool isVarArg, 2949 const SmallVectorImpl<ISD::InputArg> &Ins, 2950 SelectionDAG& DAG) const { 2951 if (!getTargetMachine().Options.GuaranteedTailCallOpt) 2952 return false; 2953 2954 // Variable argument functions are not supported. 2955 if (isVarArg) 2956 return false; 2957 2958 MachineFunction &MF = DAG.getMachineFunction(); 2959 CallingConv::ID CallerCC = MF.getFunction()->getCallingConv(); 2960 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 2961 // Functions containing by val parameters are not supported. 2962 for (unsigned i = 0; i != Ins.size(); i++) { 2963 ISD::ArgFlagsTy Flags = Ins[i].Flags; 2964 if (Flags.isByVal()) return false; 2965 } 2966 2967 // Non PIC/GOT tail calls are supported. 2968 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 2969 return true; 2970 2971 // At the moment we can only do local tail calls (in same module, hidden 2972 // or protected) if we are generating PIC. 2973 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2974 return G->getGlobal()->hasHiddenVisibility() 2975 || G->getGlobal()->hasProtectedVisibility(); 2976 } 2977 2978 return false; 2979} 2980 2981/// isCallCompatibleAddress - Return the immediate to use if the specified 2982/// 32-bit value is representable in the immediate field of a BxA instruction. 2983static SDNode *isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG) { 2984 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 2985 if (!C) return 0; 2986 2987 int Addr = C->getZExtValue(); 2988 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 2989 SignExtend32<26>(Addr) != Addr) 2990 return 0; // Top 6 bits have to be sext of immediate. 
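  // The checks above mirror what an absolute branch (e.g. bla) can encode:
  // the address must be word aligned and, including the two implicit zero
  // bits, must fit in a signed 26-bit value (a 24-bit immediate field shifted
  // left by two), which is why the node returned below carries the address
  // shifted right by two.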
2991 2992 return DAG.getConstant((int)C->getZExtValue() >> 2, 2993 DAG.getTargetLoweringInfo().getPointerTy()).getNode(); 2994} 2995 2996namespace { 2997 2998struct TailCallArgumentInfo { 2999 SDValue Arg; 3000 SDValue FrameIdxOp; 3001 int FrameIdx; 3002 3003 TailCallArgumentInfo() : FrameIdx(0) {} 3004}; 3005 3006} 3007 3008/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 3009static void 3010StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 3011 SDValue Chain, 3012 const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs, 3013 SmallVectorImpl<SDValue> &MemOpChains, 3014 SDLoc dl) { 3015 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 3016 SDValue Arg = TailCallArgs[i].Arg; 3017 SDValue FIN = TailCallArgs[i].FrameIdxOp; 3018 int FI = TailCallArgs[i].FrameIdx; 3019 // Store relative to framepointer. 3020 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, FIN, 3021 MachinePointerInfo::getFixedStack(FI), 3022 false, false, 0)); 3023 } 3024} 3025 3026/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 3027/// the appropriate stack slot for the tail call optimized function call. 3028static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 3029 MachineFunction &MF, 3030 SDValue Chain, 3031 SDValue OldRetAddr, 3032 SDValue OldFP, 3033 int SPDiff, 3034 bool isPPC64, 3035 bool isDarwinABI, 3036 SDLoc dl) { 3037 if (SPDiff) { 3038 // Calculate the new stack slot for the return address. 3039 int SlotSize = isPPC64 ? 8 : 4; 3040 int NewRetAddrLoc = SPDiff + PPCFrameLowering::getReturnSaveOffset(isPPC64, 3041 isDarwinABI); 3042 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 3043 NewRetAddrLoc, true); 3044 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3045 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 3046 Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, 3047 MachinePointerInfo::getFixedStack(NewRetAddr), 3048 false, false, 0); 3049 3050 // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack 3051 // slot as the FP is never overwritten. 3052 if (isDarwinABI) { 3053 int NewFPLoc = 3054 SPDiff + PPCFrameLowering::getFramePointerSaveOffset(isPPC64, isDarwinABI); 3055 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc, 3056 true); 3057 SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 3058 Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx, 3059 MachinePointerInfo::getFixedStack(NewFPIdx), 3060 false, false, 0); 3061 } 3062 } 3063 return Chain; 3064} 3065 3066/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate 3067/// the position of the argument. 3068static void 3069CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 3070 SDValue Arg, int SPDiff, unsigned ArgOffset, 3071 SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) { 3072 int Offset = ArgOffset + SPDiff; 3073 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 3074 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true); 3075 EVT VT = isPPC64 ? MVT::i64 : MVT::i32; 3076 SDValue FIN = DAG.getFrameIndex(FI, VT); 3077 TailCallArgumentInfo Info; 3078 Info.Arg = Arg; 3079 Info.FrameIdxOp = FIN; 3080 Info.FrameIdx = FI; 3081 TailCallArguments.push_back(Info); 3082} 3083 3084/// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address 3085/// stack slot. Returns the chain as result and the loaded frame pointers in 3086/// LROpOut/FPOpout. Used when tail calling. 
3087SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 3088 int SPDiff, 3089 SDValue Chain, 3090 SDValue &LROpOut, 3091 SDValue &FPOpOut, 3092 bool isDarwinABI, 3093 SDLoc dl) const { 3094 if (SPDiff) { 3095 // Load the LR and FP stack slot for later adjusting. 3096 EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; 3097 LROpOut = getReturnAddrFrameIndex(DAG); 3098 LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), 3099 false, false, false, 0); 3100 Chain = SDValue(LROpOut.getNode(), 1); 3101 3102 // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack 3103 // slot as the FP is never overwritten. 3104 if (isDarwinABI) { 3105 FPOpOut = getFramePointerFrameIndex(DAG); 3106 FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), 3107 false, false, false, 0); 3108 Chain = SDValue(FPOpOut.getNode(), 1); 3109 } 3110 } 3111 return Chain; 3112} 3113 3114/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified 3115/// by "Src" to address "Dst" of size "Size". Alignment information is 3116/// specified by the specific parameter attribute. The copy will be passed as 3117/// a byval function parameter. 3118/// Sometimes what we are copying is the end of a larger object, the part that 3119/// does not fit in registers. 3120static SDValue 3121CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, 3122 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 3123 SDLoc dl) { 3124 SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), MVT::i32); 3125 return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), 3126 false, false, MachinePointerInfo(0), 3127 MachinePointerInfo(0)); 3128} 3129 3130/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 3131/// tail calls. 3132static void 3133LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, 3134 SDValue Arg, SDValue PtrOff, int SPDiff, 3135 unsigned ArgOffset, bool isPPC64, bool isTailCall, 3136 bool isVector, SmallVectorImpl<SDValue> &MemOpChains, 3137 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, 3138 SDLoc dl) { 3139 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3140 if (!isTailCall) { 3141 if (isVector) { 3142 SDValue StackPtr; 3143 if (isPPC64) 3144 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3145 else 3146 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3147 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 3148 DAG.getConstant(ArgOffset, PtrVT)); 3149 } 3150 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3151 MachinePointerInfo(), false, false, 0)); 3152 // Calculate and remember argument location. 3153 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 3154 TailCallArguments); 3155} 3156 3157static 3158void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, 3159 SDLoc dl, bool isPPC64, int SPDiff, unsigned NumBytes, 3160 SDValue LROp, SDValue FPOp, bool isDarwinABI, 3161 SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) { 3162 MachineFunction &MF = DAG.getMachineFunction(); 3163 3164 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 3165 // might overwrite each other in case of tail call optimization. 3166 SmallVector<SDValue, 8> MemOpChains2; 3167 // Do not flag preceding copytoreg stuff together with the following stuff. 
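  // Clearing InFlag here deliberately breaks the glue chain: the stores
  // emitted below only need to be ordered through the token chain, and gluing
  // them to the preceding CopyToReg nodes would just over-constrain the
  // scheduler.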
3168 InFlag = SDValue(); 3169 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 3170 MemOpChains2, dl); 3171 if (!MemOpChains2.empty()) 3172 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3173 &MemOpChains2[0], MemOpChains2.size()); 3174 3175 // Store the return address to the appropriate stack slot. 3176 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 3177 isPPC64, isDarwinABI, dl); 3178 3179 // Emit callseq_end just before tailcall node. 3180 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3181 DAG.getIntPtrConstant(0, true), InFlag, dl); 3182 InFlag = Chain.getValue(1); 3183} 3184 3185static 3186unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, 3187 SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, 3188 SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, 3189 SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, 3190 const PPCSubtarget &PPCSubTarget) { 3191 3192 bool isPPC64 = PPCSubTarget.isPPC64(); 3193 bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); 3194 3195 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3196 NodeTys.push_back(MVT::Other); // Returns a chain 3197 NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. 3198 3199 unsigned CallOpc = PPCISD::CALL; 3200 3201 bool needIndirectCall = true; 3202 if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { 3203 // If this is an absolute destination address, use the munged value. 3204 Callee = SDValue(Dest, 0); 3205 needIndirectCall = false; 3206 } 3207 3208 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { 3209 // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 3210 // Use indirect calls for ALL functions calls in JIT mode, since the 3211 // far-call stubs may be outside relocation limits for a BL instruction. 3212 if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { 3213 unsigned OpFlags = 0; 3214 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3215 (PPCSubTarget.getTargetTriple().isMacOSX() && 3216 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && 3217 (G->getGlobal()->isDeclaration() || 3218 G->getGlobal()->isWeakForLinker())) { 3219 // PC-relative references to external symbols should go through $stub, 3220 // unless we're building with the leopard linker or later, which 3221 // automatically synthesizes these stubs. 3222 OpFlags = PPCII::MO_DARWIN_STUB; 3223 } 3224 3225 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, 3226 // every direct call is) turn it into a TargetGlobalAddress / 3227 // TargetExternalSymbol node so that legalize doesn't hack it. 3228 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, 3229 Callee.getValueType(), 3230 0, OpFlags); 3231 needIndirectCall = false; 3232 } 3233 } 3234 3235 if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { 3236 unsigned char OpFlags = 0; 3237 3238 if (DAG.getTarget().getRelocationModel() != Reloc::Static && 3239 (PPCSubTarget.getTargetTriple().isMacOSX() && 3240 PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { 3241 // PC-relative references to external symbols should go through $stub, 3242 // unless we're building with the leopard linker or later, which 3243 // automatically synthesizes these stubs. 
3244 OpFlags = PPCII::MO_DARWIN_STUB; 3245 } 3246 3247 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType(), 3248 OpFlags); 3249 needIndirectCall = false; 3250 } 3251 3252 if (needIndirectCall) { 3253 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 3254 // to do the call, we can't use PPCISD::CALL. 3255 SDValue MTCTROps[] = {Chain, Callee, InFlag}; 3256 3257 if (isSVR4ABI && isPPC64) { 3258 // Function pointers in the 64-bit SVR4 ABI do not point to the function 3259 // entry point, but to the function descriptor (the function entry point 3260 // address is part of the function descriptor though). 3261 // The function descriptor is a three doubleword structure with the 3262 // following fields: function entry point, TOC base address and 3263 // environment pointer. 3264 // Thus for a call through a function pointer, the following actions need 3265 // to be performed: 3266 // 1. Save the TOC of the caller in the TOC save area of its stack 3267 // frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()). 3268 // 2. Load the address of the function entry point from the function 3269 // descriptor. 3270 // 3. Load the TOC of the callee from the function descriptor into r2. 3271 // 4. Load the environment pointer from the function descriptor into 3272 // r11. 3273 // 5. Branch to the function entry point address. 3274 // 6. On return of the callee, the TOC of the caller needs to be 3275 // restored (this is done in FinishCall()). 3276 // 3277 // All those operations are flagged together to ensure that no other 3278 // operations can be scheduled in between. E.g. without flagging the 3279 // operations together, a TOC access in the caller could be scheduled 3280 // between the load of the callee TOC and the branch to the callee, which 3281 // results in the TOC access going through the TOC of the callee instead 3282 // of going through the TOC of the caller, which leads to incorrect code. 3283 3284 // Load the address of the function entry point from the function 3285 // descriptor. 3286 SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue); 3287 SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, MTCTROps, 3288 InFlag.getNode() ? 3 : 2); 3289 Chain = LoadFuncPtr.getValue(1); 3290 InFlag = LoadFuncPtr.getValue(2); 3291 3292 // Load environment pointer into r11. 3293 // Offset of the environment pointer within the function descriptor. 3294 SDValue PtrOff = DAG.getIntPtrConstant(16); 3295 3296 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff); 3297 SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr, 3298 InFlag); 3299 Chain = LoadEnvPtr.getValue(1); 3300 InFlag = LoadEnvPtr.getValue(2); 3301 3302 SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr, 3303 InFlag); 3304 Chain = EnvVal.getValue(0); 3305 InFlag = EnvVal.getValue(1); 3306 3307 // Load TOC of the callee into r2. We are using a target-specific load 3308 // with r2 hard coded, because the result of a target-independent load 3309 // would never go directly into r2, since r2 is a reserved register (which 3310 // prevents the register allocator from allocating it), resulting in an 3311 // additional register being allocated and an unnecessary move instruction 3312 // being generated. 
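      // As a rough illustration, assuming the usual ELFv1-style descriptor
      // layout (entry point at offset 0, TOC pointer at offset 8, environment
      // pointer at offset 16), the sequence built here is meant to end up
      // looking something like:
      //   ld    r0, 0(rDesc)    ; function entry point
      //   ld    r11, 16(rDesc)  ; environment pointer
      //   ld    r2, 8(rDesc)    ; callee TOC
      //   mtctr r0
      //   bctrl
      //   ld    r2, 40(r1)      ; caller TOC restored afterwards
      // where rDesc stands for whatever register holds the descriptor address;
      // this is only a sketch of the shape, not the exact emitted code.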
3313 VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3314 SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, 3315 Callee, InFlag); 3316 Chain = LoadTOCPtr.getValue(0); 3317 InFlag = LoadTOCPtr.getValue(1); 3318 3319 MTCTROps[0] = Chain; 3320 MTCTROps[1] = LoadFuncPtr; 3321 MTCTROps[2] = InFlag; 3322 } 3323 3324 Chain = DAG.getNode(PPCISD::MTCTR, dl, NodeTys, MTCTROps, 3325 2 + (InFlag.getNode() != 0)); 3326 InFlag = Chain.getValue(1); 3327 3328 NodeTys.clear(); 3329 NodeTys.push_back(MVT::Other); 3330 NodeTys.push_back(MVT::Glue); 3331 Ops.push_back(Chain); 3332 CallOpc = PPCISD::BCTRL; 3333 Callee.setNode(0); 3334 // Add use of X11 (holding environment pointer) 3335 if (isSVR4ABI && isPPC64) 3336 Ops.push_back(DAG.getRegister(PPC::X11, PtrVT)); 3337 // Add CTR register as callee so a bctr can be emitted later. 3338 if (isTailCall) 3339 Ops.push_back(DAG.getRegister(isPPC64 ? PPC::CTR8 : PPC::CTR, PtrVT)); 3340 } 3341 3342 // If this is a direct call, pass the chain and the callee. 3343 if (Callee.getNode()) { 3344 Ops.push_back(Chain); 3345 Ops.push_back(Callee); 3346 } 3347 // If this is a tail call add stack pointer delta. 3348 if (isTailCall) 3349 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 3350 3351 // Add argument registers to the end of the list so that they are known live 3352 // into the call. 3353 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 3354 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 3355 RegsToPass[i].second.getValueType())); 3356 3357 return CallOpc; 3358} 3359 3360static 3361bool isLocalCall(const SDValue &Callee) 3362{ 3363 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 3364 return !G->getGlobal()->isDeclaration() && 3365 !G->getGlobal()->isWeakForLinker(); 3366 return false; 3367} 3368 3369SDValue 3370PPCTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, 3371 CallingConv::ID CallConv, bool isVarArg, 3372 const SmallVectorImpl<ISD::InputArg> &Ins, 3373 SDLoc dl, SelectionDAG &DAG, 3374 SmallVectorImpl<SDValue> &InVals) const { 3375 3376 SmallVector<CCValAssign, 16> RVLocs; 3377 CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3378 getTargetMachine(), RVLocs, *DAG.getContext()); 3379 CCRetInfo.AnalyzeCallResult(Ins, RetCC_PPC); 3380 3381 // Copy all of the result registers out of their specified physreg. 
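  // For PPC this typically means r3/r4 for integer results, f1 for floating
  // point and v2 for vector results (whatever RetCC_PPC assigned); any
  // promotion the ABI applied is undone below with AssertZext/AssertSext
  // followed by a truncate.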
3382 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 3383 CCValAssign &VA = RVLocs[i]; 3384 assert(VA.isRegLoc() && "Can only return in registers!"); 3385 3386 SDValue Val = DAG.getCopyFromReg(Chain, dl, 3387 VA.getLocReg(), VA.getLocVT(), InFlag); 3388 Chain = Val.getValue(1); 3389 InFlag = Val.getValue(2); 3390 3391 switch (VA.getLocInfo()) { 3392 default: llvm_unreachable("Unknown loc info!"); 3393 case CCValAssign::Full: break; 3394 case CCValAssign::AExt: 3395 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3396 break; 3397 case CCValAssign::ZExt: 3398 Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val, 3399 DAG.getValueType(VA.getValVT())); 3400 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3401 break; 3402 case CCValAssign::SExt: 3403 Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val, 3404 DAG.getValueType(VA.getValVT())); 3405 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); 3406 break; 3407 } 3408 3409 InVals.push_back(Val); 3410 } 3411 3412 return Chain; 3413} 3414 3415SDValue 3416PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, 3417 bool isTailCall, bool isVarArg, 3418 SelectionDAG &DAG, 3419 SmallVector<std::pair<unsigned, SDValue>, 8> 3420 &RegsToPass, 3421 SDValue InFlag, SDValue Chain, 3422 SDValue &Callee, 3423 int SPDiff, unsigned NumBytes, 3424 const SmallVectorImpl<ISD::InputArg> &Ins, 3425 SmallVectorImpl<SDValue> &InVals) const { 3426 std::vector<EVT> NodeTys; 3427 SmallVector<SDValue, 8> Ops; 3428 unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, 3429 isTailCall, RegsToPass, Ops, NodeTys, 3430 PPCSubTarget); 3431 3432 // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls 3433 if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) 3434 Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); 3435 3436 // When performing tail call optimization the callee pops its arguments off 3437 // the stack. Account for this here so these bytes can be pushed back on in 3438 // PPCFrameLowering::eliminateCallFramePseudoInstr. 3439 int BytesCalleePops = 3440 (CallConv == CallingConv::Fast && 3441 getTargetMachine().Options.GuaranteedTailCallOpt) ? NumBytes : 0; 3442 3443 // Add a register mask operand representing the call-preserved registers. 3444 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 3445 const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); 3446 assert(Mask && "Missing call preserved mask for calling convention"); 3447 Ops.push_back(DAG.getRegisterMask(Mask)); 3448 3449 if (InFlag.getNode()) 3450 Ops.push_back(InFlag); 3451 3452 // Emit tail call. 3453 if (isTailCall) { 3454 assert(((Callee.getOpcode() == ISD::Register && 3455 cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) || 3456 Callee.getOpcode() == ISD::TargetExternalSymbol || 3457 Callee.getOpcode() == ISD::TargetGlobalAddress || 3458 isa<ConstantSDNode>(Callee)) && 3459 "Expecting an global address, external symbol, absolute value or register"); 3460 3461 return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); 3462 } 3463 3464 // Add a NOP immediately after the branch instruction when using the 64-bit 3465 // SVR4 ABI. At link time, if caller and callee are in a different module and 3466 // thus have a different TOC, the call will be replaced with a call to a stub 3467 // function which saves the current TOC, loads the TOC of the callee and 3468 // branches to the callee. 
The NOP will be replaced with a load instruction 3469 // which restores the TOC of the caller from the TOC save slot of the current 3470 // stack frame. If caller and callee belong to the same module (and have the 3471 // same TOC), the NOP will remain unchanged. 3472 3473 bool needsTOCRestore = false; 3474 if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { 3475 if (CallOpc == PPCISD::BCTRL) { 3476 // This is a call through a function pointer. 3477 // Restore the caller TOC from the save area into R2. 3478 // See PrepareCall() for more information about calls through function 3479 // pointers in the 64-bit SVR4 ABI. 3480 // We are using a target-specific load with r2 hard coded, because the 3481 // result of a target-independent load would never go directly into r2, 3482 // since r2 is a reserved register (which prevents the register allocator 3483 // from allocating it), resulting in an additional register being 3484 // allocated and an unnecessary move instruction being generated. 3485 needsTOCRestore = true; 3486 } else if ((CallOpc == PPCISD::CALL) && !isLocalCall(Callee)) { 3487 // Otherwise insert NOP for non-local calls. 3488 CallOpc = PPCISD::CALL_NOP; 3489 } 3490 } 3491 3492 Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); 3493 InFlag = Chain.getValue(1); 3494 3495 if (needsTOCRestore) { 3496 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3497 Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); 3498 InFlag = Chain.getValue(1); 3499 } 3500 3501 Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), 3502 DAG.getIntPtrConstant(BytesCalleePops, true), 3503 InFlag, dl); 3504 if (!Ins.empty()) 3505 InFlag = Chain.getValue(1); 3506 3507 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, 3508 Ins, dl, DAG, InVals); 3509} 3510 3511SDValue 3512PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, 3513 SmallVectorImpl<SDValue> &InVals) const { 3514 SelectionDAG &DAG = CLI.DAG; 3515 SDLoc &dl = CLI.DL; 3516 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; 3517 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; 3518 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; 3519 SDValue Chain = CLI.Chain; 3520 SDValue Callee = CLI.Callee; 3521 bool &isTailCall = CLI.IsTailCall; 3522 CallingConv::ID CallConv = CLI.CallConv; 3523 bool isVarArg = CLI.IsVarArg; 3524 3525 if (isTailCall) 3526 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg, 3527 Ins, DAG); 3528 3529 if (PPCSubTarget.isSVR4ABI()) { 3530 if (PPCSubTarget.isPPC64()) 3531 return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, 3532 isTailCall, Outs, OutVals, Ins, 3533 dl, DAG, InVals); 3534 else 3535 return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg, 3536 isTailCall, Outs, OutVals, Ins, 3537 dl, DAG, InVals); 3538 } 3539 3540 return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg, 3541 isTailCall, Outs, OutVals, Ins, 3542 dl, DAG, InVals); 3543} 3544 3545SDValue 3546PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee, 3547 CallingConv::ID CallConv, bool isVarArg, 3548 bool isTailCall, 3549 const SmallVectorImpl<ISD::OutputArg> &Outs, 3550 const SmallVectorImpl<SDValue> &OutVals, 3551 const SmallVectorImpl<ISD::InputArg> &Ins, 3552 SDLoc dl, SelectionDAG &DAG, 3553 SmallVectorImpl<SDValue> &InVals) const { 3554 // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description 3555 // of the 32-bit SVR4 ABI stack frame layout. 
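  // In outline: the operands are analyzed with CC_PPC32_SVR4 (or its vararg
  // variants), byval aggregates are copied into the local variable space
  // outside of the CALLSEQ_START..END region, the remaining arguments are
  // placed in their assigned registers or stack slots, CR6 is set for vararg
  // calls to indicate whether any FP arguments live in registers, and the
  // call itself is emitted by FinishCall.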
3556 3557 assert((CallConv == CallingConv::C || 3558 CallConv == CallingConv::Fast) && "Unknown calling convention!"); 3559 3560 unsigned PtrByteSize = 4; 3561 3562 MachineFunction &MF = DAG.getMachineFunction(); 3563 3564 // Mark this function as potentially containing a function that contains a 3565 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3566 // and restoring the callers stack pointer in this functions epilog. This is 3567 // done because by tail calling the called function might overwrite the value 3568 // in this function's (MF) stack pointer stack slot 0(SP). 3569 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3570 CallConv == CallingConv::Fast) 3571 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3572 3573 // Count how many bytes are to be pushed on the stack, including the linkage 3574 // area, parameter list area and the part of the local variable space which 3575 // contains copies of aggregates which are passed by value. 3576 3577 // Assign locations to all of the outgoing arguments. 3578 SmallVector<CCValAssign, 16> ArgLocs; 3579 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3580 getTargetMachine(), ArgLocs, *DAG.getContext()); 3581 3582 // Reserve space for the linkage area on the stack. 3583 CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); 3584 3585 if (isVarArg) { 3586 // Handle fixed and variable vector arguments differently. 3587 // Fixed vector arguments go into registers as long as registers are 3588 // available. Variable vector arguments always go into memory. 3589 unsigned NumArgs = Outs.size(); 3590 3591 for (unsigned i = 0; i != NumArgs; ++i) { 3592 MVT ArgVT = Outs[i].VT; 3593 ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; 3594 bool Result; 3595 3596 if (Outs[i].IsFixed) { 3597 Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, 3598 CCInfo); 3599 } else { 3600 Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, 3601 ArgFlags, CCInfo); 3602 } 3603 3604 if (Result) { 3605#ifndef NDEBUG 3606 errs() << "Call operand #" << i << " has unhandled type " 3607 << EVT(ArgVT).getEVTString() << "\n"; 3608#endif 3609 llvm_unreachable(0); 3610 } 3611 } 3612 } else { 3613 // All arguments are treated the same. 3614 CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4); 3615 } 3616 3617 // Assign locations to all of the outgoing aggregate by value arguments. 3618 SmallVector<CCValAssign, 16> ByValArgLocs; 3619 CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(), 3620 getTargetMachine(), ByValArgLocs, *DAG.getContext()); 3621 3622 // Reserve stack space for the allocations in CCInfo. 3623 CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrByteSize); 3624 3625 CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal); 3626 3627 // Size of the linkage area, parameter list area and the part of the local 3628 // space variable where copies of aggregates which are passed by value are 3629 // stored. 3630 unsigned NumBytes = CCByValInfo.getNextStackOffset(); 3631 3632 // Calculate by how many bytes the stack has to be adjusted in case of tail 3633 // call optimization. 3634 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3635 3636 // Adjust the stack pointer for the new arguments... 
3637 // These operations are automatically eliminated by the prolog/epilog pass 3638 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 3639 dl); 3640 SDValue CallSeqStart = Chain; 3641 3642 // Load the return address and frame pointer so it can be moved somewhere else 3643 // later. 3644 SDValue LROp, FPOp; 3645 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, false, 3646 dl); 3647 3648 // Set up a copy of the stack pointer for use loading and storing any 3649 // arguments that may not fit in the registers available for argument 3650 // passing. 3651 SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 3652 3653 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3654 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3655 SmallVector<SDValue, 8> MemOpChains; 3656 3657 bool seenFloatArg = false; 3658 // Walk the register/memloc assignments, inserting copies/loads. 3659 for (unsigned i = 0, j = 0, e = ArgLocs.size(); 3660 i != e; 3661 ++i) { 3662 CCValAssign &VA = ArgLocs[i]; 3663 SDValue Arg = OutVals[i]; 3664 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3665 3666 if (Flags.isByVal()) { 3667 // Argument is an aggregate which is passed by value, thus we need to 3668 // create a copy of it in the local variable space of the current stack 3669 // frame (which is the stack frame of the caller) and pass the address of 3670 // this copy to the callee. 3671 assert((j < ByValArgLocs.size()) && "Index out of bounds!"); 3672 CCValAssign &ByValVA = ByValArgLocs[j++]; 3673 assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!"); 3674 3675 // Memory reserved in the local variable space of the callers stack frame. 3676 unsigned LocMemOffset = ByValVA.getLocMemOffset(); 3677 3678 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3679 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3680 3681 // Create a copy of the argument in the local area of the current 3682 // stack frame. 3683 SDValue MemcpyCall = 3684 CreateCopyOfByValArgument(Arg, PtrOff, 3685 CallSeqStart.getNode()->getOperand(0), 3686 Flags, DAG, dl); 3687 3688 // This must go outside the CALLSEQ_START..END. 3689 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3690 CallSeqStart.getNode()->getOperand(1), 3691 SDLoc(MemcpyCall)); 3692 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3693 NewCallSeqStart.getNode()); 3694 Chain = CallSeqStart = NewCallSeqStart; 3695 3696 // Pass the address of the aggregate copy on the stack either in a 3697 // physical register or in the parameter list area of the current stack 3698 // frame to the callee. 3699 Arg = PtrOff; 3700 } 3701 3702 if (VA.isRegLoc()) { 3703 seenFloatArg |= VA.getLocVT().isFloatingPoint(); 3704 // Put argument in a physical register. 3705 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); 3706 } else { 3707 // Put argument in the parameter list area of the current stack frame. 3708 assert(VA.isMemLoc()); 3709 unsigned LocMemOffset = VA.getLocMemOffset(); 3710 3711 if (!isTailCall) { 3712 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset); 3713 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff); 3714 3715 MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, 3716 MachinePointerInfo(), 3717 false, false, 0)); 3718 } else { 3719 // Calculate and remember argument location. 
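        // For a tail call the destination slot can overlap this function's
        // own incoming argument area, so the store is only recorded here and
        // emitted later from PrepareTailCall rather than immediately.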
3720 CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset, 3721 TailCallArguments); 3722 } 3723 } 3724 } 3725 3726 if (!MemOpChains.empty()) 3727 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 3728 &MemOpChains[0], MemOpChains.size()); 3729 3730 // Build a sequence of copy-to-reg nodes chained together with token chain 3731 // and flag operands which copy the outgoing args into the appropriate regs. 3732 SDValue InFlag; 3733 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 3734 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 3735 RegsToPass[i].second, InFlag); 3736 InFlag = Chain.getValue(1); 3737 } 3738 3739 // Set CR bit 6 to true if this is a vararg call with floating args passed in 3740 // registers. 3741 if (isVarArg) { 3742 SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); 3743 SDValue Ops[] = { Chain, InFlag }; 3744 3745 Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET, 3746 dl, VTs, Ops, InFlag.getNode() ? 2 : 1); 3747 3748 InFlag = Chain.getValue(1); 3749 } 3750 3751 if (isTailCall) 3752 PrepareTailCall(DAG, InFlag, Chain, dl, false, SPDiff, NumBytes, LROp, FPOp, 3753 false, TailCallArguments); 3754 3755 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 3756 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 3757 Ins, InVals); 3758} 3759 3760// Copy an argument into memory, being careful to do this outside the 3761// call sequence for the call to which the argument belongs. 3762SDValue 3763PPCTargetLowering::createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff, 3764 SDValue CallSeqStart, 3765 ISD::ArgFlagsTy Flags, 3766 SelectionDAG &DAG, 3767 SDLoc dl) const { 3768 SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 3769 CallSeqStart.getNode()->getOperand(0), 3770 Flags, DAG, dl); 3771 // The MEMCPY must go outside the CALLSEQ_START..END. 3772 SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 3773 CallSeqStart.getNode()->getOperand(1), 3774 SDLoc(MemcpyCall)); 3775 DAG.ReplaceAllUsesWith(CallSeqStart.getNode(), 3776 NewCallSeqStart.getNode()); 3777 return NewCallSeqStart; 3778} 3779 3780SDValue 3781PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, 3782 CallingConv::ID CallConv, bool isVarArg, 3783 bool isTailCall, 3784 const SmallVectorImpl<ISD::OutputArg> &Outs, 3785 const SmallVectorImpl<SDValue> &OutVals, 3786 const SmallVectorImpl<ISD::InputArg> &Ins, 3787 SDLoc dl, SelectionDAG &DAG, 3788 SmallVectorImpl<SDValue> &InVals) const { 3789 3790 unsigned NumOps = Outs.size(); 3791 3792 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3793 unsigned PtrByteSize = 8; 3794 3795 MachineFunction &MF = DAG.getMachineFunction(); 3796 3797 // Mark this function as potentially containing a function that contains a 3798 // tail call. As a consequence the frame pointer will be used for dynamicalloc 3799 // and restoring the callers stack pointer in this functions epilog. This is 3800 // done because by tail calling the called function might overwrite the value 3801 // in this function's (MF) stack pointer stack slot 0(SP). 3802 if (getTargetMachine().Options.GuaranteedTailCallOpt && 3803 CallConv == CallingConv::Fast) 3804 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 3805 3806 unsigned nAltivecParamsAtEnd = 0; 3807 3808 // Count how many bytes are to be pushed on the stack, including the linkage 3809 // area, and parameter passing area. We start with at least 48 bytes, which 3810 // is reserved space for [SP][CR][LR][3 x unused]. 
3811 // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result 3812 // of this call. 3813 unsigned NumBytes = 3814 CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, 3815 Outs, OutVals, nAltivecParamsAtEnd); 3816 3817 // Calculate by how many bytes the stack has to be adjusted in case of tail 3818 // call optimization. 3819 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 3820 3821 // To protect arguments on the stack from being clobbered in a tail call, 3822 // force all the loads to happen before doing any other lowering. 3823 if (isTailCall) 3824 Chain = DAG.getStackArgumentTokenFactor(Chain); 3825 3826 // Adjust the stack pointer for the new arguments... 3827 // These operations are automatically eliminated by the prolog/epilog pass 3828 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 3829 dl); 3830 SDValue CallSeqStart = Chain; 3831 3832 // Load the return address and frame pointer so it can be move somewhere else 3833 // later. 3834 SDValue LROp, FPOp; 3835 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 3836 dl); 3837 3838 // Set up a copy of the stack pointer for use loading and storing any 3839 // arguments that may not fit in the registers available for argument 3840 // passing. 3841 SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 3842 3843 // Figure out which arguments are going to go in registers, and which in 3844 // memory. Also, if this is a vararg function, floating point operations 3845 // must be stored to our stack, and loaded into integer regs as well, if 3846 // any integer regs are available for argument passing. 3847 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); 3848 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 3849 3850 static const uint16_t GPR[] = { 3851 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 3852 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 3853 }; 3854 static const uint16_t *FPR = GetFPR(); 3855 3856 static const uint16_t VR[] = { 3857 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 3858 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 3859 }; 3860 const unsigned NumGPRs = array_lengthof(GPR); 3861 const unsigned NumFPRs = 13; 3862 const unsigned NumVRs = array_lengthof(VR); 3863 3864 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 3865 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 3866 3867 SmallVector<SDValue, 8> MemOpChains; 3868 for (unsigned i = 0; i != NumOps; ++i) { 3869 SDValue Arg = OutVals[i]; 3870 ISD::ArgFlagsTy Flags = Outs[i].Flags; 3871 3872 // PtrOff will be used to store the current argument to the stack if a 3873 // register cannot be found for it. 3874 SDValue PtrOff; 3875 3876 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 3877 3878 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 3879 3880 // Promote integers to 64-bit values. 3881 if (Arg.getValueType() == MVT::i32) { 3882 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 3883 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 3884 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 3885 } 3886 3887 // FIXME memcpy is used way more than necessary. Correctness first. 3888 // Note: "by value" is code for passing a structure by value, not 3889 // basic types. 3890 if (Flags.isByVal()) { 3891 // Note: Size includes alignment padding, so 3892 // struct x { short a; char b; } 3893 // will have Size = 4. With #pragma pack(1), it will have Size = 3. 
3894 // These are the proper values we need for right-justifying the 3895 // aggregate in a parameter register. 3896 unsigned Size = Flags.getByValSize(); 3897 3898 // An empty aggregate parameter takes up no storage and no 3899 // registers. 3900 if (Size == 0) 3901 continue; 3902 3903 unsigned BVAlign = Flags.getByValAlign(); 3904 if (BVAlign > 8) { 3905 if (BVAlign % PtrByteSize != 0) 3906 llvm_unreachable( 3907 "ByVal alignment is not a multiple of the pointer size"); 3908 3909 ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; 3910 } 3911 3912 // All aggregates smaller than 8 bytes must be passed right-justified. 3913 if (Size==1 || Size==2 || Size==4) { 3914 EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); 3915 if (GPR_idx != NumGPRs) { 3916 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 3917 MachinePointerInfo(), VT, 3918 false, false, 0); 3919 MemOpChains.push_back(Load.getValue(1)); 3920 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3921 3922 ArgOffset += PtrByteSize; 3923 continue; 3924 } 3925 } 3926 3927 if (GPR_idx == NumGPRs && Size < 8) { 3928 SDValue Const = DAG.getConstant(PtrByteSize - Size, 3929 PtrOff.getValueType()); 3930 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 3931 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 3932 CallSeqStart, 3933 Flags, DAG, dl); 3934 ArgOffset += PtrByteSize; 3935 continue; 3936 } 3937 // Copy entire object into memory. There are cases where gcc-generated 3938 // code assumes it is there, even if it could be put entirely into 3939 // registers. (This is not what the doc says.) 3940 3941 // FIXME: The above statement is likely due to a misunderstanding of the 3942 // documents. All arguments must be copied into the parameter area BY 3943 // THE CALLEE in the event that the callee takes the address of any 3944 // formal argument. That has not yet been implemented. However, it is 3945 // reasonable to use the stack area as a staging area for the register 3946 // load. 3947 3948 // Skip this for small aggregates, as we will use the same slot for a 3949 // right-justified copy, below. 3950 if (Size >= 8) 3951 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 3952 CallSeqStart, 3953 Flags, DAG, dl); 3954 3955 // When a register is available, pass a small aggregate right-justified. 3956 if (Size < 8 && GPR_idx != NumGPRs) { 3957 // The easiest way to get this right-justified in a register 3958 // is to copy the structure into the rightmost portion of a 3959 // local variable slot, then load the whole slot into the 3960 // register. 3961 // FIXME: The memcpy seems to produce pretty awful code for 3962 // small aggregates, particularly for packed ones. 3963 // FIXME: It would be preferable to use the slot in the 3964 // parameter save area instead of a new local variable. 3965 SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); 3966 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 3967 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 3968 CallSeqStart, 3969 Flags, DAG, dl); 3970 3971 // Load the slot into the register. 3972 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, PtrOff, 3973 MachinePointerInfo(), 3974 false, false, false, 0); 3975 MemOpChains.push_back(Load.getValue(1)); 3976 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3977 3978 // Done with this argument. 
3979 ArgOffset += PtrByteSize; 3980 continue; 3981 } 3982 3983 // For aggregates larger than PtrByteSize, copy the pieces of the 3984 // object that fit into registers from the parameter save area. 3985 for (unsigned j=0; j<Size; j+=PtrByteSize) { 3986 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 3987 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 3988 if (GPR_idx != NumGPRs) { 3989 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 3990 MachinePointerInfo(), 3991 false, false, false, 0); 3992 MemOpChains.push_back(Load.getValue(1)); 3993 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 3994 ArgOffset += PtrByteSize; 3995 } else { 3996 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 3997 break; 3998 } 3999 } 4000 continue; 4001 } 4002 4003 switch (Arg.getSimpleValueType().SimpleTy) { 4004 default: llvm_unreachable("Unexpected ValueType for argument!"); 4005 case MVT::i32: 4006 case MVT::i64: 4007 if (GPR_idx != NumGPRs) { 4008 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4009 } else { 4010 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4011 true, isTailCall, false, MemOpChains, 4012 TailCallArguments, dl); 4013 } 4014 ArgOffset += PtrByteSize; 4015 break; 4016 case MVT::f32: 4017 case MVT::f64: 4018 if (FPR_idx != NumFPRs) { 4019 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4020 4021 if (isVarArg) { 4022 // A single float or an aggregate containing only a single float 4023 // must be passed right-justified in the stack doubleword, and 4024 // in the GPR, if one is available. 4025 SDValue StoreOff; 4026 if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { 4027 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4028 StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4029 } else 4030 StoreOff = PtrOff; 4031 4032 SDValue Store = DAG.getStore(Chain, dl, Arg, StoreOff, 4033 MachinePointerInfo(), false, false, 0); 4034 MemOpChains.push_back(Store); 4035 4036 // Float varargs are always shadowed in available integer registers 4037 if (GPR_idx != NumGPRs) { 4038 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4039 MachinePointerInfo(), false, false, 4040 false, 0); 4041 MemOpChains.push_back(Load.getValue(1)); 4042 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4043 } 4044 } else if (GPR_idx != NumGPRs) 4045 // If we have any FPRs remaining, we may also have GPRs remaining. 4046 ++GPR_idx; 4047 } else { 4048 // Single-precision floating-point values are mapped to the 4049 // second (rightmost) word of the stack doubleword. 4050 if (Arg.getValueType() == MVT::f32) { 4051 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4052 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4053 } 4054 4055 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4056 true, isTailCall, false, MemOpChains, 4057 TailCallArguments, dl); 4058 } 4059 ArgOffset += 8; 4060 break; 4061 case MVT::v4f32: 4062 case MVT::v4i32: 4063 case MVT::v8i16: 4064 case MVT::v16i8: 4065 if (isVarArg) { 4066 // These go aligned on the stack, or in the corresponding R registers 4067 // when within range. The Darwin PPC ABI doc claims they also go in 4068 // V registers; in fact gcc does this only for arguments that are 4069 // prototyped, not for those that match the ... We do it for all 4070 // arguments, seems to work. 
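        // The loop below rounds ArgOffset up to the next 16-byte boundary
        // and, for every pointer-sized slot it skips, also consumes the GPR
        // shadowing that slot so the register index stays in sync with the
        // memory offset.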
4071 while (ArgOffset % 16 !=0) { 4072 ArgOffset += PtrByteSize; 4073 if (GPR_idx != NumGPRs) 4074 GPR_idx++; 4075 } 4076 // We could elide this store in the case where the object fits 4077 // entirely in R registers. Maybe later. 4078 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4079 DAG.getConstant(ArgOffset, PtrVT)); 4080 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4081 MachinePointerInfo(), false, false, 0); 4082 MemOpChains.push_back(Store); 4083 if (VR_idx != NumVRs) { 4084 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4085 MachinePointerInfo(), 4086 false, false, false, 0); 4087 MemOpChains.push_back(Load.getValue(1)); 4088 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4089 } 4090 ArgOffset += 16; 4091 for (unsigned i=0; i<16; i+=PtrByteSize) { 4092 if (GPR_idx == NumGPRs) 4093 break; 4094 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4095 DAG.getConstant(i, PtrVT)); 4096 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4097 false, false, false, 0); 4098 MemOpChains.push_back(Load.getValue(1)); 4099 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4100 } 4101 break; 4102 } 4103 4104 // Non-varargs Altivec params generally go in registers, but have 4105 // stack space allocated at the end. 4106 if (VR_idx != NumVRs) { 4107 // Doesn't have GPR space allocated. 4108 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4109 } else { 4110 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4111 true, isTailCall, true, MemOpChains, 4112 TailCallArguments, dl); 4113 ArgOffset += 16; 4114 } 4115 break; 4116 } 4117 } 4118 4119 if (!MemOpChains.empty()) 4120 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4121 &MemOpChains[0], MemOpChains.size()); 4122 4123 // Check if this is an indirect call (MTCTR/BCTRL). 4124 // See PrepareCall() for more information about calls through function 4125 // pointers in the 64-bit SVR4 ABI. 4126 if (!isTailCall && 4127 !dyn_cast<GlobalAddressSDNode>(Callee) && 4128 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4129 !isBLACompatibleAddress(Callee, DAG)) { 4130 // Load r2 into a virtual register and store it to the TOC save area. 4131 SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); 4132 // TOC save area offset. 4133 SDValue PtrOff = DAG.getIntPtrConstant(40); 4134 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4135 Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), 4136 false, false, 0); 4137 // R12 must contain the address of an indirect callee. This does not 4138 // mean the MTCTR instruction must use R12; it's easier to model this 4139 // as an extra parameter, so do that. 4140 RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); 4141 } 4142 4143 // Build a sequence of copy-to-reg nodes chained together with token chain 4144 // and flag operands which copy the outgoing args into the appropriate regs. 
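  // The glue value threaded through each CopyToReg keeps these copies and the
  // call tightly ordered, so nothing can be scheduled in between that might
  // clobber the argument registers.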
4145 SDValue InFlag; 4146 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 4147 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, 4148 RegsToPass[i].second, InFlag); 4149 InFlag = Chain.getValue(1); 4150 } 4151 4152 if (isTailCall) 4153 PrepareTailCall(DAG, InFlag, Chain, dl, true, SPDiff, NumBytes, LROp, 4154 FPOp, true, TailCallArguments); 4155 4156 return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG, 4157 RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes, 4158 Ins, InVals); 4159} 4160 4161SDValue 4162PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, 4163 CallingConv::ID CallConv, bool isVarArg, 4164 bool isTailCall, 4165 const SmallVectorImpl<ISD::OutputArg> &Outs, 4166 const SmallVectorImpl<SDValue> &OutVals, 4167 const SmallVectorImpl<ISD::InputArg> &Ins, 4168 SDLoc dl, SelectionDAG &DAG, 4169 SmallVectorImpl<SDValue> &InVals) const { 4170 4171 unsigned NumOps = Outs.size(); 4172 4173 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4174 bool isPPC64 = PtrVT == MVT::i64; 4175 unsigned PtrByteSize = isPPC64 ? 8 : 4; 4176 4177 MachineFunction &MF = DAG.getMachineFunction(); 4178 4179 // Mark this function as potentially containing a function that contains a 4180 // tail call. As a consequence the frame pointer will be used for dynamicalloc 4181 // and restoring the callers stack pointer in this functions epilog. This is 4182 // done because by tail calling the called function might overwrite the value 4183 // in this function's (MF) stack pointer stack slot 0(SP). 4184 if (getTargetMachine().Options.GuaranteedTailCallOpt && 4185 CallConv == CallingConv::Fast) 4186 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 4187 4188 unsigned nAltivecParamsAtEnd = 0; 4189 4190 // Count how many bytes are to be pushed on the stack, including the linkage 4191 // area, and parameter passing area. We start with 24/48 bytes, which is 4192 // prereserved space for [SP][CR][LR][3 x unused]. 4193 unsigned NumBytes = 4194 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, 4195 Outs, OutVals, 4196 nAltivecParamsAtEnd); 4197 4198 // Calculate by how many bytes the stack has to be adjusted in case of tail 4199 // call optimization. 4200 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 4201 4202 // To protect arguments on the stack from being clobbered in a tail call, 4203 // force all the loads to happen before doing any other lowering. 4204 if (isTailCall) 4205 Chain = DAG.getStackArgumentTokenFactor(Chain); 4206 4207 // Adjust the stack pointer for the new arguments... 4208 // These operations are automatically eliminated by the prolog/epilog pass 4209 Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), 4210 dl); 4211 SDValue CallSeqStart = Chain; 4212 4213 // Load the return address and frame pointer so it can be move somewhere else 4214 // later. 4215 SDValue LROp, FPOp; 4216 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, true, 4217 dl); 4218 4219 // Set up a copy of the stack pointer for use loading and storing any 4220 // arguments that may not fit in the registers available for argument 4221 // passing. 4222 SDValue StackPtr; 4223 if (isPPC64) 4224 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 4225 else 4226 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 4227 4228 // Figure out which arguments are going to go in registers, and which in 4229 // memory. 
Also, if this is a vararg function, floating point operations 4230 // must be stored to our stack, and loaded into integer regs as well, if 4231 // any integer regs are available for argument passing. 4232 unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); 4233 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 4234 4235 static const uint16_t GPR_32[] = { // 32-bit registers. 4236 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 4237 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 4238 }; 4239 static const uint16_t GPR_64[] = { // 64-bit registers. 4240 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 4241 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 4242 }; 4243 static const uint16_t *FPR = GetFPR(); 4244 4245 static const uint16_t VR[] = { 4246 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 4247 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 4248 }; 4249 const unsigned NumGPRs = array_lengthof(GPR_32); 4250 const unsigned NumFPRs = 13; 4251 const unsigned NumVRs = array_lengthof(VR); 4252 4253 const uint16_t *GPR = isPPC64 ? GPR_64 : GPR_32; 4254 4255 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass; 4256 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 4257 4258 SmallVector<SDValue, 8> MemOpChains; 4259 for (unsigned i = 0; i != NumOps; ++i) { 4260 SDValue Arg = OutVals[i]; 4261 ISD::ArgFlagsTy Flags = Outs[i].Flags; 4262 4263 // PtrOff will be used to store the current argument to the stack if a 4264 // register cannot be found for it. 4265 SDValue PtrOff; 4266 4267 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 4268 4269 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); 4270 4271 // On PPC64, promote integers to 64-bit values. 4272 if (isPPC64 && Arg.getValueType() == MVT::i32) { 4273 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 4274 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 4275 Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg); 4276 } 4277 4278 // FIXME memcpy is used way more than necessary. Correctness first. 4279 // Note: "by value" is code for passing a structure by value, not 4280 // basic types. 4281 if (Flags.isByVal()) { 4282 unsigned Size = Flags.getByValSize(); 4283 // Very small objects are passed right-justified. Everything else is 4284 // passed left-justified. 4285 if (Size==1 || Size==2) { 4286 EVT VT = (Size==1) ? MVT::i8 : MVT::i16; 4287 if (GPR_idx != NumGPRs) { 4288 SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg, 4289 MachinePointerInfo(), VT, 4290 false, false, 0); 4291 MemOpChains.push_back(Load.getValue(1)); 4292 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4293 4294 ArgOffset += PtrByteSize; 4295 } else { 4296 SDValue Const = DAG.getConstant(PtrByteSize - Size, 4297 PtrOff.getValueType()); 4298 SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); 4299 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, 4300 CallSeqStart, 4301 Flags, DAG, dl); 4302 ArgOffset += PtrByteSize; 4303 } 4304 continue; 4305 } 4306 // Copy entire object into memory. There are cases where gcc-generated 4307 // code assumes it is there, even if it could be put entirely into 4308 // registers. (This is not what the doc says.) 4309 Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff, 4310 CallSeqStart, 4311 Flags, DAG, dl); 4312 4313 // For small aggregates (Darwin only) and aggregates >= PtrByteSize, 4314 // copy the pieces of the object that fit into registers from the 4315 // parameter save area. 
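      // Each pointer-sized piece that still has a GPR available is loaded
      // from the in-memory copy and passed in that register; once the GPRs
      // run out, the remaining bytes simply stay in the parameter save area
      // and ArgOffset skips past them.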
4316 for (unsigned j=0; j<Size; j+=PtrByteSize) { 4317 SDValue Const = DAG.getConstant(j, PtrOff.getValueType()); 4318 SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); 4319 if (GPR_idx != NumGPRs) { 4320 SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, 4321 MachinePointerInfo(), 4322 false, false, false, 0); 4323 MemOpChains.push_back(Load.getValue(1)); 4324 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4325 ArgOffset += PtrByteSize; 4326 } else { 4327 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 4328 break; 4329 } 4330 } 4331 continue; 4332 } 4333 4334 switch (Arg.getSimpleValueType().SimpleTy) { 4335 default: llvm_unreachable("Unexpected ValueType for argument!"); 4336 case MVT::i32: 4337 case MVT::i64: 4338 if (GPR_idx != NumGPRs) { 4339 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 4340 } else { 4341 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4342 isPPC64, isTailCall, false, MemOpChains, 4343 TailCallArguments, dl); 4344 } 4345 ArgOffset += PtrByteSize; 4346 break; 4347 case MVT::f32: 4348 case MVT::f64: 4349 if (FPR_idx != NumFPRs) { 4350 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 4351 4352 if (isVarArg) { 4353 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4354 MachinePointerInfo(), false, false, 0); 4355 MemOpChains.push_back(Store); 4356 4357 // Float varargs are always shadowed in available integer registers 4358 if (GPR_idx != NumGPRs) { 4359 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4360 MachinePointerInfo(), false, false, 4361 false, 0); 4362 MemOpChains.push_back(Load.getValue(1)); 4363 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4364 } 4365 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 4366 SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); 4367 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); 4368 SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, 4369 MachinePointerInfo(), 4370 false, false, false, 0); 4371 MemOpChains.push_back(Load.getValue(1)); 4372 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4373 } 4374 } else { 4375 // If we have any FPRs remaining, we may also have GPRs remaining. 4376 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 4377 // GPRs. 4378 if (GPR_idx != NumGPRs) 4379 ++GPR_idx; 4380 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 4381 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 4382 ++GPR_idx; 4383 } 4384 } else 4385 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4386 isPPC64, isTailCall, false, MemOpChains, 4387 TailCallArguments, dl); 4388 if (isPPC64) 4389 ArgOffset += 8; 4390 else 4391 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 4392 break; 4393 case MVT::v4f32: 4394 case MVT::v4i32: 4395 case MVT::v8i16: 4396 case MVT::v16i8: 4397 if (isVarArg) { 4398 // These go aligned on the stack, or in the corresponding R registers 4399 // when within range. The Darwin PPC ABI doc claims they also go in 4400 // V registers; in fact gcc does this only for arguments that are 4401 // prototyped, not for those that match the ... We do it for all 4402 // arguments, seems to work. 4403 while (ArgOffset % 16 !=0) { 4404 ArgOffset += PtrByteSize; 4405 if (GPR_idx != NumGPRs) 4406 GPR_idx++; 4407 } 4408 // We could elide this store in the case where the object fits 4409 // entirely in R registers. Maybe later. 
4410 PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, 4411 DAG.getConstant(ArgOffset, PtrVT)); 4412 SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, 4413 MachinePointerInfo(), false, false, 0); 4414 MemOpChains.push_back(Store); 4415 if (VR_idx != NumVRs) { 4416 SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, 4417 MachinePointerInfo(), 4418 false, false, false, 0); 4419 MemOpChains.push_back(Load.getValue(1)); 4420 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 4421 } 4422 ArgOffset += 16; 4423 for (unsigned i=0; i<16; i+=PtrByteSize) { 4424 if (GPR_idx == NumGPRs) 4425 break; 4426 SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, 4427 DAG.getConstant(i, PtrVT)); 4428 SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), 4429 false, false, false, 0); 4430 MemOpChains.push_back(Load.getValue(1)); 4431 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 4432 } 4433 break; 4434 } 4435 4436 // Non-varargs Altivec params generally go in registers, but have 4437 // stack space allocated at the end. 4438 if (VR_idx != NumVRs) { 4439 // Doesn't have GPR space allocated. 4440 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 4441 } else if (nAltivecParamsAtEnd==0) { 4442 // We are emitting Altivec params in order. 4443 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4444 isPPC64, isTailCall, true, MemOpChains, 4445 TailCallArguments, dl); 4446 ArgOffset += 16; 4447 } 4448 break; 4449 } 4450 } 4451 // If all Altivec parameters fit in registers, as they usually do, 4452 // they get stack space following the non-Altivec parameters. We 4453 // don't track this here because nobody below needs it. 4454 // If there are more Altivec parameters than fit in registers emit 4455 // the stores here. 4456 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 4457 unsigned j = 0; 4458 // Offset is aligned; skip 1st 12 params which go in V registers. 4459 ArgOffset = ((ArgOffset+15)/16)*16; 4460 ArgOffset += 12*16; 4461 for (unsigned i = 0; i != NumOps; ++i) { 4462 SDValue Arg = OutVals[i]; 4463 EVT ArgType = Outs[i].VT; 4464 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 4465 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 4466 if (++j > NumVRs) { 4467 SDValue PtrOff; 4468 // We are emitting Altivec params in order. 4469 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 4470 isPPC64, isTailCall, true, MemOpChains, 4471 TailCallArguments, dl); 4472 ArgOffset += 16; 4473 } 4474 } 4475 } 4476 } 4477 4478 if (!MemOpChains.empty()) 4479 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 4480 &MemOpChains[0], MemOpChains.size()); 4481 4482 // On Darwin, R12 must contain the address of an indirect callee. This does 4483 // not mean the MTCTR instruction must use R12; it's easier to model this as 4484 // an extra parameter, so do that. 4485 if (!isTailCall && 4486 !dyn_cast<GlobalAddressSDNode>(Callee) && 4487 !dyn_cast<ExternalSymbolSDNode>(Callee) && 4488 !isBLACompatibleAddress(Callee, DAG)) 4489 RegsToPass.push_back(std::make_pair((unsigned)(isPPC64 ? PPC::X12 : 4490 PPC::R12), Callee)); 4491 4492 // Build a sequence of copy-to-reg nodes chained together with token chain 4493 // and flag operands which copy the outgoing args into the appropriate regs. 
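  // Each CopyToReg below is glued to the previous one through InFlag, which
  // keeps the argument-register copies together and immediately ahead of the
  // call during scheduling.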
4494   SDValue InFlag;
4495   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4496     Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4497                              RegsToPass[i].second, InFlag);
4498     InFlag = Chain.getValue(1);
4499   }
4500
4501   if (isTailCall)
4502     PrepareTailCall(DAG, InFlag, Chain, dl, isPPC64, SPDiff, NumBytes, LROp,
4503                     FPOp, true, TailCallArguments);
4504
4505   return FinishCall(CallConv, dl, isTailCall, isVarArg, DAG,
4506                     RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
4507                     Ins, InVals);
4508 }
4509
4510 bool
4511 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
4512                                   MachineFunction &MF, bool isVarArg,
4513                                   const SmallVectorImpl<ISD::OutputArg> &Outs,
4514                                   LLVMContext &Context) const {
4515   SmallVector<CCValAssign, 16> RVLocs;
4516   CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(),
4517                  RVLocs, Context);
4518   return CCInfo.CheckReturn(Outs, RetCC_PPC);
4519 }
4520
4521 SDValue
4522 PPCTargetLowering::LowerReturn(SDValue Chain,
4523                                CallingConv::ID CallConv, bool isVarArg,
4524                                const SmallVectorImpl<ISD::OutputArg> &Outs,
4525                                const SmallVectorImpl<SDValue> &OutVals,
4526                                SDLoc dl, SelectionDAG &DAG) const {
4527
4528   SmallVector<CCValAssign, 16> RVLocs;
4529   CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4530                  getTargetMachine(), RVLocs, *DAG.getContext());
4531   CCInfo.AnalyzeReturn(Outs, RetCC_PPC);
4532
4533   SDValue Flag;
4534   SmallVector<SDValue, 4> RetOps(1, Chain);
4535
4536   // Copy the result values into the output registers.
4537   for (unsigned i = 0; i != RVLocs.size(); ++i) {
4538     CCValAssign &VA = RVLocs[i];
4539     assert(VA.isRegLoc() && "Can only return in registers!");
4540
4541     SDValue Arg = OutVals[i];
4542
4543     switch (VA.getLocInfo()) {
4544     default: llvm_unreachable("Unknown loc info!");
4545     case CCValAssign::Full: break;
4546     case CCValAssign::AExt:
4547       Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
4548       break;
4549     case CCValAssign::ZExt:
4550       Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
4551       break;
4552     case CCValAssign::SExt:
4553       Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
4554       break;
4555     }
4556
4557     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
4558     Flag = Chain.getValue(1);
4559     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
4560   }
4561
4562   RetOps[0] = Chain;  // Update chain.
4563
4564   // Add the flag if we have it.
4565   if (Flag.getNode())
4566     RetOps.push_back(Flag);
4567
4568   return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other,
4569                      &RetOps[0], RetOps.size());
4570 }
4571
4572 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG,
4573                                              const PPCSubtarget &Subtarget) const {
4574   // When we pop the dynamic allocation we need to restore the SP link.
4575   SDLoc dl(Op);
4576
4577   // Get the correct type for pointers.
4578   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4579
4580   // Construct the stack pointer operand.
4581   bool isPPC64 = Subtarget.isPPC64();
4582   unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
4583   SDValue StackPtr = DAG.getRegister(SP, PtrVT);
4584
4585   // Get the operands for the STACKRESTORE.
4586   SDValue Chain = Op.getOperand(0);
4587   SDValue SaveSP = Op.getOperand(1);
4588
4589   // Load the old link SP.
4590   SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr,
4591                                    MachinePointerInfo(),
4592                                    false, false, false, 0);
4593
4594   // Restore the stack pointer.
4595   Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
4596
4597   // Store the old link SP.
4598   return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo(),
4599                       false, false, 0);
4600 }
4601
4602
4603
4604 SDValue
4605 PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
4606   MachineFunction &MF = DAG.getMachineFunction();
4607   bool isPPC64 = PPCSubTarget.isPPC64();
4608   bool isDarwinABI = PPCSubTarget.isDarwinABI();
4609   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4610
4611   // Get the current return address save index.  The main user of this index
4612   // is the RETURNADDR lowering.
4613   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4614   int RASI = FI->getReturnAddrSaveIndex();
4615
4616   // If the return address save index hasn't been defined yet.
4617   if (!RASI) {
4618     // Find out the fixed offset of the return address save area.
4619     int LROffset = PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI);
4620     // Allocate the frame index for the return address save area.
4621     RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, true);
4622     // Save the result.
4623     FI->setReturnAddrSaveIndex(RASI);
4624   }
4625   return DAG.getFrameIndex(RASI, PtrVT);
4626 }
4627
4628 SDValue
4629 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
4630   MachineFunction &MF = DAG.getMachineFunction();
4631   bool isPPC64 = PPCSubTarget.isPPC64();
4632   bool isDarwinABI = PPCSubTarget.isDarwinABI();
4633   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4634
4635   // Get current frame pointer save index.  The users of this index will be
4636   // primarily DYNALLOC instructions.
4637   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
4638   int FPSI = FI->getFramePointerSaveIndex();
4639
4640   // If the frame pointer save index hasn't been defined yet.
4641   if (!FPSI) {
4642     // Find out the fixed offset of the frame pointer save area.
4643     int FPOffset = PPCFrameLowering::getFramePointerSaveOffset(isPPC64,
4644                                                                isDarwinABI);
4645
4646     // Allocate the frame index for the frame pointer save area.
4647     FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
4648     // Save the result.
4649     FI->setFramePointerSaveIndex(FPSI);
4650   }
4651   return DAG.getFrameIndex(FPSI, PtrVT);
4652 }
4653
4654 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
4655                                                    SelectionDAG &DAG,
4656                                                    const PPCSubtarget &Subtarget) const {
4657   // Get the inputs.
4658   SDValue Chain = Op.getOperand(0);
4659   SDValue Size = Op.getOperand(1);
4660   SDLoc dl(Op);
4661
4662   // Get the correct type for pointers.
4663   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
4664   // Negate the size.
4665   SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
4666                                 DAG.getConstant(0, PtrVT), Size);
4667   // Construct a node for the frame pointer save index.
4668   SDValue FPSIdx = getFramePointerFrameIndex(DAG);
4669   // Build a DYNALLOC node.
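  // (For orientation, an assumption about later stages: after isel the
  // DYNALLOC pseudo is turned into a stwux/stdux-style update that moves r1
  // down by NegSize while re-storing the saved back chain, so the stack
  // linkage stays valid after the allocation.)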
4670 SDValue Ops[3] = { Chain, NegSize, FPSIdx }; 4671 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 4672 return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops, 3); 4673} 4674 4675SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, 4676 SelectionDAG &DAG) const { 4677 SDLoc DL(Op); 4678 return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL, 4679 DAG.getVTList(MVT::i32, MVT::Other), 4680 Op.getOperand(0), Op.getOperand(1)); 4681} 4682 4683SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, 4684 SelectionDAG &DAG) const { 4685 SDLoc DL(Op); 4686 return DAG.getNode(PPCISD::EH_SJLJ_LONGJMP, DL, MVT::Other, 4687 Op.getOperand(0), Op.getOperand(1)); 4688} 4689 4690/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 4691/// possible. 4692SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { 4693 // Not FP? Not a fsel. 4694 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 4695 !Op.getOperand(2).getValueType().isFloatingPoint()) 4696 return Op; 4697 4698 // We might be able to do better than this under some circumstances, but in 4699 // general, fsel-based lowering of select is a finite-math-only optimization. 4700 // For more information, see section F.3 of the 2.06 ISA specification. 4701 if (!DAG.getTarget().Options.NoInfsFPMath || 4702 !DAG.getTarget().Options.NoNaNsFPMath) 4703 return Op; 4704 4705 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 4706 4707 EVT ResVT = Op.getValueType(); 4708 EVT CmpVT = Op.getOperand(0).getValueType(); 4709 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 4710 SDValue TV = Op.getOperand(2), FV = Op.getOperand(3); 4711 SDLoc dl(Op); 4712 4713 // If the RHS of the comparison is a 0.0, we don't need to do the 4714 // subtraction at all. 4715 SDValue Sel1; 4716 if (isFloatingPointZero(RHS)) 4717 switch (CC) { 4718 default: break; // SETUO etc aren't handled by fsel. 4719 case ISD::SETNE: 4720 std::swap(TV, FV); 4721 case ISD::SETEQ: 4722 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4723 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4724 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4725 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4726 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4727 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4728 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV); 4729 case ISD::SETULT: 4730 case ISD::SETLT: 4731 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 4732 case ISD::SETOGE: 4733 case ISD::SETGE: 4734 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4735 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4736 return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV); 4737 case ISD::SETUGT: 4738 case ISD::SETGT: 4739 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 4740 case ISD::SETOLE: 4741 case ISD::SETLE: 4742 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 4743 LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS); 4744 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4745 DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV); 4746 } 4747 4748 SDValue Cmp; 4749 switch (CC) { 4750 default: break; // SETUO etc aren't handled by fsel. 
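    // Each case below rewrites the comparison as a subtraction whose sign
    // fsel can test: PPCISD::FSEL(C, A, B) yields A when C >= +0.0 and B
    // otherwise, so "LHS >= RHS" becomes FSEL(LHS - RHS, TV, FV), and the
    // remaining predicates are formed by swapping TV/FV and/or the operands
    // of the subtraction.  (Only safe because NaNs and infinities were
    // excluded above.)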
4751 case ISD::SETNE: 4752 std::swap(TV, FV); 4753 case ISD::SETEQ: 4754 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4755 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4756 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4757 Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4758 if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits 4759 Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1); 4760 return DAG.getNode(PPCISD::FSEL, dl, ResVT, 4761 DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV); 4762 case ISD::SETULT: 4763 case ISD::SETLT: 4764 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4765 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4766 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4767 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4768 case ISD::SETOGE: 4769 case ISD::SETGE: 4770 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS); 4771 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4772 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4773 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4774 case ISD::SETUGT: 4775 case ISD::SETGT: 4776 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4777 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4778 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4779 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV); 4780 case ISD::SETOLE: 4781 case ISD::SETLE: 4782 Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS); 4783 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 4784 Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp); 4785 return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV); 4786 } 4787 return Op; 4788} 4789 4790// FIXME: Split this code up when LegalizeDAGTypes lands. 4791SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, 4792 SDLoc dl) const { 4793 assert(Op.getOperand(0).getValueType().isFloatingPoint()); 4794 SDValue Src = Op.getOperand(0); 4795 if (Src.getValueType() == MVT::f32) 4796 Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); 4797 4798 SDValue Tmp; 4799 switch (Op.getSimpleValueType().SimpleTy) { 4800 default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); 4801 case MVT::i32: 4802 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : 4803 (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : 4804 PPCISD::FCTIDZ), 4805 dl, MVT::f64, Src); 4806 break; 4807 case MVT::i64: 4808 assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && 4809 "i64 FP_TO_UINT is supported only with FPCVT"); 4810 Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : 4811 PPCISD::FCTIDUZ, 4812 dl, MVT::f64, Src); 4813 break; 4814 } 4815 4816 // Convert the FP value to an int value through memory. 4817 bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && 4818 (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); 4819 SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); 4820 int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); 4821 MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); 4822 4823 // Emit a store to the stack slot. 
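  // The fcti*z result is produced in an FPR, so it reaches the integer side
  // through memory: with STFIWX only the 32-bit integer image is stored,
  // otherwise the full f64 image is stored and the integer word is loaded
  // back (the +4 bias below selects the low word of the big-endian slot).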
4824 SDValue Chain; 4825 if (i32Stack) { 4826 MachineFunction &MF = DAG.getMachineFunction(); 4827 MachineMemOperand *MMO = 4828 MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, 4); 4829 SDValue Ops[] = { DAG.getEntryNode(), Tmp, FIPtr }; 4830 Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 4831 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 4832 MVT::i32, MMO); 4833 } else 4834 Chain = DAG.getStore(DAG.getEntryNode(), dl, Tmp, FIPtr, 4835 MPI, false, false, 0); 4836 4837 // Result is a load from the stack slot. If loading 4 bytes, make sure to 4838 // add in a bias. 4839 if (Op.getValueType() == MVT::i32 && !i32Stack) { 4840 FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, 4841 DAG.getConstant(4, FIPtr.getValueType())); 4842 MPI = MachinePointerInfo(); 4843 } 4844 4845 return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MPI, 4846 false, false, false, 0); 4847} 4848 4849SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, 4850 SelectionDAG &DAG) const { 4851 SDLoc dl(Op); 4852 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 4853 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 4854 return SDValue(); 4855 4856 assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && 4857 "UINT_TO_FP is supported only with FPCVT"); 4858 4859 // If we have FCFIDS, then use it when converting to single-precision. 4860 // Otherwise, convert to double-precision and then round. 4861 unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4862 (Op.getOpcode() == ISD::UINT_TO_FP ? 4863 PPCISD::FCFIDUS : PPCISD::FCFIDS) : 4864 (Op.getOpcode() == ISD::UINT_TO_FP ? 4865 PPCISD::FCFIDU : PPCISD::FCFID); 4866 MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? 4867 MVT::f32 : MVT::f64; 4868 4869 if (Op.getOperand(0).getValueType() == MVT::i64) { 4870 SDValue SINT = Op.getOperand(0); 4871 // When converting to single-precision, we actually need to convert 4872 // to double-precision first and then round to single-precision. 4873 // To avoid double-rounding effects during that operation, we have 4874 // to prepare the input operand. Bits that might be truncated when 4875 // converting to double-precision are replaced by a bit that won't 4876 // be lost at this stage, but is below the single-precision rounding 4877 // position. 4878 // 4879 // However, if -enable-unsafe-fp-math is in effect, accept double 4880 // rounding to avoid the extra overhead. 4881 if (Op.getValueType() == MVT::f32 && 4882 !PPCSubTarget.hasFPCVT() && 4883 !DAG.getTarget().Options.UnsafeFPMath) { 4884 4885 // Twiddle input to make sure the low 11 bits are zero. (If this 4886 // is the case, we are guaranteed the value will fit into the 53 bit 4887 // mantissa of an IEEE double-precision value without rounding.) 4888 // If any of those low 11 bits were not zero originally, make sure 4889 // bit 12 (value 2048) is set instead, so that the final rounding 4890 // to single-precision gets the correct result. 
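        // Worked example (illustrative): SINT = 0x1000000000000005 has
        // nonzero low bits, so it becomes 0x1000000000000800.  Only bits 60
        // and 11 are set, so the value converts exactly to double, and bit 11
        // lies below the f32 rounding position, acting as the sticky bit the
        // discarded low bits would have provided.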
4891 SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4892 SINT, DAG.getConstant(2047, MVT::i64)); 4893 Round = DAG.getNode(ISD::ADD, dl, MVT::i64, 4894 Round, DAG.getConstant(2047, MVT::i64)); 4895 Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT); 4896 Round = DAG.getNode(ISD::AND, dl, MVT::i64, 4897 Round, DAG.getConstant(-2048, MVT::i64)); 4898 4899 // However, we cannot use that value unconditionally: if the magnitude 4900 // of the input value is small, the bit-twiddling we did above might 4901 // end up visibly changing the output. Fortunately, in that case, we 4902 // don't need to twiddle bits since the original input will convert 4903 // exactly to double-precision floating-point already. Therefore, 4904 // construct a conditional to use the original value if the top 11 4905 // bits are all sign-bit copies, and use the rounded value computed 4906 // above otherwise. 4907 SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64, 4908 SINT, DAG.getConstant(53, MVT::i32)); 4909 Cond = DAG.getNode(ISD::ADD, dl, MVT::i64, 4910 Cond, DAG.getConstant(1, MVT::i64)); 4911 Cond = DAG.getSetCC(dl, MVT::i32, 4912 Cond, DAG.getConstant(1, MVT::i64), ISD::SETUGT); 4913 4914 SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT); 4915 } 4916 4917 SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); 4918 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); 4919 4920 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4921 FP = DAG.getNode(ISD::FP_ROUND, dl, 4922 MVT::f32, FP, DAG.getIntPtrConstant(0)); 4923 return FP; 4924 } 4925 4926 assert(Op.getOperand(0).getValueType() == MVT::i32 && 4927 "Unhandled INT_TO_FP type in custom expander!"); 4928 // Since we only generate this in 64-bit mode, we can take advantage of 4929 // 64-bit registers. In particular, sign extend the input value into the 4930 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 4931 // then lfd it and fcfid it. 4932 MachineFunction &MF = DAG.getMachineFunction(); 4933 MachineFrameInfo *FrameInfo = MF.getFrameInfo(); 4934 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4935 4936 SDValue Ld; 4937 if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { 4938 int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); 4939 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4940 4941 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx, 4942 MachinePointerInfo::getFixedStack(FrameIdx), 4943 false, false, 0); 4944 4945 assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 && 4946 "Expected an i32 store"); 4947 MachineMemOperand *MMO = 4948 MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FrameIdx), 4949 MachineMemOperand::MOLoad, 4, 4); 4950 SDValue Ops[] = { Store, FIdx }; 4951 Ld = DAG.getMemIntrinsicNode(Op.getOpcode() == ISD::UINT_TO_FP ? 4952 PPCISD::LFIWZX : PPCISD::LFIWAX, 4953 dl, DAG.getVTList(MVT::f64, MVT::Other), 4954 Ops, 2, MVT::i32, MMO); 4955 } else { 4956 assert(PPCSubTarget.isPPC64() && 4957 "i32->FP without LFIWAX supported only on PPC64"); 4958 4959 int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); 4960 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 4961 4962 SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, 4963 Op.getOperand(0)); 4964 4965 // STD the extended value into the stack slot. 4966 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Ext64, FIdx, 4967 MachinePointerInfo::getFixedStack(FrameIdx), 4968 false, false, 0); 4969 4970 // Load the value as a double. 
4971 Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, 4972 MachinePointerInfo::getFixedStack(FrameIdx), 4973 false, false, false, 0); 4974 } 4975 4976 // FCFID it and return it. 4977 SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); 4978 if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) 4979 FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); 4980 return FP; 4981} 4982 4983SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, 4984 SelectionDAG &DAG) const { 4985 SDLoc dl(Op); 4986 /* 4987 The rounding mode is in bits 30:31 of FPSR, and has the following 4988 settings: 4989 00 Round to nearest 4990 01 Round to 0 4991 10 Round to +inf 4992 11 Round to -inf 4993 4994 FLT_ROUNDS, on the other hand, expects the following: 4995 -1 Undefined 4996 0 Round to 0 4997 1 Round to nearest 4998 2 Round to +inf 4999 3 Round to -inf 5000 5001 To perform the conversion, we do: 5002 ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) 5003 */ 5004 5005 MachineFunction &MF = DAG.getMachineFunction(); 5006 EVT VT = Op.getValueType(); 5007 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 5008 SDValue MFFSreg, InFlag; 5009 5010 // Save FP Control Word to register 5011 EVT NodeTys[] = { 5012 MVT::f64, // return register 5013 MVT::Glue // unused in this context 5014 }; 5015 SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0); 5016 5017 // Save FP register to stack slot 5018 int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false); 5019 SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); 5020 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, 5021 StackSlot, MachinePointerInfo(), false, false,0); 5022 5023 // Load FP Control Word from low 32 bits of stack slot. 5024 SDValue Four = DAG.getConstant(4, PtrVT); 5025 SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); 5026 SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), 5027 false, false, false, 0); 5028 5029 // Transform as necessary 5030 SDValue CWD1 = 5031 DAG.getNode(ISD::AND, dl, MVT::i32, 5032 CWD, DAG.getConstant(3, MVT::i32)); 5033 SDValue CWD2 = 5034 DAG.getNode(ISD::SRL, dl, MVT::i32, 5035 DAG.getNode(ISD::AND, dl, MVT::i32, 5036 DAG.getNode(ISD::XOR, dl, MVT::i32, 5037 CWD, DAG.getConstant(3, MVT::i32)), 5038 DAG.getConstant(3, MVT::i32)), 5039 DAG.getConstant(1, MVT::i32)); 5040 5041 SDValue RetVal = 5042 DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2); 5043 5044 return DAG.getNode((VT.getSizeInBits() < 16 ? 5045 ISD::TRUNCATE : ISD::ZERO_EXTEND), dl, VT, RetVal); 5046} 5047 5048SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5049 EVT VT = Op.getValueType(); 5050 unsigned BitWidth = VT.getSizeInBits(); 5051 SDLoc dl(Op); 5052 assert(Op.getNumOperands() == 3 && 5053 VT == Op.getOperand(1).getValueType() && 5054 "Unexpected SHL!"); 5055 5056 // Expand into a bunch of logical ops. Note that these ops 5057 // depend on the PPC behavior for oversized shift amounts. 
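  // Roughly, with w = BitWidth, the expansion computes
  //   OutLo = Lo << Amt
  //   OutHi = (Hi << Amt) | (Lo >> (w - Amt)) | (Lo << (Amt - w))
  // relying on the PPC shift instructions returning 0 when the shift amount
  // is w or more, so whichever Lo term does not apply contributes nothing.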
5058 SDValue Lo = Op.getOperand(0); 5059 SDValue Hi = Op.getOperand(1); 5060 SDValue Amt = Op.getOperand(2); 5061 EVT AmtVT = Amt.getValueType(); 5062 5063 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5064 DAG.getConstant(BitWidth, AmtVT), Amt); 5065 SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt); 5066 SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1); 5067 SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3); 5068 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5069 DAG.getConstant(-BitWidth, AmtVT)); 5070 SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5); 5071 SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5072 SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt); 5073 SDValue OutOps[] = { OutLo, OutHi }; 5074 return DAG.getMergeValues(OutOps, 2, dl); 5075} 5076 5077SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const { 5078 EVT VT = Op.getValueType(); 5079 SDLoc dl(Op); 5080 unsigned BitWidth = VT.getSizeInBits(); 5081 assert(Op.getNumOperands() == 3 && 5082 VT == Op.getOperand(1).getValueType() && 5083 "Unexpected SRL!"); 5084 5085 // Expand into a bunch of logical ops. Note that these ops 5086 // depend on the PPC behavior for oversized shift amounts. 5087 SDValue Lo = Op.getOperand(0); 5088 SDValue Hi = Op.getOperand(1); 5089 SDValue Amt = Op.getOperand(2); 5090 EVT AmtVT = Amt.getValueType(); 5091 5092 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5093 DAG.getConstant(BitWidth, AmtVT), Amt); 5094 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5095 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5096 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5097 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5098 DAG.getConstant(-BitWidth, AmtVT)); 5099 SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5); 5100 SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6); 5101 SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt); 5102 SDValue OutOps[] = { OutLo, OutHi }; 5103 return DAG.getMergeValues(OutOps, 2, dl); 5104} 5105 5106SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const { 5107 SDLoc dl(Op); 5108 EVT VT = Op.getValueType(); 5109 unsigned BitWidth = VT.getSizeInBits(); 5110 assert(Op.getNumOperands() == 3 && 5111 VT == Op.getOperand(1).getValueType() && 5112 "Unexpected SRA!"); 5113 5114 // Expand into a bunch of logical ops, followed by a select_cc. 5115 SDValue Lo = Op.getOperand(0); 5116 SDValue Hi = Op.getOperand(1); 5117 SDValue Amt = Op.getOperand(2); 5118 EVT AmtVT = Amt.getValueType(); 5119 5120 SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT, 5121 DAG.getConstant(BitWidth, AmtVT), Amt); 5122 SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt); 5123 SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1); 5124 SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3); 5125 SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt, 5126 DAG.getConstant(-BitWidth, AmtVT)); 5127 SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5); 5128 SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt); 5129 SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, AmtVT), 5130 Tmp4, Tmp6, ISD::SETLE); 5131 SDValue OutOps[] = { OutLo, OutHi }; 5132 return DAG.getMergeValues(OutOps, 2, dl); 5133} 5134 5135//===----------------------------------------------------------------------===// 5136// Vector related lowering. 5137// 5138 5139/// BuildSplatI - Build a canonical splati of Val with an element size of 5140/// SplatSize. 
Cast the result to VT. 5141static SDValue BuildSplatI(int Val, unsigned SplatSize, EVT VT, 5142 SelectionDAG &DAG, SDLoc dl) { 5143 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 5144 5145 static const EVT VTys[] = { // canonical VT to use for each size. 5146 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 5147 }; 5148 5149 EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 5150 5151 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 5152 if (Val == -1) 5153 SplatSize = 1; 5154 5155 EVT CanonicalVT = VTys[SplatSize-1]; 5156 5157 // Build a canonical splat for this value. 5158 SDValue Elt = DAG.getConstant(Val, MVT::i32); 5159 SmallVector<SDValue, 8> Ops; 5160 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 5161 SDValue Res = DAG.getNode(ISD::BUILD_VECTOR, dl, CanonicalVT, 5162 &Ops[0], Ops.size()); 5163 return DAG.getNode(ISD::BITCAST, dl, ReqVT, Res); 5164} 5165 5166/// BuildIntrinsicOp - Return a unary operator intrinsic node with the 5167/// specified intrinsic ID. 5168static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, 5169 SelectionDAG &DAG, SDLoc dl, 5170 EVT DestVT = MVT::Other) { 5171 if (DestVT == MVT::Other) DestVT = Op.getValueType(); 5172 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5173 DAG.getConstant(IID, MVT::i32), Op); 5174} 5175 5176/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 5177/// specified intrinsic ID. 5178static SDValue BuildIntrinsicOp(unsigned IID, SDValue LHS, SDValue RHS, 5179 SelectionDAG &DAG, SDLoc dl, 5180 EVT DestVT = MVT::Other) { 5181 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 5182 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5183 DAG.getConstant(IID, MVT::i32), LHS, RHS); 5184} 5185 5186/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 5187/// specified intrinsic ID. 5188static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1, 5189 SDValue Op2, SelectionDAG &DAG, 5190 SDLoc dl, EVT DestVT = MVT::Other) { 5191 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 5192 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, 5193 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 5194} 5195 5196 5197/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 5198/// amount. The result has the specified value type. 5199static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, 5200 EVT VT, SelectionDAG &DAG, SDLoc dl) { 5201 // Force LHS/RHS to be the right type. 5202 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS); 5203 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS); 5204 5205 int Ops[16]; 5206 for (unsigned i = 0; i != 16; ++i) 5207 Ops[i] = i + Amt; 5208 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops); 5209 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5210} 5211 5212// If this is a case we can't handle, return null and let the default 5213// expansion code take care of it. If we CAN select this case, and if it 5214// selects to a single instruction, return Op. Otherwise, if we can codegen 5215// this case more efficiently than a constant pool load, lower it to the 5216// sequence of ops that should be used. 5217SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, 5218 SelectionDAG &DAG) const { 5219 SDLoc dl(Op); 5220 BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode()); 5221 assert(BVN != 0 && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR"); 5222 5223 // Check if this is a splat of a constant value. 
5224 APInt APSplatBits, APSplatUndef; 5225 unsigned SplatBitSize; 5226 bool HasAnyUndefs; 5227 if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize, 5228 HasAnyUndefs, 0, true) || SplatBitSize > 32) 5229 return SDValue(); 5230 5231 unsigned SplatBits = APSplatBits.getZExtValue(); 5232 unsigned SplatUndef = APSplatUndef.getZExtValue(); 5233 unsigned SplatSize = SplatBitSize / 8; 5234 5235 // First, handle single instruction cases. 5236 5237 // All zeros? 5238 if (SplatBits == 0) { 5239 // Canonicalize all zero vectors to be v4i32. 5240 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 5241 SDValue Z = DAG.getConstant(0, MVT::i32); 5242 Z = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v4i32, Z, Z, Z, Z); 5243 Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z); 5244 } 5245 return Op; 5246 } 5247 5248 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 5249 int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >> 5250 (32-SplatBitSize)); 5251 if (SextVal >= -16 && SextVal <= 15) 5252 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG, dl); 5253 5254 5255 // Two instruction sequences. 5256 5257 // If this value is in the range [-32,30] and is even, use: 5258 // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2) 5259 // If this value is in the range [17,31] and is odd, use: 5260 // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16) 5261 // If this value is in the range [-31,-17] and is odd, use: 5262 // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16) 5263 // Note the last two are three-instruction sequences. 5264 if (SextVal >= -32 && SextVal <= 31) { 5265 // To avoid having these optimizations undone by constant folding, 5266 // we convert to a pseudo that will be expanded later into one of 5267 // the above forms. 5268 SDValue Elt = DAG.getConstant(SextVal, MVT::i32); 5269 EVT VT = Op.getValueType(); 5270 int Size = VT == MVT::v16i8 ? 1 : (VT == MVT::v8i16 ? 2 : 4); 5271 SDValue EltSize = DAG.getConstant(Size, MVT::i32); 5272 return DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize); 5273 } 5274 5275 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 5276 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 5277 // for fneg/fabs. 5278 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 5279 // Make -1 and vspltisw -1: 5280 SDValue OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG, dl); 5281 5282 // Make the VSLW intrinsic, computing 0x8000_0000. 5283 SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 5284 OnesV, DAG, dl); 5285 5286 // xor by OnesV to invert it. 5287 Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV); 5288 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5289 } 5290 5291 // Check to see if this is a wide variety of vsplti*, binop self cases. 5292 static const signed char SplatCsts[] = { 5293 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 5294 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 5295 }; 5296 5297 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 5298 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 5299 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 5300 int i = SplatCsts[idx]; 5301 5302 // Figure out what shift amount will be used by altivec if shifted by i in 5303 // this splat size. 5304 unsigned TypeShiftAmt = i & (SplatBitSize-1); 5305 5306 // vsplti + shl self. 
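    // (For instance, a v16i8 splat of the value 64 is reachable here: it is
    // built as vspltisb 4 followed by vslb v,v,v, since each lane then
    // computes 4 << 4 = 64.  The srl/sra/rol cases below play the same game
    // with the other shift and rotate intrinsics.)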
5307 if (SextVal == (int)((unsigned)i << TypeShiftAmt)) { 5308 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5309 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5310 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 5311 Intrinsic::ppc_altivec_vslw 5312 }; 5313 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5314 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5315 } 5316 5317 // vsplti + srl self. 5318 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5319 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5320 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5321 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 5322 Intrinsic::ppc_altivec_vsrw 5323 }; 5324 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5325 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5326 } 5327 5328 // vsplti + sra self. 5329 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 5330 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5331 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5332 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 5333 Intrinsic::ppc_altivec_vsraw 5334 }; 5335 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5336 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5337 } 5338 5339 // vsplti + rol self. 5340 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 5341 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 5342 SDValue Res = BuildSplatI(i, SplatSize, MVT::Other, DAG, dl); 5343 static const unsigned IIDs[] = { // Intrinsic to use for each size. 5344 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 5345 Intrinsic::ppc_altivec_vrlw 5346 }; 5347 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); 5348 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); 5349 } 5350 5351 // t = vsplti c, result = vsldoi t, t, 1 5352 if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) { 5353 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5354 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG, dl); 5355 } 5356 // t = vsplti c, result = vsldoi t, t, 2 5357 if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) { 5358 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5359 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG, dl); 5360 } 5361 // t = vsplti c, result = vsldoi t, t, 3 5362 if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) { 5363 SDValue T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG, dl); 5364 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG, dl); 5365 } 5366 } 5367 5368 return SDValue(); 5369} 5370 5371/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 5372/// the specified operations to build the shuffle. 
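/// Each 32-bit table entry packs a cost in bits 31:30, an opcode in bits
/// 29:26, and two 13-bit operand IDs in bits 25:13 and 12:0; the IDs are
/// themselves indices back into the table, with OP_COPY terminating the
/// recursion.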
5373static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, 5374 SDValue RHS, SelectionDAG &DAG, 5375 SDLoc dl) { 5376 unsigned OpNum = (PFEntry >> 26) & 0x0F; 5377 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 5378 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 5379 5380 enum { 5381 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 5382 OP_VMRGHW, 5383 OP_VMRGLW, 5384 OP_VSPLTISW0, 5385 OP_VSPLTISW1, 5386 OP_VSPLTISW2, 5387 OP_VSPLTISW3, 5388 OP_VSLDOI4, 5389 OP_VSLDOI8, 5390 OP_VSLDOI12 5391 }; 5392 5393 if (OpNum == OP_COPY) { 5394 if (LHSID == (1*9+2)*9+3) return LHS; 5395 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 5396 return RHS; 5397 } 5398 5399 SDValue OpLHS, OpRHS; 5400 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl); 5401 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl); 5402 5403 int ShufIdxs[16]; 5404 switch (OpNum) { 5405 default: llvm_unreachable("Unknown i32 permute!"); 5406 case OP_VMRGHW: 5407 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 5408 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 5409 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 5410 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 5411 break; 5412 case OP_VMRGLW: 5413 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 5414 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 5415 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 5416 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 5417 break; 5418 case OP_VSPLTISW0: 5419 for (unsigned i = 0; i != 16; ++i) 5420 ShufIdxs[i] = (i&3)+0; 5421 break; 5422 case OP_VSPLTISW1: 5423 for (unsigned i = 0; i != 16; ++i) 5424 ShufIdxs[i] = (i&3)+4; 5425 break; 5426 case OP_VSPLTISW2: 5427 for (unsigned i = 0; i != 16; ++i) 5428 ShufIdxs[i] = (i&3)+8; 5429 break; 5430 case OP_VSPLTISW3: 5431 for (unsigned i = 0; i != 16; ++i) 5432 ShufIdxs[i] = (i&3)+12; 5433 break; 5434 case OP_VSLDOI4: 5435 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl); 5436 case OP_VSLDOI8: 5437 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl); 5438 case OP_VSLDOI12: 5439 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl); 5440 } 5441 EVT VT = OpLHS.getValueType(); 5442 OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS); 5443 OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS); 5444 SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs); 5445 return DAG.getNode(ISD::BITCAST, dl, VT, T); 5446} 5447 5448/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 5449/// is a shuffle we can handle in a single instruction, return it. Otherwise, 5450/// return the code it can be lowered into. Worst case, it can always be 5451/// lowered into a vperm. 5452SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, 5453 SelectionDAG &DAG) const { 5454 SDLoc dl(Op); 5455 SDValue V1 = Op.getOperand(0); 5456 SDValue V2 = Op.getOperand(1); 5457 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); 5458 EVT VT = Op.getValueType(); 5459 5460 // Cases that are handled by instructions that take permute immediates 5461 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 5462 // selected by the instruction selector. 
5463 if (V2.getOpcode() == ISD::UNDEF) { 5464 if (PPC::isSplatShuffleMask(SVOp, 1) || 5465 PPC::isSplatShuffleMask(SVOp, 2) || 5466 PPC::isSplatShuffleMask(SVOp, 4) || 5467 PPC::isVPKUWUMShuffleMask(SVOp, true) || 5468 PPC::isVPKUHUMShuffleMask(SVOp, true) || 5469 PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || 5470 PPC::isVMRGLShuffleMask(SVOp, 1, true) || 5471 PPC::isVMRGLShuffleMask(SVOp, 2, true) || 5472 PPC::isVMRGLShuffleMask(SVOp, 4, true) || 5473 PPC::isVMRGHShuffleMask(SVOp, 1, true) || 5474 PPC::isVMRGHShuffleMask(SVOp, 2, true) || 5475 PPC::isVMRGHShuffleMask(SVOp, 4, true)) { 5476 return Op; 5477 } 5478 } 5479 5480 // Altivec has a variety of "shuffle immediates" that take two vector inputs 5481 // and produce a fixed permutation. If any of these match, do not lower to 5482 // VPERM. 5483 if (PPC::isVPKUWUMShuffleMask(SVOp, false) || 5484 PPC::isVPKUHUMShuffleMask(SVOp, false) || 5485 PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || 5486 PPC::isVMRGLShuffleMask(SVOp, 1, false) || 5487 PPC::isVMRGLShuffleMask(SVOp, 2, false) || 5488 PPC::isVMRGLShuffleMask(SVOp, 4, false) || 5489 PPC::isVMRGHShuffleMask(SVOp, 1, false) || 5490 PPC::isVMRGHShuffleMask(SVOp, 2, false) || 5491 PPC::isVMRGHShuffleMask(SVOp, 4, false)) 5492 return Op; 5493 5494 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 5495 // perfect shuffle table to emit an optimal matching sequence. 5496 ArrayRef<int> PermMask = SVOp->getMask(); 5497 5498 unsigned PFIndexes[4]; 5499 bool isFourElementShuffle = true; 5500 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 5501 unsigned EltNo = 8; // Start out undef. 5502 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 5503 if (PermMask[i*4+j] < 0) 5504 continue; // Undef, ignore it. 5505 5506 unsigned ByteSource = PermMask[i*4+j]; 5507 if ((ByteSource & 3) != j) { 5508 isFourElementShuffle = false; 5509 break; 5510 } 5511 5512 if (EltNo == 8) { 5513 EltNo = ByteSource/4; 5514 } else if (EltNo != ByteSource/4) { 5515 isFourElementShuffle = false; 5516 break; 5517 } 5518 } 5519 PFIndexes[i] = EltNo; 5520 } 5521 5522 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 5523 // perfect shuffle vector to determine if it is cost effective to do this as 5524 // discrete instructions, or whether we should use a vperm. 5525 if (isFourElementShuffle) { 5526 // Compute the index in the perfect shuffle table. 5527 unsigned PFTableIndex = 5528 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 5529 5530 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 5531 unsigned Cost = (PFEntry >> 30); 5532 5533 // Determining when to avoid vperm is tricky. Many things affect the cost 5534 // of vperm, particularly how many times the perm mask needs to be computed. 5535 // For example, if the perm mask can be hoisted out of a loop or is already 5536 // used (perhaps because there are multiple permutes with the same shuffle 5537 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 5538 // the loop requires an extra register. 5539 // 5540 // As a compromise, we only emit discrete instructions if the shuffle can be 5541 // generated in 3 or fewer operations. When we have loop information 5542 // available, if this block is within a loop, we should avoid using vperm 5543 // for 3-operation perms and use a constant pool load instead. 
5544 if (Cost < 3) 5545 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl); 5546 } 5547 5548 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 5549 // vector that will get spilled to the constant pool. 5550 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 5551 5552 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 5553 // that it is in input element units, not in bytes. Convert now. 5554 EVT EltVT = V1.getValueType().getVectorElementType(); 5555 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 5556 5557 SmallVector<SDValue, 16> ResultMask; 5558 for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { 5559 unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; 5560 5561 for (unsigned j = 0; j != BytesPerElement; ++j) 5562 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 5563 MVT::i32)); 5564 } 5565 5566 SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, 5567 &ResultMask[0], ResultMask.size()); 5568 return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); 5569} 5570 5571/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 5572/// altivec comparison. If it is, return true and fill in Opc/isDot with 5573/// information about the intrinsic. 5574static bool getAltivecCompareInfo(SDValue Intrin, int &CompareOpc, 5575 bool &isDot) { 5576 unsigned IntrinsicID = 5577 cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue(); 5578 CompareOpc = -1; 5579 isDot = false; 5580 switch (IntrinsicID) { 5581 default: return false; 5582 // Comparison predicates. 5583 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 5584 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 5585 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 5586 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 5587 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 5588 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 5589 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 5590 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 5591 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 5592 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 5593 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 5594 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 5595 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 5596 5597 // Normal Comparisons. 
5598 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 5599 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 5600 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 5601 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 5602 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 5603 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 5604 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 5605 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 5606 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 5607 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 5608 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 5609 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 5610 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 5611 } 5612 return true; 5613} 5614 5615/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 5616/// lower, do it, otherwise return null. 5617SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, 5618 SelectionDAG &DAG) const { 5619 // If this is a lowered altivec predicate compare, CompareOpc is set to the 5620 // opcode number of the comparison. 5621 SDLoc dl(Op); 5622 int CompareOpc; 5623 bool isDot; 5624 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 5625 return SDValue(); // Don't custom lower most intrinsics. 5626 5627 // If this is a non-dot comparison, make the VCMP node and we are done. 5628 if (!isDot) { 5629 SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(), 5630 Op.getOperand(1), Op.getOperand(2), 5631 DAG.getConstant(CompareOpc, MVT::i32)); 5632 return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp); 5633 } 5634 5635 // Create the PPCISD altivec 'dot' comparison node. 5636 SDValue Ops[] = { 5637 Op.getOperand(2), // LHS 5638 Op.getOperand(3), // RHS 5639 DAG.getConstant(CompareOpc, MVT::i32) 5640 }; 5641 EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue }; 5642 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 5643 5644 // Now that we have the comparison, emit a copy from the CR to a GPR. 5645 // This is flagged to the above dot comparison. 5646 SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32, 5647 DAG.getRegister(PPC::CR6, MVT::i32), 5648 CompNode.getValue(1)); 5649 5650 // Unpack the result based on how the target uses it. 5651 unsigned BitNo; // Bit # of CR6. 5652 bool InvertBit; // Invert result? 5653 switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) { 5654 default: // Can't happen, don't crash on invalid number though. 5655 case 0: // Return the value of the EQ bit of CR6. 5656 BitNo = 0; InvertBit = false; 5657 break; 5658 case 1: // Return the inverted value of the EQ bit of CR6. 5659 BitNo = 0; InvertBit = true; 5660 break; 5661 case 2: // Return the value of the LT bit of CR6. 5662 BitNo = 2; InvertBit = false; 5663 break; 5664 case 3: // Return the inverted value of the LT bit of CR6. 5665 BitNo = 2; InvertBit = true; 5666 break; 5667 } 5668 5669 // Shift the bit into the low position. 5670 Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags, 5671 DAG.getConstant(8-(3-BitNo), MVT::i32)); 5672 // Isolate the bit. 5673 Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags, 5674 DAG.getConstant(1, MVT::i32)); 5675 5676 // If we are supposed to, toggle the bit. 
5677 if (InvertBit) 5678 Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags, 5679 DAG.getConstant(1, MVT::i32)); 5680 return Flags; 5681} 5682 5683SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, 5684 SelectionDAG &DAG) const { 5685 SDLoc dl(Op); 5686 // Create a stack slot that is 16-byte aligned. 5687 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 5688 int FrameIdx = FrameInfo->CreateStackObject(16, 16, false); 5689 EVT PtrVT = getPointerTy(); 5690 SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 5691 5692 // Store the input value into Value#0 of the stack slot. 5693 SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, 5694 Op.getOperand(0), FIdx, MachinePointerInfo(), 5695 false, false, 0); 5696 // Load it out. 5697 return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), 5698 false, false, false, 0); 5699} 5700 5701SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { 5702 SDLoc dl(Op); 5703 if (Op.getValueType() == MVT::v4i32) { 5704 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5705 5706 SDValue Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG, dl); 5707 SDValue Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG, dl);//+16 as shift amt. 5708 5709 SDValue RHSSwap = // = vrlw RHS, 16 5710 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl); 5711 5712 // Shrinkify inputs to v8i16. 5713 LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS); 5714 RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS); 5715 RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap); 5716 5717 // Low parts multiplied together, generating 32-bit results (we ignore the 5718 // top parts). 5719 SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 5720 LHS, RHS, DAG, dl, MVT::v4i32); 5721 5722 SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 5723 LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32); 5724 // Shift the high parts up 16 bits. 5725 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, 5726 Neg16, DAG, dl); 5727 return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd); 5728 } else if (Op.getValueType() == MVT::v8i16) { 5729 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5730 5731 SDValue Zero = BuildSplatI(0, 1, MVT::v8i16, DAG, dl); 5732 5733 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 5734 LHS, RHS, Zero, DAG, dl); 5735 } else if (Op.getValueType() == MVT::v16i8) { 5736 SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); 5737 5738 // Multiply the even 8-bit parts, producing 16-bit sums. 5739 SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 5740 LHS, RHS, DAG, dl, MVT::v8i16); 5741 EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts); 5742 5743 // Multiply the odd 8-bit parts, producing 16-bit sums. 5744 SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 5745 LHS, RHS, DAG, dl, MVT::v8i16); 5746 OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); 5747 5748 // Merge the results together. 5749 int Ops[16]; 5750 for (unsigned i = 0; i != 8; ++i) { 5751 Ops[i*2 ] = 2*i+1; 5752 Ops[i*2+1] = 2*i+1+16; 5753 } 5754 return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); 5755 } else { 5756 llvm_unreachable("Unknown mul to lower!"); 5757 } 5758} 5759 5760/// LowerOperation - Provide custom lowering hooks for some operations. 
5761/// 5762SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { 5763 switch (Op.getOpcode()) { 5764 default: llvm_unreachable("Wasn't expecting to be able to lower this!"); 5765 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 5766 case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); 5767 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 5768 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 5769 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 5770 case ISD::SETCC: return LowerSETCC(Op, DAG); 5771 case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); 5772 case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); 5773 case ISD::VASTART: 5774 return LowerVASTART(Op, DAG, PPCSubTarget); 5775 5776 case ISD::VAARG: 5777 return LowerVAARG(Op, DAG, PPCSubTarget); 5778 5779 case ISD::VACOPY: 5780 return LowerVACOPY(Op, DAG, PPCSubTarget); 5781 5782 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 5783 case ISD::DYNAMIC_STACKALLOC: 5784 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 5785 5786 case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); 5787 case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); 5788 5789 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 5790 case ISD::FP_TO_UINT: 5791 case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, 5792 SDLoc(Op)); 5793 case ISD::UINT_TO_FP: 5794 case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG); 5795 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 5796 5797 // Lower 64-bit shifts. 5798 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 5799 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 5800 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 5801 5802 // Vector-related lowering. 5803 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 5804 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 5805 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 5806 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 5807 case ISD::MUL: return LowerMUL(Op, DAG); 5808 5809 // For counter-based loop handling. 5810 case ISD::INTRINSIC_W_CHAIN: return SDValue(); 5811 5812 // Frame & Return address. 
5813 case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); 5814 case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); 5815 } 5816} 5817 5818void PPCTargetLowering::ReplaceNodeResults(SDNode *N, 5819 SmallVectorImpl<SDValue>&Results, 5820 SelectionDAG &DAG) const { 5821 const TargetMachine &TM = getTargetMachine(); 5822 SDLoc dl(N); 5823 switch (N->getOpcode()) { 5824 default: 5825 llvm_unreachable("Do not know how to custom type legalize this operation!"); 5826 case ISD::INTRINSIC_W_CHAIN: { 5827 if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() != 5828 Intrinsic::ppc_is_decremented_ctr_nonzero) 5829 break; 5830 5831 assert(N->getValueType(0) == MVT::i1 && 5832 "Unexpected result type for CTR decrement intrinsic"); 5833 EVT SVT = getSetCCResultType(*DAG.getContext(), N->getValueType(0)); 5834 SDVTList VTs = DAG.getVTList(SVT, MVT::Other); 5835 SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0), 5836 N->getOperand(1)); 5837 5838 Results.push_back(NewInt); 5839 Results.push_back(NewInt.getValue(1)); 5840 break; 5841 } 5842 case ISD::VAARG: { 5843 if (!TM.getSubtarget<PPCSubtarget>().isSVR4ABI() 5844 || TM.getSubtarget<PPCSubtarget>().isPPC64()) 5845 return; 5846 5847 EVT VT = N->getValueType(0); 5848 5849 if (VT == MVT::i64) { 5850 SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); 5851 5852 Results.push_back(NewNode); 5853 Results.push_back(NewNode.getValue(1)); 5854 } 5855 return; 5856 } 5857 case ISD::FP_ROUND_INREG: { 5858 assert(N->getValueType(0) == MVT::ppcf128); 5859 assert(N->getOperand(0).getValueType() == MVT::ppcf128); 5860 SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5861 MVT::f64, N->getOperand(0), 5862 DAG.getIntPtrConstant(0)); 5863 SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, 5864 MVT::f64, N->getOperand(0), 5865 DAG.getIntPtrConstant(1)); 5866 5867 // Add the two halves of the long double in round-to-zero mode. 5868 SDValue FPreg = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi); 5869 5870 // We know the low half is about to be thrown away, so just use something 5871 // convenient. 5872 Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, 5873 FPreg, FPreg)); 5874 return; 5875 } 5876 case ISD::FP_TO_SINT: 5877 // LowerFP_TO_INT() can only handle f32 and f64. 5878 if (N->getOperand(0).getValueType() == MVT::ppcf128) 5879 return; 5880 Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); 5881 return; 5882 } 5883} 5884 5885 5886//===----------------------------------------------------------------------===// 5887// Other Lowering Code 5888//===----------------------------------------------------------------------===// 5889 5890MachineBasicBlock * 5891PPCTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, 5892 bool is64bit, unsigned BinOpcode) const { 5893 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 
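 // Note: when BinOpcode is 0 the loop body below emits no ALU instruction; the incoming value register is fed straight to the st[wd]cx., so the generated loop performs a plain atomic exchange.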
5894 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5895 5896 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5897 MachineFunction *F = BB->getParent(); 5898 MachineFunction::iterator It = BB; 5899 ++It; 5900 5901 unsigned dest = MI->getOperand(0).getReg(); 5902 unsigned ptrA = MI->getOperand(1).getReg(); 5903 unsigned ptrB = MI->getOperand(2).getReg(); 5904 unsigned incr = MI->getOperand(3).getReg(); 5905 DebugLoc dl = MI->getDebugLoc(); 5906 5907 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 5908 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5909 F->insert(It, loopMBB); 5910 F->insert(It, exitMBB); 5911 exitMBB->splice(exitMBB->begin(), BB, 5912 llvm::next(MachineBasicBlock::iterator(MI)), 5913 BB->end()); 5914 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5915 5916 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5917 unsigned TmpReg = (!BinOpcode) ? incr : 5918 RegInfo.createVirtualRegister( 5919 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 5920 (const TargetRegisterClass *) &PPC::GPRCRegClass); 5921 5922 // thisMBB: 5923 // ... 5924 // fallthrough --> loopMBB 5925 BB->addSuccessor(loopMBB); 5926 5927 // loopMBB: 5928 // l[wd]arx dest, ptr 5929 // add r0, dest, incr 5930 // st[wd]cx. r0, ptr 5931 // bne- loopMBB 5932 // fallthrough --> exitMBB 5933 BB = loopMBB; 5934 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 5935 .addReg(ptrA).addReg(ptrB); 5936 if (BinOpcode) 5937 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest); 5938 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 5939 .addReg(TmpReg).addReg(ptrA).addReg(ptrB); 5940 BuildMI(BB, dl, TII->get(PPC::BCC)) 5941 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 5942 BB->addSuccessor(loopMBB); 5943 BB->addSuccessor(exitMBB); 5944 5945 // exitMBB: 5946 // ... 5947 BB = exitMBB; 5948 return BB; 5949} 5950 5951MachineBasicBlock * 5952PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, 5953 MachineBasicBlock *BB, 5954 bool is8bit, // operation 5955 unsigned BinOpcode) const { 5956 // This also handles ATOMIC_SWAP, indicated by BinOpcode==0. 5957 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 5958 // In 64 bit mode we have to use 64 bits for addresses, even though the 5959 // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address 5960 // registers without caring whether they're 32 or 64, but here we're 5961 // doing actual arithmetic on the addresses. 5962 bool is64bit = PPCSubTarget.isPPC64(); 5963 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 5964 5965 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 5966 MachineFunction *F = BB->getParent(); 5967 MachineFunction::iterator It = BB; 5968 ++It; 5969 5970 unsigned dest = MI->getOperand(0).getReg(); 5971 unsigned ptrA = MI->getOperand(1).getReg(); 5972 unsigned ptrB = MI->getOperand(2).getReg(); 5973 unsigned incr = MI->getOperand(3).getReg(); 5974 DebugLoc dl = MI->getDebugLoc(); 5975 5976 MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB); 5977 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 5978 F->insert(It, loopMBB); 5979 F->insert(It, exitMBB); 5980 exitMBB->splice(exitMBB->begin(), BB, 5981 llvm::next(MachineBasicBlock::iterator(MI)), 5982 BB->end()); 5983 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 5984 5985 MachineRegisterInfo &RegInfo = F->getRegInfo(); 5986 const TargetRegisterClass *RC = 5987 is64bit ? 
(const TargetRegisterClass *) &PPC::G8RCRegClass : 5988 (const TargetRegisterClass *) &PPC::GPRCRegClass; 5989 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 5990 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 5991 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 5992 unsigned Incr2Reg = RegInfo.createVirtualRegister(RC); 5993 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 5994 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 5995 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 5996 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 5997 unsigned Tmp3Reg = RegInfo.createVirtualRegister(RC); 5998 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 5999 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6000 unsigned Ptr1Reg; 6001 unsigned TmpReg = (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(RC); 6002 6003 // thisMBB: 6004 // ... 6005 // fallthrough --> loopMBB 6006 BB->addSuccessor(loopMBB); 6007 6008 // The 4-byte load must be aligned, while a char or short may be 6009 // anywhere in the word. Hence all this nasty bookkeeping code. 6010 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6011 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6012 // xori shift, shift1, 24 [16] 6013 // rlwinm ptr, ptr1, 0, 0, 29 6014 // slw incr2, incr, shift 6015 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6016 // slw mask, mask2, shift 6017 // loopMBB: 6018 // lwarx tmpDest, ptr 6019 // add tmp, tmpDest, incr2 6020 // andc tmp2, tmpDest, mask 6021 // and tmp3, tmp, mask 6022 // or tmp4, tmp3, tmp2 6023 // stwcx. tmp4, ptr 6024 // bne- loopMBB 6025 // fallthrough --> exitMBB 6026 // srw dest, tmpDest, shift 6027 if (ptrA != ZeroReg) { 6028 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6029 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6030 .addReg(ptrA).addReg(ptrB); 6031 } else { 6032 Ptr1Reg = ptrB; 6033 } 6034 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6035 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6036 BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg) 6037 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6038 if (is64bit) 6039 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6040 .addReg(Ptr1Reg).addImm(0).addImm(61); 6041 else 6042 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6043 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6044 BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg) 6045 .addReg(incr).addReg(ShiftReg); 6046 if (is8bit) 6047 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6048 else { 6049 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6050 BuildMI(BB, dl, TII->get(PPC::ORI),Mask2Reg).addReg(Mask3Reg).addImm(65535); 6051 } 6052 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6053 .addReg(Mask2Reg).addReg(ShiftReg); 6054 6055 BB = loopMBB; 6056 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6057 .addReg(ZeroReg).addReg(PtrReg); 6058 if (BinOpcode) 6059 BuildMI(BB, dl, TII->get(BinOpcode), TmpReg) 6060 .addReg(Incr2Reg).addReg(TmpDestReg); 6061 BuildMI(BB, dl, TII->get(is64bit ? PPC::ANDC8 : PPC::ANDC), Tmp2Reg) 6062 .addReg(TmpDestReg).addReg(MaskReg); 6063 BuildMI(BB, dl, TII->get(is64bit ? PPC::AND8 : PPC::AND), Tmp3Reg) 6064 .addReg(TmpReg).addReg(MaskReg); 6065 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::OR8 : PPC::OR), Tmp4Reg) 6066 .addReg(Tmp3Reg).addReg(Tmp2Reg); 6067 BuildMI(BB, dl, TII->get(PPC::STWCX)) 6068 .addReg(Tmp4Reg).addReg(ZeroReg).addReg(PtrReg); 6069 BuildMI(BB, dl, TII->get(PPC::BCC)) 6070 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB); 6071 BB->addSuccessor(loopMBB); 6072 BB->addSuccessor(exitMBB); 6073 6074 // exitMBB: 6075 // ... 6076 BB = exitMBB; 6077 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest).addReg(TmpDestReg) 6078 .addReg(ShiftReg); 6079 return BB; 6080} 6081 6082llvm::MachineBasicBlock* 6083PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, 6084 MachineBasicBlock *MBB) const { 6085 DebugLoc DL = MI->getDebugLoc(); 6086 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6087 6088 MachineFunction *MF = MBB->getParent(); 6089 MachineRegisterInfo &MRI = MF->getRegInfo(); 6090 6091 const BasicBlock *BB = MBB->getBasicBlock(); 6092 MachineFunction::iterator I = MBB; 6093 ++I; 6094 6095 // Memory Reference 6096 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6097 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6098 6099 unsigned DstReg = MI->getOperand(0).getReg(); 6100 const TargetRegisterClass *RC = MRI.getRegClass(DstReg); 6101 assert(RC->hasType(MVT::i32) && "Invalid destination!"); 6102 unsigned mainDstReg = MRI.createVirtualRegister(RC); 6103 unsigned restoreDstReg = MRI.createVirtualRegister(RC); 6104 6105 MVT PVT = getPointerTy(); 6106 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6107 "Invalid Pointer Size!"); 6108 // For v = setjmp(buf), we generate 6109 // 6110 // thisMBB: 6111 // SjLjSetup mainMBB 6112 // bl mainMBB 6113 // v_restore = 1 6114 // b sinkMBB 6115 // 6116 // mainMBB: 6117 // buf[LabelOffset] = LR 6118 // v_main = 0 6119 // 6120 // sinkMBB: 6121 // v = phi(main, restore) 6122 // 6123 6124 MachineBasicBlock *thisMBB = MBB; 6125 MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); 6126 MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); 6127 MF->insert(I, mainMBB); 6128 MF->insert(I, sinkMBB); 6129 6130 MachineInstrBuilder MIB; 6131 6132 // Transfer the remainder of BB and its successor edges to sinkMBB. 6133 sinkMBB->splice(sinkMBB->begin(), MBB, 6134 llvm::next(MachineBasicBlock::iterator(MI)), MBB->end()); 6135 sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); 6136 6137 // Note that the structure of the jmp_buf used here is not compatible 6138 // with that used by libc, and is not designed to be. Specifically, it 6139 // stores only those 'reserved' registers that LLVM does not otherwise 6140 // understand how to spill. Also, by convention, by the time this 6141 // intrinsic is called, Clang has already stored the frame address in the 6142 // first slot of the buffer and stack address in the third. Following the 6143 // X86 target code, we'll store the jump address in the second slot. We also 6144 // need to save the TOC pointer (R2) to handle jumps between shared 6145 // libraries, and that will be stored in the fourth slot. The thread 6146 // identifier (R13) is not affected. 6147 6148 // thisMBB: 6149 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6150 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6151 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6152 6153 // Prepare IP either in reg. 
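 // (Slot layout implied by the offsets used here and in emitEHSjLjLongJmp: slot 0 holds the frame address, slot 1 the jump address, slot 2 the stack pointer, slot 3 the TOC pointer, and slot 4 the base pointer; each slot is pointer-sized.)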
6154 const TargetRegisterClass *PtrRC = getRegClassFor(PVT); 6155 unsigned LabelReg = MRI.createVirtualRegister(PtrRC); 6156 unsigned BufReg = MI->getOperand(1).getReg(); 6157 6158 if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { 6159 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) 6160 .addReg(PPC::X2) 6161 .addImm(TOCOffset) 6162 .addReg(BufReg); 6163 MIB.setMemRefs(MMOBegin, MMOEnd); 6164 } 6165 6166 // Naked functions never have a base pointer, and so we use r1. For all 6167 // other functions, this decision must be delayed until during PEI. 6168 unsigned BaseReg; 6169 if (MF->getFunction()->getAttributes().hasAttribute( 6170 AttributeSet::FunctionIndex, Attribute::Naked)) 6171 BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; 6172 else 6173 BaseReg = PPCSubTarget.isPPC64() ? PPC::BP8 : PPC::BP; 6174 6175 MIB = BuildMI(*thisMBB, MI, DL, 6176 TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) 6177 .addReg(BaseReg) 6178 .addImm(BPOffset) 6179 .addReg(BufReg); 6180 MIB.setMemRefs(MMOBegin, MMOEnd); 6181 6182 // Setup 6183 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB); 6184 const PPCRegisterInfo *TRI = 6185 static_cast<const PPCRegisterInfo*>(getTargetMachine().getRegisterInfo()); 6186 MIB.addRegMask(TRI->getNoPreservedMask()); 6187 6188 BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1); 6189 6190 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup)) 6191 .addMBB(mainMBB); 6192 MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB); 6193 6194 thisMBB->addSuccessor(mainMBB, /* weight */ 0); 6195 thisMBB->addSuccessor(sinkMBB, /* weight */ 1); 6196 6197 // mainMBB: 6198 // mainDstReg = 0 6199 MIB = BuildMI(mainMBB, DL, 6200 TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); 6201 6202 // Store IP 6203 if (PPCSubTarget.isPPC64()) { 6204 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) 6205 .addReg(LabelReg) 6206 .addImm(LabelOffset) 6207 .addReg(BufReg); 6208 } else { 6209 MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW)) 6210 .addReg(LabelReg) 6211 .addImm(LabelOffset) 6212 .addReg(BufReg); 6213 } 6214 6215 MIB.setMemRefs(MMOBegin, MMOEnd); 6216 6217 BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0); 6218 mainMBB->addSuccessor(sinkMBB); 6219 6220 // sinkMBB: 6221 BuildMI(*sinkMBB, sinkMBB->begin(), DL, 6222 TII->get(PPC::PHI), DstReg) 6223 .addReg(mainDstReg).addMBB(mainMBB) 6224 .addReg(restoreDstReg).addMBB(thisMBB); 6225 6226 MI->eraseFromParent(); 6227 return sinkMBB; 6228} 6229 6230MachineBasicBlock * 6231PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, 6232 MachineBasicBlock *MBB) const { 6233 DebugLoc DL = MI->getDebugLoc(); 6234 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6235 6236 MachineFunction *MF = MBB->getParent(); 6237 MachineRegisterInfo &MRI = MF->getRegInfo(); 6238 6239 // Memory Reference 6240 MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); 6241 MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); 6242 6243 MVT PVT = getPointerTy(); 6244 assert((PVT == MVT::i64 || PVT == MVT::i32) && 6245 "Invalid Pointer Size!"); 6246 6247 const TargetRegisterClass *RC = 6248 (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass; 6249 unsigned Tmp = MRI.createVirtualRegister(RC); 6250 // Since FP is only updated here but NOT referenced, it's treated as GPR. 6251 unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31; 6252 unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1; 6253 unsigned BP = (PVT == MVT::i64) ? 
PPC::X30 : PPC::R30; 6254 6255 MachineInstrBuilder MIB; 6256 6257 const int64_t LabelOffset = 1 * PVT.getStoreSize(); 6258 const int64_t SPOffset = 2 * PVT.getStoreSize(); 6259 const int64_t TOCOffset = 3 * PVT.getStoreSize(); 6260 const int64_t BPOffset = 4 * PVT.getStoreSize(); 6261 6262 unsigned BufReg = MI->getOperand(0).getReg(); 6263 6264 // Reload FP (the jumped-to function may not have had a 6265 // frame pointer, and if so, then its r31 will be restored 6266 // as necessary). 6267 if (PVT == MVT::i64) { 6268 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP) 6269 .addImm(0) 6270 .addReg(BufReg); 6271 } else { 6272 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP) 6273 .addImm(0) 6274 .addReg(BufReg); 6275 } 6276 MIB.setMemRefs(MMOBegin, MMOEnd); 6277 6278 // Reload IP 6279 if (PVT == MVT::i64) { 6280 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp) 6281 .addImm(LabelOffset) 6282 .addReg(BufReg); 6283 } else { 6284 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp) 6285 .addImm(LabelOffset) 6286 .addReg(BufReg); 6287 } 6288 MIB.setMemRefs(MMOBegin, MMOEnd); 6289 6290 // Reload SP 6291 if (PVT == MVT::i64) { 6292 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP) 6293 .addImm(SPOffset) 6294 .addReg(BufReg); 6295 } else { 6296 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP) 6297 .addImm(SPOffset) 6298 .addReg(BufReg); 6299 } 6300 MIB.setMemRefs(MMOBegin, MMOEnd); 6301 6302 // Reload BP 6303 if (PVT == MVT::i64) { 6304 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP) 6305 .addImm(BPOffset) 6306 .addReg(BufReg); 6307 } else { 6308 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP) 6309 .addImm(BPOffset) 6310 .addReg(BufReg); 6311 } 6312 MIB.setMemRefs(MMOBegin, MMOEnd); 6313 6314 // Reload TOC 6315 if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { 6316 MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) 6317 .addImm(TOCOffset) 6318 .addReg(BufReg); 6319 6320 MIB.setMemRefs(MMOBegin, MMOEnd); 6321 } 6322 6323 // Jump 6324 BuildMI(*MBB, MI, DL, 6325 TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp); 6326 BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR)); 6327 6328 MI->eraseFromParent(); 6329 return MBB; 6330} 6331 6332MachineBasicBlock * 6333PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, 6334 MachineBasicBlock *BB) const { 6335 if (MI->getOpcode() == PPC::EH_SjLj_SetJmp32 || 6336 MI->getOpcode() == PPC::EH_SjLj_SetJmp64) { 6337 return emitEHSjLjSetJmp(MI, BB); 6338 } else if (MI->getOpcode() == PPC::EH_SjLj_LongJmp32 || 6339 MI->getOpcode() == PPC::EH_SjLj_LongJmp64) { 6340 return emitEHSjLjLongJmp(MI, BB); 6341 } 6342 6343 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6344 6345 // To "insert" these instructions we actually have to insert their 6346 // control-flow patterns. 
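 // The pseudos handled below fall into three groups: the SELECT_CC_* pseudos (an isel for integer selects on subtargets with ISEL, otherwise a compare-and-branch diamond), the ATOMIC_* pseudos (load-reserve/store-conditional loops), and FADDrtz (an FADD bracketed by FPSCR rounding-mode updates).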
6347 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 6348 MachineFunction::iterator It = BB; 6349 ++It; 6350 6351 MachineFunction *F = BB->getParent(); 6352 6353 if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || 6354 MI->getOpcode() == PPC::SELECT_CC_I8)) { 6355 SmallVector<MachineOperand, 2> Cond; 6356 Cond.push_back(MI->getOperand(4)); 6357 Cond.push_back(MI->getOperand(1)); 6358 6359 DebugLoc dl = MI->getDebugLoc(); 6360 const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); 6361 TII->insertSelect(*BB, MI, dl, MI->getOperand(0).getReg(), 6362 Cond, MI->getOperand(2).getReg(), 6363 MI->getOperand(3).getReg()); 6364 } else if (MI->getOpcode() == PPC::SELECT_CC_I4 || 6365 MI->getOpcode() == PPC::SELECT_CC_I8 || 6366 MI->getOpcode() == PPC::SELECT_CC_F4 || 6367 MI->getOpcode() == PPC::SELECT_CC_F8 || 6368 MI->getOpcode() == PPC::SELECT_CC_VRRC) { 6369 6370 6371 // The incoming instruction knows the destination vreg to set, the 6372 // condition code register to branch on, the true/false values to 6373 // select between, and a branch opcode to use. 6374 6375 // thisMBB: 6376 // ... 6377 // TrueVal = ... 6378 // cmpTY ccX, r1, r2 6379 // bCC copy1MBB 6380 // fallthrough --> copy0MBB 6381 MachineBasicBlock *thisMBB = BB; 6382 MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); 6383 MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); 6384 unsigned SelectPred = MI->getOperand(4).getImm(); 6385 DebugLoc dl = MI->getDebugLoc(); 6386 F->insert(It, copy0MBB); 6387 F->insert(It, sinkMBB); 6388 6389 // Transfer the remainder of BB and its successor edges to sinkMBB. 6390 sinkMBB->splice(sinkMBB->begin(), BB, 6391 llvm::next(MachineBasicBlock::iterator(MI)), 6392 BB->end()); 6393 sinkMBB->transferSuccessorsAndUpdatePHIs(BB); 6394 6395 // Next, add the true and fallthrough blocks as its successors. 6396 BB->addSuccessor(copy0MBB); 6397 BB->addSuccessor(sinkMBB); 6398 6399 BuildMI(BB, dl, TII->get(PPC::BCC)) 6400 .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 6401 6402 // copy0MBB: 6403 // %FalseValue = ... 6404 // # fallthrough to sinkMBB 6405 BB = copy0MBB; 6406 6407 // Update machine-CFG edges 6408 BB->addSuccessor(sinkMBB); 6409 6410 // sinkMBB: 6411 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 6412 // ... 
6413 BB = sinkMBB; 6414 BuildMI(*BB, BB->begin(), dl, 6415 TII->get(PPC::PHI), MI->getOperand(0).getReg()) 6416 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 6417 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 6418 } 6419 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I8) 6420 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4); 6421 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I16) 6422 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4); 6423 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I32) 6424 BB = EmitAtomicBinary(MI, BB, false, PPC::ADD4); 6425 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_ADD_I64) 6426 BB = EmitAtomicBinary(MI, BB, true, PPC::ADD8); 6427 6428 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I8) 6429 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::AND); 6430 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I16) 6431 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::AND); 6432 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I32) 6433 BB = EmitAtomicBinary(MI, BB, false, PPC::AND); 6434 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_AND_I64) 6435 BB = EmitAtomicBinary(MI, BB, true, PPC::AND8); 6436 6437 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I8) 6438 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::OR); 6439 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I16) 6440 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR); 6441 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I32) 6442 BB = EmitAtomicBinary(MI, BB, false, PPC::OR); 6443 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_OR_I64) 6444 BB = EmitAtomicBinary(MI, BB, true, PPC::OR8); 6445 6446 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I8) 6447 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::XOR); 6448 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I16) 6449 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::XOR); 6450 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I32) 6451 BB = EmitAtomicBinary(MI, BB, false, PPC::XOR); 6452 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_XOR_I64) 6453 BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); 6454 6455 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) 6456 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); 6457 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) 6458 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); 6459 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) 6460 BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); 6461 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) 6462 BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); 6463 6464 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) 6465 BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); 6466 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I16) 6467 BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF); 6468 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I32) 6469 BB = EmitAtomicBinary(MI, BB, false, PPC::SUBF); 6470 else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I64) 6471 BB = EmitAtomicBinary(MI, BB, true, PPC::SUBF8); 6472 6473 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I8) 6474 BB = EmitPartwordAtomicBinary(MI, BB, true, 0); 6475 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I16) 6476 BB = EmitPartwordAtomicBinary(MI, BB, false, 0); 6477 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I32) 6478 BB = EmitAtomicBinary(MI, BB, false, 0); 6479 else if (MI->getOpcode() == PPC::ATOMIC_SWAP_I64) 6480 BB = EmitAtomicBinary(MI, BB, true, 0); 6481 6482 else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 || 6483 MI->getOpcode() == 
PPC::ATOMIC_CMP_SWAP_I64) { 6484 bool is64bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I64; 6485 6486 unsigned dest = MI->getOperand(0).getReg(); 6487 unsigned ptrA = MI->getOperand(1).getReg(); 6488 unsigned ptrB = MI->getOperand(2).getReg(); 6489 unsigned oldval = MI->getOperand(3).getReg(); 6490 unsigned newval = MI->getOperand(4).getReg(); 6491 DebugLoc dl = MI->getDebugLoc(); 6492 6493 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 6494 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 6495 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 6496 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6497 F->insert(It, loop1MBB); 6498 F->insert(It, loop2MBB); 6499 F->insert(It, midMBB); 6500 F->insert(It, exitMBB); 6501 exitMBB->splice(exitMBB->begin(), BB, 6502 llvm::next(MachineBasicBlock::iterator(MI)), 6503 BB->end()); 6504 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6505 6506 // thisMBB: 6507 // ... 6508 // fallthrough --> loopMBB 6509 BB->addSuccessor(loop1MBB); 6510 6511 // loop1MBB: 6512 // l[wd]arx dest, ptr 6513 // cmp[wd] dest, oldval 6514 // bne- midMBB 6515 // loop2MBB: 6516 // st[wd]cx. newval, ptr 6517 // bne- loopMBB 6518 // b exitBB 6519 // midMBB: 6520 // st[wd]cx. dest, ptr 6521 // exitBB: 6522 BB = loop1MBB; 6523 BuildMI(BB, dl, TII->get(is64bit ? PPC::LDARX : PPC::LWARX), dest) 6524 .addReg(ptrA).addReg(ptrB); 6525 BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0) 6526 .addReg(oldval).addReg(dest); 6527 BuildMI(BB, dl, TII->get(PPC::BCC)) 6528 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 6529 BB->addSuccessor(loop2MBB); 6530 BB->addSuccessor(midMBB); 6531 6532 BB = loop2MBB; 6533 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6534 .addReg(newval).addReg(ptrA).addReg(ptrB); 6535 BuildMI(BB, dl, TII->get(PPC::BCC)) 6536 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 6537 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 6538 BB->addSuccessor(loop1MBB); 6539 BB->addSuccessor(exitMBB); 6540 6541 BB = midMBB; 6542 BuildMI(BB, dl, TII->get(is64bit ? PPC::STDCX : PPC::STWCX)) 6543 .addReg(dest).addReg(ptrA).addReg(ptrB); 6544 BB->addSuccessor(exitMBB); 6545 6546 // exitMBB: 6547 // ... 6548 BB = exitMBB; 6549 } else if (MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 || 6550 MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) { 6551 // We must use 64-bit registers for addresses when targeting 64-bit, 6552 // since we're actually doing arithmetic on them. Other registers 6553 // can be 32-bit. 
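 // As in EmitPartwordAtomicBinary, the reservation is taken on the aligned word containing the byte or halfword: the old and new values are shifted and masked into position within that word, and only the selected lane is compared and exchanged.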
6554 bool is64bit = PPCSubTarget.isPPC64(); 6555 bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; 6556 6557 unsigned dest = MI->getOperand(0).getReg(); 6558 unsigned ptrA = MI->getOperand(1).getReg(); 6559 unsigned ptrB = MI->getOperand(2).getReg(); 6560 unsigned oldval = MI->getOperand(3).getReg(); 6561 unsigned newval = MI->getOperand(4).getReg(); 6562 DebugLoc dl = MI->getDebugLoc(); 6563 6564 MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB); 6565 MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB); 6566 MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB); 6567 MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB); 6568 F->insert(It, loop1MBB); 6569 F->insert(It, loop2MBB); 6570 F->insert(It, midMBB); 6571 F->insert(It, exitMBB); 6572 exitMBB->splice(exitMBB->begin(), BB, 6573 llvm::next(MachineBasicBlock::iterator(MI)), 6574 BB->end()); 6575 exitMBB->transferSuccessorsAndUpdatePHIs(BB); 6576 6577 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6578 const TargetRegisterClass *RC = 6579 is64bit ? (const TargetRegisterClass *) &PPC::G8RCRegClass : 6580 (const TargetRegisterClass *) &PPC::GPRCRegClass; 6581 unsigned PtrReg = RegInfo.createVirtualRegister(RC); 6582 unsigned Shift1Reg = RegInfo.createVirtualRegister(RC); 6583 unsigned ShiftReg = RegInfo.createVirtualRegister(RC); 6584 unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC); 6585 unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC); 6586 unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC); 6587 unsigned OldVal3Reg = RegInfo.createVirtualRegister(RC); 6588 unsigned MaskReg = RegInfo.createVirtualRegister(RC); 6589 unsigned Mask2Reg = RegInfo.createVirtualRegister(RC); 6590 unsigned Mask3Reg = RegInfo.createVirtualRegister(RC); 6591 unsigned Tmp2Reg = RegInfo.createVirtualRegister(RC); 6592 unsigned Tmp4Reg = RegInfo.createVirtualRegister(RC); 6593 unsigned TmpDestReg = RegInfo.createVirtualRegister(RC); 6594 unsigned Ptr1Reg; 6595 unsigned TmpReg = RegInfo.createVirtualRegister(RC); 6596 unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; 6597 // thisMBB: 6598 // ... 6599 // fallthrough --> loopMBB 6600 BB->addSuccessor(loop1MBB); 6601 6602 // The 4-byte load must be aligned, while a char or short may be 6603 // anywhere in the word. Hence all this nasty bookkeeping code. 6604 // add ptr1, ptrA, ptrB [copy if ptrA==0] 6605 // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27] 6606 // xori shift, shift1, 24 [16] 6607 // rlwinm ptr, ptr1, 0, 0, 29 6608 // slw newval2, newval, shift 6609 // slw oldval2, oldval,shift 6610 // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535] 6611 // slw mask, mask2, shift 6612 // and newval3, newval2, mask 6613 // and oldval3, oldval2, mask 6614 // loop1MBB: 6615 // lwarx tmpDest, ptr 6616 // and tmp, tmpDest, mask 6617 // cmpw tmp, oldval3 6618 // bne- midMBB 6619 // loop2MBB: 6620 // andc tmp2, tmpDest, mask 6621 // or tmp4, tmp2, newval3 6622 // stwcx. tmp4, ptr 6623 // bne- loop1MBB 6624 // b exitBB 6625 // midMBB: 6626 // stwcx. tmpDest, ptr 6627 // exitBB: 6628 // srw dest, tmpDest, shift 6629 if (ptrA != ZeroReg) { 6630 Ptr1Reg = RegInfo.createVirtualRegister(RC); 6631 BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg) 6632 .addReg(ptrA).addReg(ptrB); 6633 } else { 6634 Ptr1Reg = ptrB; 6635 } 6636 BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg) 6637 .addImm(3).addImm(27).addImm(is8bit ? 28 : 27); 6638 BuildMI(BB, dl, TII->get(is64bit ? 
PPC::XORI8 : PPC::XORI), ShiftReg) 6639 .addReg(Shift1Reg).addImm(is8bit ? 24 : 16); 6640 if (is64bit) 6641 BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg) 6642 .addReg(Ptr1Reg).addImm(0).addImm(61); 6643 else 6644 BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg) 6645 .addReg(Ptr1Reg).addImm(0).addImm(0).addImm(29); 6646 BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg) 6647 .addReg(newval).addReg(ShiftReg); 6648 BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg) 6649 .addReg(oldval).addReg(ShiftReg); 6650 if (is8bit) 6651 BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255); 6652 else { 6653 BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0); 6654 BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg) 6655 .addReg(Mask3Reg).addImm(65535); 6656 } 6657 BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg) 6658 .addReg(Mask2Reg).addReg(ShiftReg); 6659 BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg) 6660 .addReg(NewVal2Reg).addReg(MaskReg); 6661 BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg) 6662 .addReg(OldVal2Reg).addReg(MaskReg); 6663 6664 BB = loop1MBB; 6665 BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg) 6666 .addReg(ZeroReg).addReg(PtrReg); 6667 BuildMI(BB, dl, TII->get(PPC::AND),TmpReg) 6668 .addReg(TmpDestReg).addReg(MaskReg); 6669 BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0) 6670 .addReg(TmpReg).addReg(OldVal3Reg); 6671 BuildMI(BB, dl, TII->get(PPC::BCC)) 6672 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(midMBB); 6673 BB->addSuccessor(loop2MBB); 6674 BB->addSuccessor(midMBB); 6675 6676 BB = loop2MBB; 6677 BuildMI(BB, dl, TII->get(PPC::ANDC),Tmp2Reg) 6678 .addReg(TmpDestReg).addReg(MaskReg); 6679 BuildMI(BB, dl, TII->get(PPC::OR),Tmp4Reg) 6680 .addReg(Tmp2Reg).addReg(NewVal3Reg); 6681 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(Tmp4Reg) 6682 .addReg(ZeroReg).addReg(PtrReg); 6683 BuildMI(BB, dl, TII->get(PPC::BCC)) 6684 .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loop1MBB); 6685 BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB); 6686 BB->addSuccessor(loop1MBB); 6687 BB->addSuccessor(exitMBB); 6688 6689 BB = midMBB; 6690 BuildMI(BB, dl, TII->get(PPC::STWCX)).addReg(TmpDestReg) 6691 .addReg(ZeroReg).addReg(PtrReg); 6692 BB->addSuccessor(exitMBB); 6693 6694 // exitMBB: 6695 // ... 6696 BB = exitMBB; 6697 BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW),dest).addReg(TmpReg) 6698 .addReg(ShiftReg); 6699 } else if (MI->getOpcode() == PPC::FADDrtz) { 6700 // This pseudo performs an FADD with rounding mode temporarily forced 6701 // to round-to-zero. We emit this via custom inserter since the FPSCR 6702 // is not modeled at the SelectionDAG level. 6703 unsigned Dest = MI->getOperand(0).getReg(); 6704 unsigned Src1 = MI->getOperand(1).getReg(); 6705 unsigned Src2 = MI->getOperand(2).getReg(); 6706 DebugLoc dl = MI->getDebugLoc(); 6707 6708 MachineRegisterInfo &RegInfo = F->getRegInfo(); 6709 unsigned MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 6710 6711 // Save FPSCR value. 6712 BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg); 6713 6714 // Set rounding mode to round-to-zero. 6715 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1)).addImm(31); 6716 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0)).addImm(30); 6717 6718 // Perform addition. 6719 BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2); 6720 6721 // Restore FPSCR value. 6722 BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF)).addImm(1).addReg(MFFSReg); 6723 } else { 6724 llvm_unreachable("Unexpected instr type to insert"); 6725 } 6726 6727 MI->eraseFromParent(); // The pseudo instruction is gone now. 
6728 return BB; 6729} 6730 6731//===----------------------------------------------------------------------===// 6732// Target Optimization Hooks 6733//===----------------------------------------------------------------------===// 6734 6735SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, 6736 DAGCombinerInfo &DCI) const { 6737 if (DCI.isAfterLegalizeVectorOps()) 6738 return SDValue(); 6739 6740 EVT VT = Op.getValueType(); 6741 6742 if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || 6743 (VT == MVT::f64 && PPCSubTarget.hasFRE()) || 6744 (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { 6745 6746 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 6747 // For the reciprocal, we need to find the zero of the function: 6748 // F(X) = A X - 1 [which has a zero at X = 1/A] 6749 // => 6750 // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form 6751 // does not require additional intermediate precision] 6752 6753 // Convergence is quadratic, so we essentially double the number of digits 6754 // correct after every iteration. The minimum architected relative 6755 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 6756 // 23 digits and double has 52 digits. 6757 int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; 6758 if (VT.getScalarType() == MVT::f64) 6759 ++Iterations; 6760 6761 SelectionDAG &DAG = DCI.DAG; 6762 SDLoc dl(Op); 6763 6764 SDValue FPOne = 6765 DAG.getConstantFP(1.0, VT.getScalarType()); 6766 if (VT.isVector()) { 6767 assert(VT.getVectorNumElements() == 4 && 6768 "Unknown vector type"); 6769 FPOne = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 6770 FPOne, FPOne, FPOne, FPOne); 6771 } 6772 6773 SDValue Est = DAG.getNode(PPCISD::FRE, dl, VT, Op); 6774 DCI.AddToWorklist(Est.getNode()); 6775 6776 // Newton iterations: Est = Est + Est (1 - Arg * Est) 6777 for (int i = 0; i < Iterations; ++i) { 6778 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Op, Est); 6779 DCI.AddToWorklist(NewEst.getNode()); 6780 6781 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPOne, NewEst); 6782 DCI.AddToWorklist(NewEst.getNode()); 6783 6784 NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 6785 DCI.AddToWorklist(NewEst.getNode()); 6786 6787 Est = DAG.getNode(ISD::FADD, dl, VT, Est, NewEst); 6788 DCI.AddToWorklist(Est.getNode()); 6789 } 6790 6791 return Est; 6792 } 6793 6794 return SDValue(); 6795} 6796 6797SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, 6798 DAGCombinerInfo &DCI) const { 6799 if (DCI.isAfterLegalizeVectorOps()) 6800 return SDValue(); 6801 6802 EVT VT = Op.getValueType(); 6803 6804 if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || 6805 (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || 6806 (VT == MVT::v4f32 && PPCSubTarget.hasAltivec())) { 6807 6808 // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) 6809 // For the reciprocal sqrt, we need to find the zero of the function: 6810 // F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)] 6811 // => 6812 // X_{i+1} = X_i (1.5 - A X_i^2 / 2) 6813 // As a result, we precompute A/2 prior to the iteration loop. 6814 6815 // Convergence is quadratic, so we essentially double the number of digits 6816 // correct after every iteration. The minimum architected relative 6817 // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has 6818 // 23 digits and double has 52 digits. 6819 int Iterations = PPCSubTarget.hasRecipPrec() ? 
1 : 3; 6820 if (VT.getScalarType() == MVT::f64) 6821 ++Iterations; 6822 6823 SelectionDAG &DAG = DCI.DAG; 6824 SDLoc dl(Op); 6825 6826 SDValue FPThreeHalves = 6827 DAG.getConstantFP(1.5, VT.getScalarType()); 6828 if (VT.isVector()) { 6829 assert(VT.getVectorNumElements() == 4 && 6830 "Unknown vector type"); 6831 FPThreeHalves = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, 6832 FPThreeHalves, FPThreeHalves, 6833 FPThreeHalves, FPThreeHalves); 6834 } 6835 6836 SDValue Est = DAG.getNode(PPCISD::FRSQRTE, dl, VT, Op); 6837 DCI.AddToWorklist(Est.getNode()); 6838 6839 // We now need 0.5*Arg which we can write as (1.5*Arg - Arg) so that 6840 // this entire sequence requires only one FP constant. 6841 SDValue HalfArg = DAG.getNode(ISD::FMUL, dl, VT, FPThreeHalves, Op); 6842 DCI.AddToWorklist(HalfArg.getNode()); 6843 6844 HalfArg = DAG.getNode(ISD::FSUB, dl, VT, HalfArg, Op); 6845 DCI.AddToWorklist(HalfArg.getNode()); 6846 6847 // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est) 6848 for (int i = 0; i < Iterations; ++i) { 6849 SDValue NewEst = DAG.getNode(ISD::FMUL, dl, VT, Est, Est); 6850 DCI.AddToWorklist(NewEst.getNode()); 6851 6852 NewEst = DAG.getNode(ISD::FMUL, dl, VT, HalfArg, NewEst); 6853 DCI.AddToWorklist(NewEst.getNode()); 6854 6855 NewEst = DAG.getNode(ISD::FSUB, dl, VT, FPThreeHalves, NewEst); 6856 DCI.AddToWorklist(NewEst.getNode()); 6857 6858 Est = DAG.getNode(ISD::FMUL, dl, VT, Est, NewEst); 6859 DCI.AddToWorklist(Est.getNode()); 6860 } 6861 6862 return Est; 6863 } 6864 6865 return SDValue(); 6866} 6867 6868// Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does 6869// not enforce equality of the chain operands. 6870static bool isConsecutiveLS(LSBaseSDNode *LS, LSBaseSDNode *Base, 6871 unsigned Bytes, int Dist, 6872 SelectionDAG &DAG) { 6873 EVT VT = LS->getMemoryVT(); 6874 if (VT.getSizeInBits() / 8 != Bytes) 6875 return false; 6876 6877 SDValue Loc = LS->getBasePtr(); 6878 SDValue BaseLoc = Base->getBasePtr(); 6879 if (Loc.getOpcode() == ISD::FrameIndex) { 6880 if (BaseLoc.getOpcode() != ISD::FrameIndex) 6881 return false; 6882 const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 6883 int FI = cast<FrameIndexSDNode>(Loc)->getIndex(); 6884 int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex(); 6885 int FS = MFI->getObjectSize(FI); 6886 int BFS = MFI->getObjectSize(BFI); 6887 if (FS != BFS || FS != (int)Bytes) return false; 6888 return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes); 6889 } 6890 6891 // Handle X+C 6892 if (DAG.isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc && 6893 cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes) 6894 return true; 6895 6896 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6897 const GlobalValue *GV1 = NULL; 6898 const GlobalValue *GV2 = NULL; 6899 int64_t Offset1 = 0; 6900 int64_t Offset2 = 0; 6901 bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); 6902 bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); 6903 if (isGA1 && isGA2 && GV1 == GV2) 6904 return Offset1 == (Offset2 + Dist*Bytes); 6905 return false; 6906} 6907 6908// Return true if there is a nearby consecutive load to the one provided 6909// (regardless of alignment). We search up and down the chain, looking through 6910// token factors and other loads (but nothing else). As a result, a true 6911// return value indicates that it is safe to create a new consecutive load adjacent 6912// to the load provided. 
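// For example, given a v4i32 load from X and another from X+16 hanging off a common token factor, a query on the load from X will find the one at X+16 by walking up to the token factor and back down through its other chain operand.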
6913static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { 6914 SDValue Chain = LD->getChain(); 6915 EVT VT = LD->getMemoryVT(); 6916 6917 SmallSet<SDNode *, 16> LoadRoots; 6918 SmallVector<SDNode *, 8> Queue(1, Chain.getNode()); 6919 SmallSet<SDNode *, 16> Visited; 6920 6921 // First, search up the chain, branching to follow all token-factor operands. 6922 // If we find a consecutive load, then we're done, otherwise, record all 6923 // nodes just above the top-level loads and token factors. 6924 while (!Queue.empty()) { 6925 SDNode *ChainNext = Queue.pop_back_val(); 6926 if (!Visited.insert(ChainNext)) 6927 continue; 6928 6929 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(ChainNext)) { 6930 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 6931 return true; 6932 6933 if (!Visited.count(ChainLD->getChain().getNode())) 6934 Queue.push_back(ChainLD->getChain().getNode()); 6935 } else if (ChainNext->getOpcode() == ISD::TokenFactor) { 6936 for (SDNode::op_iterator O = ChainNext->op_begin(), 6937 OE = ChainNext->op_end(); O != OE; ++O) 6938 if (!Visited.count(O->getNode())) 6939 Queue.push_back(O->getNode()); 6940 } else 6941 LoadRoots.insert(ChainNext); 6942 } 6943 6944 // Second, search down the chain, starting from the top-level nodes recorded 6945 // in the first phase. These top-level nodes are the nodes just above all 6946 // loads and token factors. Starting with their uses, recursively look though 6947 // all loads (just the chain uses) and token factors to find a consecutive 6948 // load. 6949 Visited.clear(); 6950 Queue.clear(); 6951 6952 for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(), 6953 IE = LoadRoots.end(); I != IE; ++I) { 6954 Queue.push_back(*I); 6955 6956 while (!Queue.empty()) { 6957 SDNode *LoadRoot = Queue.pop_back_val(); 6958 if (!Visited.insert(LoadRoot)) 6959 continue; 6960 6961 if (LoadSDNode *ChainLD = dyn_cast<LoadSDNode>(LoadRoot)) 6962 if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG)) 6963 return true; 6964 6965 for (SDNode::use_iterator UI = LoadRoot->use_begin(), 6966 UE = LoadRoot->use_end(); UI != UE; ++UI) 6967 if (((isa<LoadSDNode>(*UI) && 6968 cast<LoadSDNode>(*UI)->getChain().getNode() == LoadRoot) || 6969 UI->getOpcode() == ISD::TokenFactor) && !Visited.count(*UI)) 6970 Queue.push_back(*UI); 6971 } 6972 } 6973 6974 return false; 6975} 6976 6977SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, 6978 DAGCombinerInfo &DCI) const { 6979 const TargetMachine &TM = getTargetMachine(); 6980 SelectionDAG &DAG = DCI.DAG; 6981 SDLoc dl(N); 6982 switch (N->getOpcode()) { 6983 default: break; 6984 case PPCISD::SHL: 6985 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6986 if (C->isNullValue()) // 0 << V -> 0. 6987 return N->getOperand(0); 6988 } 6989 break; 6990 case PPCISD::SRL: 6991 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6992 if (C->isNullValue()) // 0 >>u V -> 0. 6993 return N->getOperand(0); 6994 } 6995 break; 6996 case PPCISD::SRA: 6997 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 6998 if (C->isNullValue() || // 0 >>s V -> 0. 6999 C->isAllOnesValue()) // -1 >>s V -> -1. 
7000 return N->getOperand(0); 7001 } 7002 break; 7003 case ISD::FDIV: { 7004 assert(TM.Options.UnsafeFPMath && 7005 "Reciprocal estimates require UnsafeFPMath"); 7006 7007 if (N->getOperand(1).getOpcode() == ISD::FSQRT) { 7008 SDValue RV = 7009 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0), DCI); 7010 if (RV.getNode() != 0) { 7011 DCI.AddToWorklist(RV.getNode()); 7012 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7013 N->getOperand(0), RV); 7014 } 7015 } else if (N->getOperand(1).getOpcode() == ISD::FP_EXTEND && 7016 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 7017 SDValue RV = 7018 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 7019 DCI); 7020 if (RV.getNode() != 0) { 7021 DCI.AddToWorklist(RV.getNode()); 7022 RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N->getOperand(1)), 7023 N->getValueType(0), RV); 7024 DCI.AddToWorklist(RV.getNode()); 7025 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7026 N->getOperand(0), RV); 7027 } 7028 } else if (N->getOperand(1).getOpcode() == ISD::FP_ROUND && 7029 N->getOperand(1).getOperand(0).getOpcode() == ISD::FSQRT) { 7030 SDValue RV = 7031 DAGCombineFastRecipFSQRT(N->getOperand(1).getOperand(0).getOperand(0), 7032 DCI); 7033 if (RV.getNode() != 0) { 7034 DCI.AddToWorklist(RV.getNode()); 7035 RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N->getOperand(1)), 7036 N->getValueType(0), RV, 7037 N->getOperand(1).getOperand(1)); 7038 DCI.AddToWorklist(RV.getNode()); 7039 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7040 N->getOperand(0), RV); 7041 } 7042 } 7043 7044 SDValue RV = DAGCombineFastRecip(N->getOperand(1), DCI); 7045 if (RV.getNode() != 0) { 7046 DCI.AddToWorklist(RV.getNode()); 7047 return DAG.getNode(ISD::FMUL, dl, N->getValueType(0), 7048 N->getOperand(0), RV); 7049 } 7050 7051 } 7052 break; 7053 case ISD::FSQRT: { 7054 assert(TM.Options.UnsafeFPMath && 7055 "Reciprocal estimates require UnsafeFPMath"); 7056 7057 // Compute this as 1/(1/sqrt(X)), which is the reciprocal of the 7058 // reciprocal sqrt. 7059 SDValue RV = DAGCombineFastRecipFSQRT(N->getOperand(0), DCI); 7060 if (RV.getNode() != 0) { 7061 DCI.AddToWorklist(RV.getNode()); 7062 RV = DAGCombineFastRecip(RV, DCI); 7063 if (RV.getNode() != 0) { 7064 // Unfortunately, RV is now NaN if the input was exactly 0. Select out 7065 // this case and force the answer to 0. 7066 7067 EVT VT = RV.getValueType(); 7068 7069 SDValue Zero = DAG.getConstantFP(0.0, VT.getScalarType()); 7070 if (VT.isVector()) { 7071 assert(VT.getVectorNumElements() == 4 && "Unknown vector type"); 7072 Zero = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Zero, Zero, Zero, Zero); 7073 } 7074 7075 SDValue ZeroCmp = 7076 DAG.getSetCC(dl, getSetCCResultType(*DAG.getContext(), VT), 7077 N->getOperand(0), Zero, ISD::SETEQ); 7078 DCI.AddToWorklist(ZeroCmp.getNode()); 7079 DCI.AddToWorklist(RV.getNode()); 7080 7081 RV = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, dl, VT, 7082 ZeroCmp, Zero, RV); 7083 return RV; 7084 } 7085 } 7086 7087 } 7088 break; 7089 case ISD::SINT_TO_FP: 7090 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 7091 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 7092 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 7093 // We allow the src/dst to be either f32/f64, but the intermediate 7094 // type must be i64. 
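 // For example, (f64 (sint_to_fp (i64 (fp_to_sint f64:X)))) becomes FCFID(FCTIDZ(X)); when the source or destination is f32, an FP_EXTEND or FP_ROUND is wrapped around that pair as shown below.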
7095 if (N->getOperand(0).getValueType() == MVT::i64 && 7096 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 7097 SDValue Val = N->getOperand(0).getOperand(0); 7098 if (Val.getValueType() == MVT::f32) { 7099 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 7100 DCI.AddToWorklist(Val.getNode()); 7101 } 7102 7103 Val = DAG.getNode(PPCISD::FCTIDZ, dl, MVT::f64, Val); 7104 DCI.AddToWorklist(Val.getNode()); 7105 Val = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Val); 7106 DCI.AddToWorklist(Val.getNode()); 7107 if (N->getValueType(0) == MVT::f32) { 7108 Val = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Val, 7109 DAG.getIntPtrConstant(0)); 7110 DCI.AddToWorklist(Val.getNode()); 7111 } 7112 return Val; 7113 } else if (N->getOperand(0).getValueType() == MVT::i32) { 7114 // If the intermediate type is i32, we can avoid the load/store here 7115 // too. 7116 } 7117 } 7118 } 7119 break; 7120 case ISD::STORE: 7121 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 7122 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 7123 !cast<StoreSDNode>(N)->isTruncatingStore() && 7124 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 7125 N->getOperand(1).getValueType() == MVT::i32 && 7126 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 7127 SDValue Val = N->getOperand(1).getOperand(0); 7128 if (Val.getValueType() == MVT::f32) { 7129 Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val); 7130 DCI.AddToWorklist(Val.getNode()); 7131 } 7132 Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val); 7133 DCI.AddToWorklist(Val.getNode()); 7134 7135 SDValue Ops[] = { 7136 N->getOperand(0), Val, N->getOperand(2), 7137 DAG.getValueType(N->getOperand(1).getValueType()) 7138 }; 7139 7140 Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl, 7141 DAG.getVTList(MVT::Other), Ops, array_lengthof(Ops), 7142 cast<StoreSDNode>(N)->getMemoryVT(), 7143 cast<StoreSDNode>(N)->getMemOperand()); 7144 DCI.AddToWorklist(Val.getNode()); 7145 return Val; 7146 } 7147 7148 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 7149 if (cast<StoreSDNode>(N)->isUnindexed() && 7150 N->getOperand(1).getOpcode() == ISD::BSWAP && 7151 N->getOperand(1).getNode()->hasOneUse() && 7152 (N->getOperand(1).getValueType() == MVT::i32 || 7153 N->getOperand(1).getValueType() == MVT::i16 || 7154 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 7155 TM.getSubtarget<PPCSubtarget>().isPPC64() && 7156 N->getOperand(1).getValueType() == MVT::i64))) { 7157 SDValue BSwapOp = N->getOperand(1).getOperand(0); 7158 // Do an any-extend to 32-bits if this is a half-word input. 7159 if (BSwapOp.getValueType() == MVT::i16) 7160 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp); 7161 7162 SDValue Ops[] = { 7163 N->getOperand(0), BSwapOp, N->getOperand(2), 7164 DAG.getValueType(N->getOperand(1).getValueType()) 7165 }; 7166 return 7167 DAG.getMemIntrinsicNode(PPCISD::STBRX, dl, DAG.getVTList(MVT::Other), 7168 Ops, array_lengthof(Ops), 7169 cast<StoreSDNode>(N)->getMemoryVT(), 7170 cast<StoreSDNode>(N)->getMemOperand()); 7171 } 7172 break; 7173 case ISD::LOAD: { 7174 LoadSDNode *LD = cast<LoadSDNode>(N); 7175 EVT VT = LD->getValueType(0); 7176 Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); 7177 unsigned ABIAlignment = getDataLayout()->getABITypeAlignment(Ty); 7178 if (ISD::isNON_EXTLoad(N) && VT.isVector() && 7179 TM.getSubtarget<PPCSubtarget>().hasAltivec() && 7180 DCI.getDAGCombineLevel() == AfterLegalizeTypes && 7181 LD->getAlignment() < ABIAlignment) { 7182 // This is a type-legal unaligned Altivec load. 
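 // The expansion below builds: an lvsl permute-control vector from the original (unaligned) pointer, the original load with its alignment refined to the ABI alignment, a second aligned vector load at a nearby offset, and a vperm that assembles the requested value from the two aligned loads.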
7183 SDValue Chain = LD->getChain(); 7184 SDValue Ptr = LD->getBasePtr(); 7185 7186 // This implements the loading of unaligned vectors as described in 7187 // the venerable Apple Velocity Engine overview. Specifically: 7188 // https://developer.apple.com/hardwaredrivers/ve/alignment.html 7189 // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html 7190 // 7191 // The general idea is to expand a sequence of one or more unaligned 7192 // loads into an alignment-based permutation-control instruction (lvsl), 7193 // a series of regular vector loads (which always truncate their 7194 // input address to an aligned address), and a series of permutations. 7195 // The results of these permutations are the requested loaded values. 7196 // The trick is that the last "extra" load is not taken from the address 7197 // you might suspect (sizeof(vector) bytes after the last requested 7198 // load), but rather sizeof(vector) - 1 bytes after the last 7199 // requested vector. The point of this is to avoid a page fault if the 7200 // base address happened to be aligned. This works because if the base 7201 // address is aligned, then adding less than a full vector length will 7202 // cause the last vector in the sequence to be (re)loaded. Otherwise, 7203 // the next vector will be fetched as you might suspect was necessary. 7204 7205 // We might be able to reuse the permutation generation from 7206 // a different base address offset from this one by an aligned amount. 7207 // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this 7208 // optimization later. 7209 SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr, 7210 DAG, dl, MVT::v16i8); 7211 7212 // Refine the alignment of the original load (a "new" load created here 7213 // which was identical to the first except for the alignment would be 7214 // merged with the existing node regardless). 7215 MachineFunction &MF = DAG.getMachineFunction(); 7216 MachineMemOperand *MMO = 7217 MF.getMachineMemOperand(LD->getPointerInfo(), 7218 LD->getMemOperand()->getFlags(), 7219 LD->getMemoryVT().getStoreSize(), 7220 ABIAlignment); 7221 LD->refineAlignment(MMO); 7222 SDValue BaseLoad = SDValue(LD, 0); 7223 7224 // Note that the value of IncOffset (which is provided to the next 7225 // load's pointer info offset value, and thus used to calculate the 7226 // alignment), and the value of IncValue (which is actually used to 7227 // increment the pointer value) are different! This is because we 7228 // require the next load to appear to be aligned, even though it 7229 // is actually offset from the base pointer by a lesser amount. 7230 int IncOffset = VT.getSizeInBits() / 8; 7231 int IncValue = IncOffset; 7232 7233 // Walk (both up and down) the chain looking for another load at the real 7234 // (aligned) offset (the alignment of the other load does not matter in 7235 // this case). If found, then do not use the offset reduction trick, as 7236 // that will prevent the loads from being later combined (as they would 7237 // otherwise be duplicates). 
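 // For a 16-byte vector with no such neighbouring load, IncValue becomes 15; since lvx ignores the low four address bits, Ptr+15 selects the same aligned block as Ptr+16 whenever Ptr is misaligned, but stays within the first block (avoiding a potential fault past the end of the data) if Ptr happens to be aligned.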
7238 if (!findConsecutiveLoad(LD, DAG)) 7239 --IncValue; 7240 7241 SDValue Increment = DAG.getConstant(IncValue, getPointerTy()); 7242 Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment); 7243 7244 SDValue ExtraLoad = 7245 DAG.getLoad(VT, dl, Chain, Ptr, 7246 LD->getPointerInfo().getWithOffset(IncOffset), 7247 LD->isVolatile(), LD->isNonTemporal(), 7248 LD->isInvariant(), ABIAlignment); 7249 7250 SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, 7251 BaseLoad.getValue(1), ExtraLoad.getValue(1)); 7252 7253 if (BaseLoad.getValueType() != MVT::v4i32) 7254 BaseLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, BaseLoad); 7255 7256 if (ExtraLoad.getValueType() != MVT::v4i32) 7257 ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); 7258 7259 SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, 7260 BaseLoad, ExtraLoad, PermCntl, DAG, dl); 7261 7262 if (VT != MVT::v4i32) 7263 Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); 7264 7265 // Now we need to be really careful about how we update the users of the 7266 // original load. We cannot just call DCI.CombineTo (or 7267 // DAG.ReplaceAllUsesWith for that matter), because the load still has 7268 // uses created here (the permutation for example) that need to stay. 7269 SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 7270 while (UI != UE) { 7271 SDUse &Use = UI.getUse(); 7272 SDNode *User = *UI; 7273 // Note: BaseLoad is checked here because it might not be N, but a 7274 // bitcast of N. 7275 if (User == Perm.getNode() || User == BaseLoad.getNode() || 7276 User == TF.getNode() || Use.getResNo() > 1) { 7277 ++UI; 7278 continue; 7279 } 7280 7281 SDValue To = Use.getResNo() ? TF : Perm; 7282 ++UI; 7283 7284 SmallVector<SDValue, 8> Ops; 7285 for (SDNode::op_iterator O = User->op_begin(), 7286 OE = User->op_end(); O != OE; ++O) { 7287 if (*O == Use) 7288 Ops.push_back(To); 7289 else 7290 Ops.push_back(*O); 7291 } 7292 7293 DAG.UpdateNodeOperands(User, Ops.data(), Ops.size()); 7294 } 7295 7296 return SDValue(N, 0); 7297 } 7298 } 7299 break; 7300 case ISD::INTRINSIC_WO_CHAIN: 7301 if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == 7302 Intrinsic::ppc_altivec_lvsl && 7303 N->getOperand(1)->getOpcode() == ISD::ADD) { 7304 SDValue Add = N->getOperand(1); 7305 7306 if (DAG.MaskedValueIsZero(Add->getOperand(1), 7307 APInt::getAllOnesValue(4 /* 16 byte alignment */).zext( 7308 Add.getValueType().getScalarType().getSizeInBits()))) { 7309 SDNode *BasePtr = Add->getOperand(0).getNode(); 7310 for (SDNode::use_iterator UI = BasePtr->use_begin(), 7311 UE = BasePtr->use_end(); UI != UE; ++UI) { 7312 if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && 7313 cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == 7314 Intrinsic::ppc_altivec_lvsl) { 7315 // We've found another LVSL, and this address if an aligned 7316 // multiple of that one. The results will be the same, so use the 7317 // one we've just found instead. 7318 7319 return SDValue(*UI, 0); 7320 } 7321 } 7322 } 7323 } 7324 7325 break; 7326 case ISD::BSWAP: 7327 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 7328 if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) && 7329 N->getOperand(0).hasOneUse() && 7330 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 || 7331 (TM.getSubtarget<PPCSubtarget>().hasLDBRX() && 7332 TM.getSubtarget<PPCSubtarget>().isPPC64() && 7333 N->getValueType(0) == MVT::i64))) { 7334 SDValue Load = N->getOperand(0); 7335 LoadSDNode *LD = cast<LoadSDNode>(Load); 7336 // Create the byte-swapping load. 
7337 SDValue Ops[] = {
7338 LD->getChain(), // Chain
7339 LD->getBasePtr(), // Ptr
7340 DAG.getValueType(N->getValueType(0)) // VT
7341 };
7342 SDValue BSLoad =
7343 DAG.getMemIntrinsicNode(PPCISD::LBRX, dl,
7344 DAG.getVTList(N->getValueType(0) == MVT::i64 ?
7345 MVT::i64 : MVT::i32, MVT::Other),
7346 Ops, 3, LD->getMemoryVT(), LD->getMemOperand());
7347
7348 // If this is an i16 load, insert the truncate.
7349 SDValue ResVal = BSLoad;
7350 if (N->getValueType(0) == MVT::i16)
7351 ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
7352
7353 // First, combine the bswap away. This makes the value produced by the
7354 // load dead.
7355 DCI.CombineTo(N, ResVal);
7356
7357 // Next, combine the load away; we give it a bogus result value but a real
7358 // chain result. The result value is dead because the bswap is dead.
7359 DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
7360
7361 // Return N so it doesn't get rechecked!
7362 return SDValue(N, 0);
7363 }
7364
7365 break;
7366 case PPCISD::VCMP: {
7367 // If a VCMPo node already exists with exactly the same operands as this
7368 // node, use its result instead of this node (VCMPo computes both a CR6 and
7369 // a normal output).
7370 //
7371 if (!N->getOperand(0).hasOneUse() &&
7372 !N->getOperand(1).hasOneUse() &&
7373 !N->getOperand(2).hasOneUse()) {
7374
7375 // Scan all of the users of the LHS, looking for VCMPo's that match.
7376 SDNode *VCMPoNode = 0;
7377
7378 SDNode *LHSN = N->getOperand(0).getNode();
7379 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
7380 UI != E; ++UI)
7381 if (UI->getOpcode() == PPCISD::VCMPo &&
7382 UI->getOperand(1) == N->getOperand(1) &&
7383 UI->getOperand(2) == N->getOperand(2) &&
7384 UI->getOperand(0) == N->getOperand(0)) {
7385 VCMPoNode = *UI;
7386 break;
7387 }
7388
7389 // If there is no VCMPo node, or if its flag result is unused, don't
7390 // transform this.
7391 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
7392 break;
7393
7394 // Look at the (necessarily single) use of the flag value. If it has a
7395 // chain, this transformation is more complex. Note that multiple things
7396 // could use the value result, which we should ignore.
7397 SDNode *FlagUser = 0;
7398 for (SDNode::use_iterator UI = VCMPoNode->use_begin();
7399 FlagUser == 0; ++UI) {
7400 assert(UI != VCMPoNode->use_end() && "Didn't find user!");
7401 SDNode *User = *UI;
7402 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
7403 if (User->getOperand(i) == SDValue(VCMPoNode, 1)) {
7404 FlagUser = User;
7405 break;
7406 }
7407 }
7408 }
7409
7410 // If the user is an MFOCRF instruction, we know this is safe.
7411 // Otherwise we give up for right now.
7412 if (FlagUser->getOpcode() == PPCISD::MFOCRF)
7413 return SDValue(VCMPoNode, 0);
7414 }
7415 break;
7416 }
7417 case ISD::BR_CC: {
7418 // If this is a branch on an altivec predicate comparison, lower this so
7419 // that we don't have to do an MFOCRF: instead, branch directly on CR6. This
7420 // lowering is done pre-legalize, because the legalizer lowers the predicate
7421 // compare down to code that is difficult to reassemble.
7422 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
7423 SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
7424
7425 // Sometimes the promoted value of the intrinsic is ANDed with some non-zero
7426 // value. If so, look through the AND to get to the intrinsic.
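// For orientation (a hypothetical shape, not a literal dump), the promoted
// comparison operand may look like
//   (and (intrinsic_w_chain llvm.ppc.is.decremented.ctr.nonzero), 1)
// rather than the bare intrinsic; the AND with a known non-zero constant is
// stripped here so the intrinsic can be recognized and turned into a
// BDNZ/BDZ branch below.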
7427 if (LHS.getOpcode() == ISD::AND && 7428 LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN && 7429 cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() == 7430 Intrinsic::ppc_is_decremented_ctr_nonzero && 7431 isa<ConstantSDNode>(LHS.getOperand(1)) && 7432 !cast<ConstantSDNode>(LHS.getOperand(1))->getConstantIntValue()-> 7433 isZero()) 7434 LHS = LHS.getOperand(0); 7435 7436 if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN && 7437 cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() == 7438 Intrinsic::ppc_is_decremented_ctr_nonzero && 7439 isa<ConstantSDNode>(RHS)) { 7440 assert((CC == ISD::SETEQ || CC == ISD::SETNE) && 7441 "Counter decrement comparison is not EQ or NE"); 7442 7443 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 7444 bool isBDNZ = (CC == ISD::SETEQ && Val) || 7445 (CC == ISD::SETNE && !Val); 7446 7447 // We now need to make the intrinsic dead (it cannot be instruction 7448 // selected). 7449 DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0)); 7450 assert(LHS.getNode()->hasOneUse() && 7451 "Counter decrement has more than one use"); 7452 7453 return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other, 7454 N->getOperand(0), N->getOperand(4)); 7455 } 7456 7457 int CompareOpc; 7458 bool isDot; 7459 7460 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 7461 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 7462 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 7463 assert(isDot && "Can't compare against a vector result!"); 7464 7465 // If this is a comparison against something other than 0/1, then we know 7466 // that the condition is never/always true. 7467 unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue(); 7468 if (Val != 0 && Val != 1) { 7469 if (CC == ISD::SETEQ) // Cond never true, remove branch. 7470 return N->getOperand(0); 7471 // Always !=, turn it into an unconditional branch. 7472 return DAG.getNode(ISD::BR, dl, MVT::Other, 7473 N->getOperand(0), N->getOperand(4)); 7474 } 7475 7476 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 7477 7478 // Create the PPCISD altivec 'dot' comparison node. 7479 SDValue Ops[] = { 7480 LHS.getOperand(2), // LHS of compare 7481 LHS.getOperand(3), // RHS of compare 7482 DAG.getConstant(CompareOpc, MVT::i32) 7483 }; 7484 EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue }; 7485 SDValue CompNode = DAG.getNode(PPCISD::VCMPo, dl, VTs, Ops, 3); 7486 7487 // Unpack the result based on how the target uses it. 7488 PPC::Predicate CompOpc; 7489 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) { 7490 default: // Can't happen, don't crash on invalid number though. 7491 case 0: // Branch on the value of the EQ bit of CR6. 7492 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 7493 break; 7494 case 1: // Branch on the inverted value of the EQ bit of CR6. 7495 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 7496 break; 7497 case 2: // Branch on the value of the LT bit of CR6. 7498 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 7499 break; 7500 case 3: // Branch on the inverted value of the LT bit of CR6. 7501 CompOpc = BranchOnWhenPredTrue ? 
PPC::PRED_GE : PPC::PRED_LT; 7502 break; 7503 } 7504 7505 return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0), 7506 DAG.getConstant(CompOpc, MVT::i32), 7507 DAG.getRegister(PPC::CR6, MVT::i32), 7508 N->getOperand(4), CompNode.getValue(1)); 7509 } 7510 break; 7511 } 7512 } 7513 7514 return SDValue(); 7515} 7516 7517//===----------------------------------------------------------------------===// 7518// Inline Assembly Support 7519//===----------------------------------------------------------------------===// 7520 7521void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, 7522 APInt &KnownZero, 7523 APInt &KnownOne, 7524 const SelectionDAG &DAG, 7525 unsigned Depth) const { 7526 KnownZero = KnownOne = APInt(KnownZero.getBitWidth(), 0); 7527 switch (Op.getOpcode()) { 7528 default: break; 7529 case PPCISD::LBRX: { 7530 // lhbrx is known to have the top bits cleared out. 7531 if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16) 7532 KnownZero = 0xFFFF0000; 7533 break; 7534 } 7535 case ISD::INTRINSIC_WO_CHAIN: { 7536 switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) { 7537 default: break; 7538 case Intrinsic::ppc_altivec_vcmpbfp_p: 7539 case Intrinsic::ppc_altivec_vcmpeqfp_p: 7540 case Intrinsic::ppc_altivec_vcmpequb_p: 7541 case Intrinsic::ppc_altivec_vcmpequh_p: 7542 case Intrinsic::ppc_altivec_vcmpequw_p: 7543 case Intrinsic::ppc_altivec_vcmpgefp_p: 7544 case Intrinsic::ppc_altivec_vcmpgtfp_p: 7545 case Intrinsic::ppc_altivec_vcmpgtsb_p: 7546 case Intrinsic::ppc_altivec_vcmpgtsh_p: 7547 case Intrinsic::ppc_altivec_vcmpgtsw_p: 7548 case Intrinsic::ppc_altivec_vcmpgtub_p: 7549 case Intrinsic::ppc_altivec_vcmpgtuh_p: 7550 case Intrinsic::ppc_altivec_vcmpgtuw_p: 7551 KnownZero = ~1U; // All bits but the low one are known to be zero. 7552 break; 7553 } 7554 } 7555 } 7556} 7557 7558 7559/// getConstraintType - Given a constraint, return the type of 7560/// constraint it is for this target. 7561PPCTargetLowering::ConstraintType 7562PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 7563 if (Constraint.size() == 1) { 7564 switch (Constraint[0]) { 7565 default: break; 7566 case 'b': 7567 case 'r': 7568 case 'f': 7569 case 'v': 7570 case 'y': 7571 return C_RegisterClass; 7572 case 'Z': 7573 // FIXME: While Z does indicate a memory constraint, it specifically 7574 // indicates an r+r address (used in conjunction with the 'y' modifier 7575 // in the replacement string). Currently, we're forcing the base 7576 // register to be r0 in the asm printer (which is interpreted as zero) 7577 // and forming the complete address in the second register. This is 7578 // suboptimal. 7579 return C_Memory; 7580 } 7581 } 7582 return TargetLowering::getConstraintType(Constraint); 7583} 7584 7585/// Examine constraint type and operand type and determine a weight value. 7586/// This object must already have been set up with the operand type 7587/// and the current alternative constraint selected. 7588TargetLowering::ConstraintWeight 7589PPCTargetLowering::getSingleConstraintMatchWeight( 7590 AsmOperandInfo &info, const char *constraint) const { 7591 ConstraintWeight weight = CW_Invalid; 7592 Value *CallOperandVal = info.CallOperandVal; 7593 // If we don't have a value, we can't do a match, 7594 // but allow it at the lowest weight. 7595 if (CallOperandVal == NULL) 7596 return CW_Default; 7597 Type *type = CallOperandVal->getType(); 7598 // Look at the constraint type. 
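// For example (a hypothetical use, shown only for illustration), given
//   asm ("fadds %0, %1, %2" : "=f"(x) : "f"(y), "f"(z));
// with float operands, the 'f' case below reports CW_Register so the
// operands are steered toward the FP register classes chosen in
// getRegForInlineAsmConstraint.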
7599 switch (*constraint) { 7600 default: 7601 weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); 7602 break; 7603 case 'b': 7604 if (type->isIntegerTy()) 7605 weight = CW_Register; 7606 break; 7607 case 'f': 7608 if (type->isFloatTy()) 7609 weight = CW_Register; 7610 break; 7611 case 'd': 7612 if (type->isDoubleTy()) 7613 weight = CW_Register; 7614 break; 7615 case 'v': 7616 if (type->isVectorTy()) 7617 weight = CW_Register; 7618 break; 7619 case 'y': 7620 weight = CW_Register; 7621 break; 7622 case 'Z': 7623 weight = CW_Memory; 7624 break; 7625 } 7626 return weight; 7627} 7628 7629std::pair<unsigned, const TargetRegisterClass*> 7630PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 7631 MVT VT) const { 7632 if (Constraint.size() == 1) { 7633 // GCC RS6000 Constraint Letters 7634 switch (Constraint[0]) { 7635 case 'b': // R1-R31 7636 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 7637 return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); 7638 return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); 7639 case 'r': // R0-R31 7640 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 7641 return std::make_pair(0U, &PPC::G8RCRegClass); 7642 return std::make_pair(0U, &PPC::GPRCRegClass); 7643 case 'f': 7644 if (VT == MVT::f32 || VT == MVT::i32) 7645 return std::make_pair(0U, &PPC::F4RCRegClass); 7646 if (VT == MVT::f64 || VT == MVT::i64) 7647 return std::make_pair(0U, &PPC::F8RCRegClass); 7648 break; 7649 case 'v': 7650 return std::make_pair(0U, &PPC::VRRCRegClass); 7651 case 'y': // crrc 7652 return std::make_pair(0U, &PPC::CRRCRegClass); 7653 } 7654 } 7655 7656 std::pair<unsigned, const TargetRegisterClass*> R = 7657 TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 7658 7659 // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers 7660 // (which we call X[0-9]+). If a 64-bit value has been requested, and a 7661 // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent 7662 // register. 7663 // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use 7664 // the AsmName field from *RegisterInfo.td, then this would not be necessary. 7665 if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() && 7666 PPC::GPRCRegClass.contains(R.first)) { 7667 const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); 7668 return std::make_pair(TRI->getMatchingSuperReg(R.first, 7669 PPC::sub_32, &PPC::G8RCRegClass), 7670 &PPC::G8RCRegClass); 7671 } 7672 7673 return R; 7674} 7675 7676 7677/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 7678/// vector. If it is invalid, don't add anything to Ops. 7679void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op, 7680 std::string &Constraint, 7681 std::vector<SDValue>&Ops, 7682 SelectionDAG &DAG) const { 7683 SDValue Result(0,0); 7684 7685 // Only support length 1 constraints. 7686 if (Constraint.length() > 1) return; 7687 7688 char Letter = Constraint[0]; 7689 switch (Letter) { 7690 default: break; 7691 case 'I': 7692 case 'J': 7693 case 'K': 7694 case 'L': 7695 case 'M': 7696 case 'N': 7697 case 'O': 7698 case 'P': { 7699 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 7700 if (!CST) return; // Must be an immediate to match. 7701 unsigned Value = CST->getZExtValue(); 7702 switch (Letter) { 7703 default: llvm_unreachable("Unknown constraint letter!"); 7704 case 'I': // "I" is a signed 16-bit constant. 
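// The cast-based check below accepts exactly the values that survive a
// round-trip through a signed 16-bit immediate: e.g. 32767 matches 'I',
// while 32768 does not (it would truncate to -32768).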
7705 if ((short)Value == (int)Value)
7706 Result = DAG.getTargetConstant(Value, Op.getValueType());
7707 break;
7708 case 'J': // "J" is a constant with only the high-order 16 bits nonzero.
7709 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits.
7710 if ((short)Value == 0)
7711 Result = DAG.getTargetConstant(Value, Op.getValueType());
7712 break;
7713 case 'K': // "K" is a constant with only the low-order 16 bits nonzero.
7714 if ((Value >> 16) == 0)
7715 Result = DAG.getTargetConstant(Value, Op.getValueType());
7716 break;
7717 case 'M': // "M" is a constant that is greater than 31.
7718 if (Value > 31)
7719 Result = DAG.getTargetConstant(Value, Op.getValueType());
7720 break;
7721 case 'N': // "N" is a positive constant that is an exact power of two.
7722 if ((int)Value > 0 && isPowerOf2_32(Value))
7723 Result = DAG.getTargetConstant(Value, Op.getValueType());
7724 break;
7725 case 'O': // "O" is the constant zero.
7726 if (Value == 0)
7727 Result = DAG.getTargetConstant(Value, Op.getValueType());
7728 break;
7729 case 'P': // "P" is a constant whose negation is a signed 16-bit constant.
7730 if ((short)-Value == (int)-Value)
7731 Result = DAG.getTargetConstant(Value, Op.getValueType());
7732 break;
7733 }
7734 break;
7735 }
7736 }
7737
7738 if (Result.getNode()) {
7739 Ops.push_back(Result);
7740 return;
7741 }
7742
7743 // Handle standard constraint letters.
7744 TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
7745}
7746
7747// isLegalAddressingMode - Return true if the addressing mode represented
7748// by AM is legal for this target, for a load/store of the specified type.
7749bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
7750 Type *Ty) const {
7751 // FIXME: PPC does not allow r+i addressing modes for vectors!
7752
7753 // PPC allows a sign-extended 16-bit immediate field.
7754 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
7755 return false;
7756
7757 // No global is ever allowed as a base.
7758 if (AM.BaseGV)
7759 return false;
7760
7761 // PPC only supports r+r and r+i addressing; there are no scaled modes.
7762 switch (AM.Scale) {
7763 case 0: // "r+i" or just "i", depending on HasBaseReg.
7764 break;
7765 case 1:
7766 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
7767 return false;
7768 // Otherwise we have r+r or r+i.
7769 break;
7770 case 2:
7771 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
7772 return false;
7773 // Allow 2*r as r+r.
7774 break;
7775 default:
7776 // No other scales are supported.
7777 return false;
7778 }
7779
7780 return true;
7781}
7782
7783SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
7784 SelectionDAG &DAG) const {
7785 MachineFunction &MF = DAG.getMachineFunction();
7786 MachineFrameInfo *MFI = MF.getFrameInfo();
7787 MFI->setReturnAddressIsTaken(true);
7788
7789 SDLoc dl(Op);
7790 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7791
7792 // Make sure the function does not optimize away the store of the RA to
7793 // the stack.
7794 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
7795 FuncInfo->setLRStoreRequired();
7796 bool isPPC64 = PPCSubTarget.isPPC64();
7797 bool isDarwinABI = PPCSubTarget.isDarwinABI();
7798
7799 if (Depth > 0) {
7800 SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
7801 SDValue Offset =
7802
7803 DAG.getConstant(PPCFrameLowering::getReturnSaveOffset(isPPC64, isDarwinABI),
7804 isPPC64?
MVT::i64 : MVT::i32);
7805 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
7806 DAG.getNode(ISD::ADD, dl, getPointerTy(),
7807 FrameAddr, Offset),
7808 MachinePointerInfo(), false, false, false, 0);
7809 }
7810
7811 // Just load the return address off the stack.
7812 SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
7813 return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
7814 RetAddrFI, MachinePointerInfo(), false, false, false, 0);
7815}
7816
7817SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
7818 SelectionDAG &DAG) const {
7819 SDLoc dl(Op);
7820 unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
7821
7822 EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
7823 bool isPPC64 = PtrVT == MVT::i64;
7824
7825 MachineFunction &MF = DAG.getMachineFunction();
7826 MachineFrameInfo *MFI = MF.getFrameInfo();
7827 MFI->setFrameAddressIsTaken(true);
7828
7829 // Naked functions never have a frame pointer, and so we use r1. For all
7830 // other functions, this decision must be deferred until PEI.
7831 unsigned FrameReg;
7832 if (MF.getFunction()->getAttributes().hasAttribute(
7833 AttributeSet::FunctionIndex, Attribute::Naked))
7834 FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
7835 else
7836 FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
7837
7838 SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
7839 PtrVT);
7840 while (Depth--)
7841 FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
7842 FrameAddr, MachinePointerInfo(), false, false,
7843 false, 0);
7844 return FrameAddr;
7845}
7846
7847bool
7848PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
7849 // The PowerPC target isn't yet aware of offsets.
7850 return false;
7851}
7852
7853/// getOptimalMemOpType - Returns the target specific optimal type for load
7854/// and store operations as a result of memset, memcpy, and memmove
7855/// lowering. If DstAlign is zero, that means the destination alignment can
7856/// satisfy any constraint. Similarly, if SrcAlign is zero it
7857/// means there isn't a need to check it against the alignment requirement,
7858/// probably because the source does not need to be loaded. If 'IsMemset' is
7859/// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
7860/// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
7861/// source is constant so it does not need to be loaded.
7862/// It returns EVT::Other if the type should be determined using generic
7863/// target-independent logic.
7864EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size,
7865 unsigned DstAlign, unsigned SrcAlign,
7866 bool IsMemset, bool ZeroMemset,
7867 bool MemcpyStrSrc,
7868 MachineFunction &MF) const {
7869 if (this->PPCSubTarget.isPPC64()) {
7870 return MVT::i64;
7871 } else {
7872 return MVT::i32;
7873 }
7874}
7875
7876bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT,
7877 bool *Fast) const {
7878 if (DisablePPCUnaligned)
7879 return false;
7880
7881 // PowerPC supports unaligned memory access for simple non-vector types.
7882 // Although accessing unaligned addresses is not as efficient as accessing
7883 // aligned addresses, it is generally more efficient than manual expansion,
7884 // and generally only traps for software emulation when crossing page
7885 // boundaries.
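// For example, an unaligned i32 access is left as a single lwz/stw here
// rather than being expanded by the target-independent code into byte
// accesses and shifts; vector and ppcf128 types are still rejected below
// and take the generic expansion.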
7886 7887 if (!VT.isSimple()) 7888 return false; 7889 7890 if (VT.getSimpleVT().isVector()) 7891 return false; 7892 7893 if (VT == MVT::ppcf128) 7894 return false; 7895 7896 if (Fast) 7897 *Fast = true; 7898 7899 return true; 7900} 7901 7902bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const { 7903 VT = VT.getScalarType(); 7904 7905 if (!VT.isSimple()) 7906 return false; 7907 7908 switch (VT.getSimpleVT().SimpleTy) { 7909 case MVT::f32: 7910 case MVT::f64: 7911 return true; 7912 default: 7913 break; 7914 } 7915 7916 return false; 7917} 7918 7919Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { 7920 if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) 7921 return TargetLowering::getSchedulingPreference(N); 7922 7923 return Sched::ILP; 7924} 7925 7926// Create a fast isel object. 7927FastISel * 7928PPCTargetLowering::createFastISel(FunctionLoweringInfo &FuncInfo, 7929 const TargetLibraryInfo *LibInfo) const { 7930 return PPC::createFastISel(FuncInfo, LibInfo); 7931} 7932