PPCISelLowering.cpp revision 85e7ac0353d6be357e6cab49f40304793ee601a1
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ParameterAttributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
  cl::desc("enable preincrement load/store generation on PPC (experimental)"),
  cl::Hidden);

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()),
    PPCAtomicLabelIndex(0) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 SEXTLOAD, but no i8 (or i1) SEXTLOAD.
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Use the hardware square root when available; otherwise expand FSQRT.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have SELECT
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
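  // (fctiwz rounds toward zero and leaves the 32-bit result in an FPR; the
  // value is then moved through memory with a floating-point store and
  // reloaded into a GPR, which is roughly what the custom lowering emits.)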
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::DBG_STOPPOINT, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);


  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // VAARG is custom lowered with ELF 32 ABI
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  setOperationAction(ISD::ATOMIC_LOAD_ADD   , MVT::i32  , Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP   , MVT::i32  , Custom);
  setOperationAction(ISD::ATOMIC_SWAP       , MVT::i32  , Custom);
  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD , MVT::i64  , Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP , MVT::i64  , Custom);
    setOperationAction(ISD::ATOMIC_SWAP     , MVT::i64  , Custom);
  }

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE, VT, Promote);
      AddPromotedToType (ISD::STORE, VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM; others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
  }

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
  TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  // FIXME Elf TBD
  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::CMP_UNRESERVE:   return "PPCISD::CMP_UNRESERVE";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  }
}


MVT PPCTargetLowering::getSetCCResultType(const SDOperand &) const {
  return MVT::i32;
}


//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
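// In this revision a shuffle mask is itself a BUILD_VECTOR of 16 constant
// byte indices (or UNDEFs): values 0-15 select bytes from the first source
// vector and 16-31 from the second.  The predicates below recognize the byte
// patterns corresponding to individual Altivec permute-class instructions.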
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
      return CFP->getValueAPF().isNegZero();
  return false;
}
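// Example: a splat of element 3 of a v4i32 vector appears here as the byte
// mask <12,13,14,15, 12,13,14,15, 12,13,14,15, 12,13,14,15>, so
// isSplatShuffleMask(N, 4) accepts it and getVSPLTImmediate (below) returns
// 12/4 == 3.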
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per splat value.
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)                                 // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.
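  // OpVal is now the single defined element.  What follows checks whether its
  // bit pattern, halved repeatedly if the element is wider than the requested
  // splat size, fits the 5-bit signed immediate of vspltis[bhw].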

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();
}
static bool isIntS16Immediate(SDOperand Op, short &Imm) {
  return isIntS16Immediate(Op.Val, Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,
                                            SDOperand &Index,
                                            SelectionDAG &DAG) {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          APInt::getAllOnesValue(N.getOperand(0)
                                                 .getValueSizeInBits()),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            APInt::getAllOnesValue(N.getOperand(1)
                                                   .getValueSizeInBits()),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,
                                            SDOperand &Base, SelectionDAG &DAG){
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
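    // (Illustrative: for Addr = 0x12348765, Disp = (short)Addr = -30875 and
    // the LIS immediate below is (0x12348765 - (-30875)) >> 16 == 0x1235;
    // 0x12350000 + (-30875) == 0x12348765, so the adjusted high part
    // compensates for a negative low half.)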
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getValue() == (int)CN->getValue()) {
      int Addr = (int)CN->getValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,
                                                SDOperand &Index,
                                                SelectionDAG &DAG) {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPC::R0, N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
                                                 SDOperand &Base,
                                                 SelectionDAG &DAG) {
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getValue() == (int)CN->getValue()) {
        int Addr = (int)CN->getValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);

        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}


/// getPreIndexedAddressParts - Returns true by value, and sets the base
/// pointer, offset pointer, and addressing mode by reference, if this node's
/// address can be legally represented as a pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
                                                  SDOperand &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) {
  // Disabled by default for now.
  if (!EnablePPCPreinc) return false;

  SDOperand Ptr;
  MVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    ST = ST;
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
                                               SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
                                                   SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDOperand(); // Not reached
}

SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
                                                SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return GA;
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
}

SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

SDOperand PPCTargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {

  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
  return SDOperand(); // Not reached
}

SDOperand PPCTargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                                          int VarArgsFrameIndex,
                                          int VarArgsStackOffset,
                                          unsigned VarArgsNumGPR,
                                          unsigned VarArgsNumFPR,
                                          const PPCSubtarget &Subtarget) {

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
  }

  // For ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];


  SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);


  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDOperand StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDOperand ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDOperand ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDOperand ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
                                      Op.getOperand(1), SV, 0);
  uint64_t nextOffset = FPROffset;
  SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDOperand secondStore =
    DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDOperand thirdStore =
    DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset);

}

#include "PPCGenCallingConv.inc"

/// GetFPR - Get the set of FP registers that should be allocated for
/// arguments, depending on which subtarget is selected.
static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
  if (Subtarget.isMachoABI()) {
    static const unsigned FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
    return FPR;
  }


  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };
  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag,
                                       bool isVarArg, unsigned PtrByteSize) {
  MVT ArgVT = Arg.getValueType();
  ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
  unsigned ArgSize = ArgVT.getSizeInBits()/8;
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

SDOperand
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
                                         SelectionDAG &DAG,
                                         int &VarArgsFrameIndex,
                                         int &VarArgsStackOffset,
                                         unsigned &VarArgsNumGPR,
                                         unsigned &VarArgsNumFPR,
                                         const PPCSubtarget &Subtarget) {
  // TODO: add description of PPC stack frame format, or at least some docs.
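  // Rough sketch of the layout assumed here (PPCFrameInfo holds the
  // authoritative offsets): in the Macho ABI the caller's linkage area (back
  // chain, saved CR, saved LR, reserved words) comes first; the parameter
  // save area follows it, and its first eight words are shadowed by
  // GPR3-GPR10, with FPRs and VRs overlapping as handled below.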
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();
  // Potential tail calls could cause overwriting of argument stack slots.
  unsigned CC = MF.getFunction()->getCallingConv();
  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
  // to handle Elf here.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e;
         ++ArgNo) {
      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      ISD::ArgFlagsTy Flags =
        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        ObjSize = Flags.getByValSize();
        unsigned ArgSize =
          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT()) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += isPPC64 ? 8 : 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
1434 break; 1435 } 1436 } 1437 } 1438 // We've found where the vector parameter area in memory is. Skip the 1439 // first 12 parameters; these don't use that memory. 1440 VecArgOffset = ((VecArgOffset+15)/16)*16; 1441 VecArgOffset += 12*16; 1442 1443 // Add DAG nodes to load the arguments or copy them out of registers. On 1444 // entry to a function on PPC, the arguments start after the linkage area, 1445 // although the first ones are often in registers. 1446 // 1447 // In the ELF 32 ABI, GPRs and stack are double word aligned: an argument 1448 // represented with two words (long long or double) must be copied to an 1449 // even GPR_idx value or to an even ArgOffset value. 1450 1451 SmallVector<SDOperand, 8> MemOps; 1452 unsigned nAltivecParamsAtEnd = 0; 1453 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { 1454 SDOperand ArgVal; 1455 bool needsLoad = false; 1456 MVT ObjectVT = Op.getValue(ArgNo).getValueType(); 1457 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 1458 unsigned ArgSize = ObjSize; 1459 ISD::ArgFlagsTy Flags = 1460 cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); 1461 // See if next argument requires stack alignment in ELF 1462 bool Align = Flags.isSplit(); 1463 1464 unsigned CurArgOffset = ArgOffset; 1465 1466 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 1467 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 1468 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 1469 if (isVarArg || isPPC64) { 1470 MinReservedArea = ((MinReservedArea+15)/16)*16; 1471 MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), 1472 Op.getOperand(ArgNo+3), 1473 isVarArg, 1474 PtrByteSize); 1475 } else nAltivecParamsAtEnd++; 1476 } else 1477 // Calculate min reserved area. 1478 MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), 1479 Op.getOperand(ArgNo+3), 1480 isVarArg, 1481 PtrByteSize); 1482 1483 // FIXME alignment for ELF may not be right 1484 // FIXME the codegen can be much improved in some cases. 1485 // We do not have to keep everything in memory. 1486 if (Flags.isByVal()) { 1487 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 1488 ObjSize = Flags.getByValSize(); 1489 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1490 // Double word align in ELF 1491 if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1492 // Objects of size 1 and 2 are right justified, everything else is 1493 // left justified. This means the memory address is adjusted forwards. 1494 if (ObjSize==1 || ObjSize==2) { 1495 CurArgOffset = CurArgOffset + (4 - ObjSize); 1496 } 1497 // The value of the object is its address. 1498 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); 1499 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1500 ArgValues.push_back(FIN); 1501 if (ObjSize==1 || ObjSize==2) { 1502 if (GPR_idx != Num_GPR_Regs) { 1503 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1504 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1505 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1506 SDOperand Store = DAG.getTruncStore(Val.getValue(1), Val, FIN, 1507 NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); 1508 MemOps.push_back(Store); 1509 ++GPR_idx; 1510 if (isMachoABI) ArgOffset += PtrByteSize; 1511 } else { 1512 ArgOffset += PtrByteSize; 1513 } 1514 continue; 1515 } 1516 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 1517 // Store whatever pieces of the object are in registers 1518 // to memory. ArgVal will be address of the beginning of 1519 // the object.
1520 if (GPR_idx != Num_GPR_Regs) { 1521 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1522 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1523 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); 1524 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1525 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1526 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1527 MemOps.push_back(Store); 1528 ++GPR_idx; 1529 if (isMachoABI) ArgOffset += PtrByteSize; 1530 } else { 1531 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 1532 break; 1533 } 1534 } 1535 continue; 1536 } 1537 1538 switch (ObjectVT.getSimpleVT()) { 1539 default: assert(0 && "Unhandled argument type!"); 1540 case MVT::i32: 1541 if (!isPPC64) { 1542 // Double word align in ELF 1543 if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1544 1545 if (GPR_idx != Num_GPR_Regs) { 1546 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1547 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1548 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); 1549 ++GPR_idx; 1550 } else { 1551 needsLoad = true; 1552 ArgSize = PtrByteSize; 1553 } 1554 // Stack align in ELF 1555 if (needsLoad && Align && isELF32_ABI) 1556 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1557 // All int arguments reserve stack space in Macho ABI. 1558 if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; 1559 break; 1560 } 1561 // FALLTHROUGH 1562 case MVT::i64: // PPC64 1563 if (GPR_idx != Num_GPR_Regs) { 1564 unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1565 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1566 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1567 1568 if (ObjectVT == MVT::i32) { 1569 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 1570 // value to MVT::i64 and then truncate to the correct register size. 1571 if (Flags.isSExt()) 1572 ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal, 1573 DAG.getValueType(ObjectVT)); 1574 else if (Flags.isZExt()) 1575 ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal, 1576 DAG.getValueType(ObjectVT)); 1577 1578 ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal); 1579 } 1580 1581 ++GPR_idx; 1582 } else { 1583 needsLoad = true; 1584 } 1585 // All int arguments reserve stack space in Macho ABI. 1586 if (isMachoABI || needsLoad) ArgOffset += 8; 1587 break; 1588 1589 case MVT::f32: 1590 case MVT::f64: 1591 // Every 4 bytes of argument space consumes one of the GPRs available for 1592 // argument passing. 1593 if (GPR_idx != Num_GPR_Regs && isMachoABI) { 1594 ++GPR_idx; 1595 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 1596 ++GPR_idx; 1597 } 1598 if (FPR_idx != Num_FPR_Regs) { 1599 unsigned VReg; 1600 if (ObjectVT == MVT::f32) 1601 VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); 1602 else 1603 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1604 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1605 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1606 ++FPR_idx; 1607 } else { 1608 needsLoad = true; 1609 } 1610 1611 // Stack align in ELF 1612 if (needsLoad && Align && isELF32_ABI) 1613 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1614 // All FP arguments reserve stack space in Macho ABI. 1615 if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; 1616 break; 1617 case MVT::v4f32: 1618 case MVT::v4i32: 1619 case MVT::v8i16: 1620 case MVT::v16i8: 1621 // Note that vector arguments in registers don't reserve stack space, 1622 // except in varargs functions. 
1623 if (VR_idx != Num_VR_Regs) { 1624 unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); 1625 RegInfo.addLiveIn(VR[VR_idx], VReg); 1626 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1627 if (isVarArg) { 1628 while ((ArgOffset % 16) != 0) { 1629 ArgOffset += PtrByteSize; 1630 if (GPR_idx != Num_GPR_Regs) 1631 GPR_idx++; 1632 } 1633 ArgOffset += 16; 1634 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); 1635 } 1636 ++VR_idx; 1637 } else { 1638 if (!isVarArg && !isPPC64) { 1639 // Vectors go after all the nonvectors. 1640 CurArgOffset = VecArgOffset; 1641 VecArgOffset += 16; 1642 } else { 1643 // Vectors are aligned. 1644 ArgOffset = ((ArgOffset+15)/16)*16; 1645 CurArgOffset = ArgOffset; 1646 ArgOffset += 16; 1647 } 1648 needsLoad = true; 1649 } 1650 break; 1651 } 1652 1653 // We need to load the argument to a virtual register if we determined above 1654 // that we ran out of physical registers of the appropriate type. 1655 if (needsLoad) { 1656 int FI = MFI->CreateFixedObject(ObjSize, 1657 CurArgOffset + (ArgSize - ObjSize), 1658 isImmutable); 1659 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1660 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); 1661 } 1662 1663 ArgValues.push_back(ArgVal); 1664 } 1665 1666 // Set the size that is at least reserved in caller of this function. Tail 1667 // call optimized function's reserved stack space needs to be aligned so that 1668 // taking the difference between two stack areas will result in an aligned 1669 // stack. 1670 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1671 // Add the Altivec parameters at the end, if needed. 1672 if (nAltivecParamsAtEnd) { 1673 MinReservedArea = ((MinReservedArea+15)/16)*16; 1674 MinReservedArea += 16*nAltivecParamsAtEnd; 1675 } 1676 MinReservedArea = 1677 std::max(MinReservedArea, 1678 PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); 1679 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> 1680 getStackAlignment(); 1681 unsigned AlignMask = TargetAlign-1; 1682 MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; 1683 FI->setMinReservedArea(MinReservedArea); 1684 1685 // If the function takes variable number of arguments, make a frame index for 1686 // the start of the first vararg value... for expansion of llvm.va_start. 1687 if (isVarArg) { 1688 1689 int depth; 1690 if (isELF32_ABI) { 1691 VarArgsNumGPR = GPR_idx; 1692 VarArgsNumFPR = FPR_idx; 1693 1694 // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame 1695 // pointer. 1696 depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 + 1697 Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 + 1698 PtrVT.getSizeInBits()/8); 1699 1700 VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 1701 ArgOffset); 1702 1703 } 1704 else 1705 depth = ArgOffset; 1706 1707 VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8, 1708 depth); 1709 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1710 1711 // In ELF 32 ABI, the fixed integer arguments of a variadic function are 1712 // stored to the VarArgsFrameIndex on the stack. 
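 // (The register save area was laid out above at negative offset 'depth':
 // Num_GPR_Regs pointer-sized slots for the GPR arguments, then Num_FPR_Regs
 // f64 slots for the FPR arguments, plus one slot for a possible frame
 // pointer. The loops below fill it in: the fixed GPR arguments first, then
 // any remaining GPRs, and then the FPRs in the same two-step fashion.)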
1713 if (isELF32_ABI) { 1714 for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { 1715 SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT); 1716 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1717 MemOps.push_back(Store); 1718 // Increment the address by four for the next argument to store 1719 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 1720 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1721 } 1722 } 1723 1724 // If this function is vararg, store any remaining integer argument regs 1725 // to their spots on the stack so that they may be loaded by dereferencing 1726 // the result of va_next. 1727 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 1728 unsigned VReg; 1729 if (isPPC64) 1730 VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1731 else 1732 VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1733 1734 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1735 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1736 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1737 MemOps.push_back(Store); 1738 // Increment the address by four for the next argument to store 1739 SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT); 1740 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1741 } 1742 1743 // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex 1744 // on the stack. 1745 if (isELF32_ABI) { 1746 for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { 1747 SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); 1748 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1749 MemOps.push_back(Store); 1750 // Increment the address by eight for the next argument to store 1751 SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, 1752 PtrVT); 1753 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1754 } 1755 1756 for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { 1757 unsigned VReg; 1758 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1759 1760 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1761 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64); 1762 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1763 MemOps.push_back(Store); 1764 // Increment the address by eight for the next argument to store 1765 SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, 1766 PtrVT); 1767 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1768 } 1769 } 1770 } 1771 1772 if (!MemOps.empty()) 1773 Root = DAG.getNode(ISD::TokenFactor, MVT::Other, &MemOps[0], MemOps.size()); 1774 1775 ArgValues.push_back(Root); 1776 1777 // Return the new list of results. 1778 return DAG.getMergeValues(Op.Val->getVTList(), &ArgValues[0], 1779 ArgValues.size()); 1780} 1781 1782/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus 1783/// linkage area. 1784static unsigned 1785CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, 1786 bool isPPC64, 1787 bool isMachoABI, 1788 bool isVarArg, 1789 unsigned CC, 1790 SDOperand Call, 1791 unsigned &nAltivecParamsAtEnd) { 1792 // Count how many bytes are to be pushed on the stack, including the linkage 1793 // area, and parameter passing area. We start with 24/48 bytes, which is 1794 // prereserved space for [SP][CR][LR][3 x unused]. 1795 unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); 1796 unsigned NumOps = (Call.getNumOperands() - 5) / 2; 1797 unsigned PtrByteSize = isPPC64 ? 8 : 4; 1798 1799 // Add up all the space actually used.
1800 // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually 1801 // they all go in registers, but we must reserve stack space for them for 1802 // possible use by the caller. In varargs or 64-bit calls, parameters are 1803 // assigned stack space in order, with padding so Altivec parameters are 1804 // 16-byte aligned. 1805 nAltivecParamsAtEnd = 0; 1806 for (unsigned i = 0; i != NumOps; ++i) { 1807 SDOperand Arg = Call.getOperand(5+2*i); 1808 SDOperand Flag = Call.getOperand(5+2*i+1); 1809 MVT ArgVT = Arg.getValueType(); 1810 // Varargs Altivec parameters are padded to a 16 byte boundary. 1811 if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || 1812 ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) { 1813 if (!isVarArg && !isPPC64) { 1814 // Non-varargs Altivec parameters go after all the non-Altivec 1815 // parameters; handle those later so we know how much padding we need. 1816 nAltivecParamsAtEnd++; 1817 continue; 1818 } 1819 // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. 1820 NumBytes = ((NumBytes+15)/16)*16; 1821 } 1822 NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize); 1823 } 1824 1825 // Allow for Altivec parameters at the end, if needed. 1826 if (nAltivecParamsAtEnd) { 1827 NumBytes = ((NumBytes+15)/16)*16; 1828 NumBytes += 16*nAltivecParamsAtEnd; 1829 } 1830 1831 // The prolog code of the callee may store up to 8 GPR argument registers to 1832 // the stack, allowing va_start to index over them in memory if it's varargs. 1833 // Because we cannot tell if this is needed on the caller side, we have to 1834 // conservatively assume that it is needed. As such, make sure we have at 1835 // least enough stack space for the caller to store the 8 GPRs. 1836 NumBytes = std::max(NumBytes, 1837 PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI)); 1838 1839 // Tail call needs the stack to be aligned. 1840 if (CC==CallingConv::Fast && PerformTailCallOpt) { 1841 unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()-> 1842 getStackAlignment(); 1843 unsigned AlignMask = TargetAlign-1; 1844 NumBytes = (NumBytes + AlignMask) & ~AlignMask; 1845 } 1846 1847 return NumBytes; 1848} 1849 1850/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be 1851/// adjusted to accommodate the arguments for the tail call. 1852static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall, 1853 unsigned ParamSize) { 1854 1855 if (!IsTailCall) return 0; 1856 1857 PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>(); 1858 unsigned CallerMinReservedArea = FI->getMinReservedArea(); 1859 int SPDiff = (int)CallerMinReservedArea - (int)ParamSize; 1860 // Remember only if the new adjustment is bigger. 1861 if (SPDiff < FI->getTailCallSPDelta()) 1862 FI->setTailCallSPDelta(SPDiff); 1863 1864 return SPDiff; 1865} 1866 1867/// IsEligibleForTailCallOptimization - Check to see whether the next instruction 1868/// following the call is a return. A function is eligible if caller/callee 1869/// calling conventions match, currently only fastcc supports tail calls, and 1870/// the function CALL is immediately followed by a RET. 1871bool 1872PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call, 1873 SDOperand Ret, 1874 SelectionDAG& DAG) const { 1875 // Variable argument functions are not supported.
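 // Beyond that, the checks below require that tail call return constraints
 // hold, that both caller and callee use fastcc, that no parameter is byval,
 // and, when generating PIC, that the callee is a global with hidden or
 // protected visibility.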
1876 if (!PerformTailCallOpt || 1877 cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false; 1878 1879 if (CheckTailCallReturnConstraints(Call, Ret)) { 1880 MachineFunction &MF = DAG.getMachineFunction(); 1881 unsigned CallerCC = MF.getFunction()->getCallingConv(); 1882 unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue(); 1883 if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) { 1884 // Functions containing byval parameters are not supported. 1885 for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) { 1886 ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1)) 1887 ->getArgFlags(); 1888 if (Flags.isByVal()) return false; 1889 } 1890 1891 SDOperand Callee = Call.getOperand(4); 1892 // Non-PIC/GOT tail calls are supported. 1893 if (getTargetMachine().getRelocationModel() != Reloc::PIC_) 1894 return true; 1895 1896 // At the moment we can only do local tail calls (in same module, hidden 1897 // or protected) if we are generating PIC. 1898 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 1899 return G->getGlobal()->hasHiddenVisibility() 1900 || G->getGlobal()->hasProtectedVisibility(); 1901 } 1902 } 1903 1904 return false; 1905} 1906 1907/// isBLACompatibleAddress - Return the immediate to use if the specified 1908/// 32-bit value is representable in the immediate field of a BxA instruction. 1909static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) { 1910 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 1911 if (!C) return 0; 1912 1913 int Addr = C->getValue(); 1914 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 1915 (Addr << 6 >> 6) != Addr) 1916 return 0; // Top 6 bits have to be sext of immediate. 1917 1918 return DAG.getConstant((int)C->getValue() >> 2, 1919 DAG.getTargetLoweringInfo().getPointerTy()).Val; 1920} 1921 1922namespace { 1923 1924struct TailCallArgumentInfo { 1925 SDOperand Arg; 1926 SDOperand FrameIdxOp; 1927 int FrameIdx; 1928 1929 TailCallArgumentInfo() : FrameIdx(0) {} 1930}; 1931 1932} 1933 1934/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot. 1935static void 1936StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, 1937 SDOperand Chain, 1938 const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs, 1939 SmallVector<SDOperand, 8> &MemOpChains) { 1940 for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) { 1941 SDOperand Arg = TailCallArgs[i].Arg; 1942 SDOperand FIN = TailCallArgs[i].FrameIdxOp; 1943 int FI = TailCallArgs[i].FrameIdx; 1944 // Store relative to frame pointer. 1945 MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, 1946 PseudoSourceValue::getFixedStack(), 1947 FI)); 1948 } 1949} 1950 1951/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to 1952/// the appropriate stack slot for the tail call optimized function call. 1953static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, 1954 MachineFunction &MF, 1955 SDOperand Chain, 1956 SDOperand OldRetAddr, 1957 SDOperand OldFP, 1958 int SPDiff, 1959 bool isPPC64, 1960 bool isMachoABI) { 1961 if (SPDiff) { 1962 // Calculate the new stack slot for the return address. 1963 int SlotSize = isPPC64 ?
8 : 4; 1964 int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64, 1965 isMachoABI); 1966 int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize, 1967 NewRetAddrLoc); 1968 int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, 1969 isMachoABI); 1970 int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc); 1971 1972 MVT VT = isPPC64 ? MVT::i64 : MVT::i32; 1973 SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT); 1974 Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx, 1975 PseudoSourceValue::getFixedStack(), NewRetAddr); 1976 SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT); 1977 Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx, 1978 PseudoSourceValue::getFixedStack(), NewFPIdx); 1979 } 1980 return Chain; 1981} 1982 1983/// CalculateTailCallArgDest - Remember the argument for later processing. 1984/// Calculate the position of the argument. 1985static void 1986CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, 1987 SDOperand Arg, int SPDiff, unsigned ArgOffset, 1988 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { 1989 int Offset = ArgOffset + SPDiff; 1990 uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8; 1991 int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset); 1992 MVT VT = isPPC64 ? MVT::i64 : MVT::i32; 1993 SDOperand FIN = DAG.getFrameIndex(FI, VT); 1994 TailCallArgumentInfo Info; 1995 Info.Arg = Arg; 1996 Info.FrameIdxOp = FIN; 1997 Info.FrameIdx = FI; 1998 TailCallArguments.push_back(Info); 1999} 2000 2001/// EmitTailCallLoadFPAndRetAddr - Emit loads of the return address and frame 2002/// pointer from their stack slots. Returns the chain as result and the loaded 2003/// values in LROpOut/FPOpOut. Used when tail calling. 2004SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, 2005 int SPDiff, 2006 SDOperand Chain, 2007 SDOperand &LROpOut, 2008 SDOperand &FPOpOut) { 2009 if (SPDiff) { 2010 // Load the LR and FP stack slot for later adjusting. 2011 MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; 2012 LROpOut = getReturnAddrFrameIndex(DAG); 2013 LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0); 2014 Chain = SDOperand(LROpOut.Val, 1); 2015 FPOpOut = getFramePointerFrameIndex(DAG); 2016 FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0); 2017 Chain = SDOperand(FPOpOut.Val, 1); 2018 } 2019 return Chain; 2020} 2021 2022/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address 2023/// specified by "Src" to the address "Dst" of size "Size". Alignment information 2024/// is specified by the specific parameter attribute. The copy will be passed as 2025/// a byval function parameter. 2026/// Sometimes what we are copying is the end of a larger object, the part that 2027/// does not fit in registers. 2028static SDOperand 2029CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain, 2030 ISD::ArgFlagsTy Flags, SelectionDAG &DAG, 2031 unsigned Size) { 2032 SDOperand SizeNode = DAG.getConstant(Size, MVT::i32); 2033 return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false, 2034 NULL, 0, NULL, 0); 2035} 2036 2037/// LowerMemOpCallTo - Store the argument to the stack or remember it in case of 2038/// tail calls.
2039 static void 2040 LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain, 2041 SDOperand Arg, SDOperand PtrOff, int SPDiff, 2042 unsigned ArgOffset, bool isPPC64, bool isTailCall, 2043 bool isVector, SmallVector<SDOperand, 8> &MemOpChains, 2044 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) { 2045 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2046 if (!isTailCall) { 2047 if (isVector) { 2048 SDOperand StackPtr; 2049 if (isPPC64) 2050 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 2051 else 2052 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 2053 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, 2054 DAG.getConstant(ArgOffset, PtrVT)); 2055 } 2056 MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0)); 2057 // Calculate and remember argument location. 2058 } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset, 2059 TailCallArguments); 2060} 2061 2062SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG, 2063 const PPCSubtarget &Subtarget, 2064 TargetMachine &TM) { 2065 SDOperand Chain = Op.getOperand(0); 2066 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 2067 unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue(); 2068 bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 && 2069 CC == CallingConv::Fast && PerformTailCallOpt; 2070 SDOperand Callee = Op.getOperand(4); 2071 unsigned NumOps = (Op.getNumOperands() - 5) / 2; 2072 2073 bool isMachoABI = Subtarget.isMachoABI(); 2074 bool isELF32_ABI = Subtarget.isELF32_ABI(); 2075 2076 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2077 bool isPPC64 = PtrVT == MVT::i64; 2078 unsigned PtrByteSize = isPPC64 ? 8 : 4; 2079 2080 MachineFunction &MF = DAG.getMachineFunction(); 2081 2082 // args_to_use will accumulate outgoing args for the PPCISD::CALL case in 2083 // SelectExpr to use to put the arguments in the appropriate registers. 2084 std::vector<SDOperand> args_to_use; 2085 2086 // Mark this function as potentially containing a tail call. As a consequence 2087 // the frame pointer will be used for dynamic allocas and for restoring the 2088 // caller's stack pointer in this function's epilog. This is 2089 // done because, by tail calling, the called function might overwrite the value 2090 // in this function's (MF) stack pointer stack slot 0(SP). 2091 if (PerformTailCallOpt && CC==CallingConv::Fast) 2092 MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); 2093 2094 unsigned nAltivecParamsAtEnd = 0; 2095 2096 // Count how many bytes are to be pushed on the stack, including the linkage 2097 // area, and parameter passing area. We start with 24/48 bytes, which is 2098 // prereserved space for [SP][CR][LR][3 x unused]. 2099 unsigned NumBytes = 2100 CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg, CC, 2101 Op, nAltivecParamsAtEnd); 2102 2103 // Calculate by how many bytes the stack has to be adjusted in case of tail 2104 // call optimization. 2105 int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes); 2106 2107 // Adjust the stack pointer for the new arguments... 2108 // These operations are automatically eliminated by the prolog/epilog pass 2109 Chain = DAG.getCALLSEQ_START(Chain, 2110 DAG.getConstant(NumBytes, PtrVT)); 2111 SDOperand CallSeqStart = Chain; 2112 2113 // Load the return address and frame pointer so they can be moved somewhere 2114 // else later.
2115 SDOperand LROp, FPOp; 2116 Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp); 2117 2118 // Set up a copy of the stack pointer for use loading and storing any 2119 // arguments that may not fit in the registers available for argument 2120 // passing. 2121 SDOperand StackPtr; 2122 if (isPPC64) 2123 StackPtr = DAG.getRegister(PPC::X1, MVT::i64); 2124 else 2125 StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 2126 2127 // Figure out which arguments are going to go in registers, and which in 2128 // memory. Also, if this is a vararg function, floating point operations 2129 // must be stored to our stack, and loaded into integer regs as well, if 2130 // any integer regs are available for argument passing. 2131 unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); 2132 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 2133 2134 static const unsigned GPR_32[] = { // 32-bit registers. 2135 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 2136 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 2137 }; 2138 static const unsigned GPR_64[] = { // 64-bit registers. 2139 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 2140 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 2141 }; 2142 static const unsigned *FPR = GetFPR(Subtarget); 2143 2144 static const unsigned VR[] = { 2145 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 2146 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 2147 }; 2148 const unsigned NumGPRs = array_lengthof(GPR_32); 2149 const unsigned NumFPRs = isMachoABI ? 13 : 8; 2150 const unsigned NumVRs = array_lengthof( VR); 2151 2152 const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; 2153 2154 std::vector<std::pair<unsigned, SDOperand> > RegsToPass; 2155 SmallVector<TailCallArgumentInfo, 8> TailCallArguments; 2156 2157 SmallVector<SDOperand, 8> MemOpChains; 2158 for (unsigned i = 0; i != NumOps; ++i) { 2159 bool inMem = false; 2160 SDOperand Arg = Op.getOperand(5+2*i); 2161 ISD::ArgFlagsTy Flags = 2162 cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags(); 2163 // See if next argument requires stack alignment in ELF 2164 bool Align = Flags.isSplit(); 2165 2166 // PtrOff will be used to store the current argument to the stack if a 2167 // register cannot be found for it. 2168 SDOperand PtrOff; 2169 2170 // Stack align in ELF 32 2171 if (isELF32_ABI && Align) 2172 PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize, 2173 StackPtr.getValueType()); 2174 else 2175 PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType()); 2176 2177 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff); 2178 2179 // On PPC64, promote integers to 64-bit values. 2180 if (isPPC64 && Arg.getValueType() == MVT::i32) { 2181 // FIXME: Should this use ANY_EXTEND if neither sext nor zext? 2182 unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 2183 Arg = DAG.getNode(ExtOp, MVT::i64, Arg); 2184 } 2185 2186 // FIXME Elf untested, what are alignment rules? 2187 // FIXME memcpy is used way more than necessary. Correctness first. 2188 if (Flags.isByVal()) { 2189 unsigned Size = Flags.getByValSize(); 2190 if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); 2191 if (Size==1 || Size==2) { 2192 // Very small objects are passed right-justified. 2193 // Everything else is passed left-justified. 2194 MVT VT = (Size==1) ? 
MVT::i8 : MVT::i16; 2195 if (GPR_idx != NumGPRs) { 2196 SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg, 2197 NULL, 0, VT); 2198 MemOpChains.push_back(Load.getValue(1)); 2199 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 2200 if (isMachoABI) 2201 ArgOffset += PtrByteSize; 2202 } else { 2203 SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType()); 2204 SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const); 2205 SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr, 2206 CallSeqStart.Val->getOperand(0), 2207 Flags, DAG, Size); 2208 // This must go outside the CALLSEQ_START..END. 2209 SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 2210 CallSeqStart.Val->getOperand(1)); 2211 DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val); 2212 Chain = CallSeqStart = NewCallSeqStart; 2213 ArgOffset += PtrByteSize; 2214 } 2215 continue; 2216 } 2217 // Copy entire object into memory. There are cases where gcc-generated 2218 // code assumes it is there, even if it could be put entirely into 2219 // registers. (This is not what the doc says.) 2220 SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff, 2221 CallSeqStart.Val->getOperand(0), 2222 Flags, DAG, Size); 2223 // This must go outside the CALLSEQ_START..END. 2224 SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, 2225 CallSeqStart.Val->getOperand(1)); 2226 DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val); 2227 Chain = CallSeqStart = NewCallSeqStart; 2228 // And copy the pieces of it that fit into registers. 2229 for (unsigned j=0; j<Size; j+=PtrByteSize) { 2230 SDOperand Const = DAG.getConstant(j, PtrOff.getValueType()); 2231 SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const); 2232 if (GPR_idx != NumGPRs) { 2233 SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0); 2234 MemOpChains.push_back(Load.getValue(1)); 2235 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 2236 if (isMachoABI) 2237 ArgOffset += PtrByteSize; 2238 } else { 2239 ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize; 2240 break; 2241 } 2242 } 2243 continue; 2244 } 2245 2246 switch (Arg.getValueType().getSimpleVT()) { 2247 default: assert(0 && "Unexpected ValueType for argument!"); 2248 case MVT::i32: 2249 case MVT::i64: 2250 // Double word align in ELF 2251 if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2); 2252 if (GPR_idx != NumGPRs) { 2253 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); 2254 } else { 2255 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 2256 isPPC64, isTailCall, false, MemOpChains, 2257 TailCallArguments); 2258 inMem = true; 2259 } 2260 if (inMem || isMachoABI) { 2261 // Stack align in ELF 2262 if (isELF32_ABI && Align) 2263 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 2264 2265 ArgOffset += PtrByteSize; 2266 } 2267 break; 2268 case MVT::f32: 2269 case MVT::f64: 2270 if (FPR_idx != NumFPRs) { 2271 RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg)); 2272 2273 if (isVarArg) { 2274 SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0); 2275 MemOpChains.push_back(Store); 2276 2277 // Float varargs are always shadowed in available integer registers 2278 if (GPR_idx != NumGPRs) { 2279 SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0); 2280 MemOpChains.push_back(Load.getValue(1)); 2281 if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], 2282 Load)); 2283 } 2284 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){ 2285 SDOperand ConstFour 
= DAG.getConstant(4, PtrOff.getValueType()); 2286 PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour); 2287 SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0); 2288 MemOpChains.push_back(Load.getValue(1)); 2289 if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], 2290 Load)); 2291 } 2292 } else { 2293 // If we have any FPRs remaining, we may also have GPRs remaining. 2294 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 2295 // GPRs. 2296 if (isMachoABI) { 2297 if (GPR_idx != NumGPRs) 2298 ++GPR_idx; 2299 if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && 2300 !isPPC64) // PPC64 has 64-bit GPR's obviously :) 2301 ++GPR_idx; 2302 } 2303 } 2304 } else { 2305 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 2306 isPPC64, isTailCall, false, MemOpChains, 2307 TailCallArguments); 2308 inMem = true; 2309 } 2310 if (inMem || isMachoABI) { 2311 // Stack align in ELF 2312 if (isELF32_ABI && Align) 2313 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 2314 if (isPPC64) 2315 ArgOffset += 8; 2316 else 2317 ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8; 2318 } 2319 break; 2320 case MVT::v4f32: 2321 case MVT::v4i32: 2322 case MVT::v8i16: 2323 case MVT::v16i8: 2324 if (isVarArg) { 2325 // These go aligned on the stack, or in the corresponding R registers 2326 // when within range. The Darwin PPC ABI doc claims they also go in 2327 // V registers; in fact gcc does this only for arguments that are 2328 // prototyped, not for those that match the ... We do it for all 2329 // arguments, seems to work. 2330 while (ArgOffset % 16 !=0) { 2331 ArgOffset += PtrByteSize; 2332 if (GPR_idx != NumGPRs) 2333 GPR_idx++; 2334 } 2335 // We could elide this store in the case where the object fits 2336 // entirely in R registers. Maybe later. 2337 PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, 2338 DAG.getConstant(ArgOffset, PtrVT)); 2339 SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0); 2340 MemOpChains.push_back(Store); 2341 if (VR_idx != NumVRs) { 2342 SDOperand Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0); 2343 MemOpChains.push_back(Load.getValue(1)); 2344 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); 2345 } 2346 ArgOffset += 16; 2347 for (unsigned i=0; i<16; i+=PtrByteSize) { 2348 if (GPR_idx == NumGPRs) 2349 break; 2350 SDOperand Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff, 2351 DAG.getConstant(i, PtrVT)); 2352 SDOperand Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0); 2353 MemOpChains.push_back(Load.getValue(1)); 2354 RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); 2355 } 2356 break; 2357 } 2358 2359 // Non-varargs Altivec params generally go in registers, but have 2360 // stack space allocated at the end. 2361 if (VR_idx != NumVRs) { 2362 // Doesn't have GPR space allocated. 2363 RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg)); 2364 } else if (nAltivecParamsAtEnd==0) { 2365 // We are emitting Altivec params in order. 2366 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 2367 isPPC64, isTailCall, true, MemOpChains, 2368 TailCallArguments); 2369 ArgOffset += 16; 2370 } 2371 break; 2372 } 2373 } 2374 // If all Altivec parameters fit in registers, as they usually do, 2375 // they get stack space following the non-Altivec parameters. We 2376 // don't track this here because nobody below needs it. 2377 // If there are more Altivec parameters than fit in registers emit 2378 // the stores here. 
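 // (There are 12 vector argument registers, V2 through V13, so the shadow
 // area skipped below is 12 slots of 16 bytes each.)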
2379 if (!isVarArg && nAltivecParamsAtEnd > NumVRs) { 2380 unsigned j = 0; 2381 // Offset is aligned; skip 1st 12 params which go in V registers. 2382 ArgOffset = ((ArgOffset+15)/16)*16; 2383 ArgOffset += 12*16; 2384 for (unsigned i = 0; i != NumOps; ++i) { 2385 SDOperand Arg = Op.getOperand(5+2*i); 2386 MVT ArgType = Arg.getValueType(); 2387 if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 || 2388 ArgType==MVT::v8i16 || ArgType==MVT::v16i8) { 2389 if (++j > NumVRs) { 2390 SDOperand PtrOff; 2391 // We are emitting Altivec params in order. 2392 LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, 2393 isPPC64, isTailCall, true, MemOpChains, 2394 TailCallArguments); 2395 ArgOffset += 16; 2396 } 2397 } 2398 } 2399 } 2400 2401 if (!MemOpChains.empty()) 2402 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 2403 &MemOpChains[0], MemOpChains.size()); 2404 2405 // Build a sequence of copy-to-reg nodes chained together with token chain 2406 // and flag operands which copy the outgoing args into the appropriate regs. 2407 SDOperand InFlag; 2408 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { 2409 Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second, 2410 InFlag); 2411 InFlag = Chain.getValue(1); 2412 } 2413 2414 // With the ELF 32 ABI, set CR6 to true if this is a vararg call. 2415 if (isVarArg && isELF32_ABI) { 2416 SDOperand SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0); 2417 Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag); 2418 InFlag = Chain.getValue(1); 2419 } 2420 2421 // Emit a sequence of copyto/copyfrom virtual registers for arguments that 2422 // might overwrite each other in case of tail call optimization. 2423 if (isTailCall) { 2424 SmallVector<SDOperand, 8> MemOpChains2; 2425 // Do not flag preceding copytoreg stuff together with the following stuff. 2426 InFlag = SDOperand(); 2427 StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments, 2428 MemOpChains2); 2429 if (!MemOpChains2.empty()) 2430 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, 2431 &MemOpChains2[0], MemOpChains2.size()); 2432 2433 // Store the return address to the appropriate stack slot. 2434 Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff, 2435 isPPC64, isMachoABI); 2436 } 2437 2438 // Emit callseq_end just before tailcall node. 2439 if (isTailCall) { 2440 SmallVector<SDOperand, 8> CallSeqOps; 2441 SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag); 2442 CallSeqOps.push_back(Chain); 2443 CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes)); 2444 CallSeqOps.push_back(DAG.getIntPtrConstant(0)); 2445 if (InFlag.Val) 2446 CallSeqOps.push_back(InFlag); 2447 Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0], 2448 CallSeqOps.size()); 2449 InFlag = Chain.getValue(1); 2450 } 2451 2452 std::vector<MVT> NodeTys; 2453 NodeTys.push_back(MVT::Other); // Returns a chain 2454 NodeTys.push_back(MVT::Flag); // Returns a flag for retval copy to use. 2455 2456 SmallVector<SDOperand, 8> Ops; 2457 unsigned CallOpc = isMachoABI ? PPCISD::CALL_Macho : PPCISD::CALL_ELF; 2458 2459 // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every 2460 // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol 2461 // node so that legalize doesn't hack it.
2462 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2463 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType()); 2464 else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) 2465 Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType()); 2466 else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) 2467 // If this is an absolute destination address, use the munged value. 2468 Callee = SDOperand(Dest, 0); 2469 else { 2470 // Otherwise, this is an indirect call. We have to use a MTCTR/BCTRL pair 2471 // to do the call, we can't use PPCISD::CALL. 2472 SDOperand MTCTROps[] = {Chain, Callee, InFlag}; 2473 Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0)); 2474 InFlag = Chain.getValue(1); 2475 2476 // Copy the callee address into R12/X12 on darwin. 2477 if (isMachoABI) { 2478 unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12; 2479 Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag); 2480 InFlag = Chain.getValue(1); 2481 } 2482 2483 NodeTys.clear(); 2484 NodeTys.push_back(MVT::Other); 2485 NodeTys.push_back(MVT::Flag); 2486 Ops.push_back(Chain); 2487 CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF; 2488 Callee.Val = 0; 2489 // Add CTR register as callee so a bctr can be emitted later. 2490 if (isTailCall) 2491 Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy())); 2492 } 2493 2494 // If this is a direct call, pass the chain and the callee. 2495 if (Callee.Val) { 2496 Ops.push_back(Chain); 2497 Ops.push_back(Callee); 2498 } 2499 // If this is a tail call add stack pointer delta. 2500 if (isTailCall) 2501 Ops.push_back(DAG.getConstant(SPDiff, MVT::i32)); 2502 2503 // Add argument registers to the end of the list so that they are known live 2504 // into the call. 2505 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) 2506 Ops.push_back(DAG.getRegister(RegsToPass[i].first, 2507 RegsToPass[i].second.getValueType())); 2508 2509 // When performing tail call optimization the callee pops its arguments off 2510 // the stack. Account for this here so these bytes can be pushed back on in 2511 // PPCRegisterInfo::eliminateCallFramePseudoInstr. 2512 int BytesCalleePops = 2513 (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0; 2514 2515 if (InFlag.Val) 2516 Ops.push_back(InFlag); 2517 2518 // Emit tail call. 2519 if (isTailCall) { 2520 assert(InFlag.Val && 2521 "Flag must be set. Depend on flag being set in LowerRET"); 2522 Chain = DAG.getNode(PPCISD::TAILCALL, 2523 Op.Val->getVTList(), &Ops[0], Ops.size()); 2524 return SDOperand(Chain.Val, Op.ResNo); 2525 } 2526 2527 Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size()); 2528 InFlag = Chain.getValue(1); 2529 2530 Chain = DAG.getCALLSEQ_END(Chain, 2531 DAG.getConstant(NumBytes, PtrVT), 2532 DAG.getConstant(BytesCalleePops, PtrVT), 2533 InFlag); 2534 if (Op.Val->getValueType(0) != MVT::Other) 2535 InFlag = Chain.getValue(1); 2536 2537 SmallVector<SDOperand, 16> ResultVals; 2538 SmallVector<CCValAssign, 16> RVLocs; 2539 unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv(); 2540 CCState CCInfo(CallerCC, isVarArg, TM, RVLocs); 2541 CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC); 2542 2543 // Copy all of the result registers out of their specified physreg. 
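 // Each CopyFromReg node produces (value, chain, flag); threading the flag
 // from one copy into the next keeps the copies glued together right after
 // the call, which is why the loop below reads getValue(1) for the chain and
 // getValue(2) for the next in-flag.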
2544 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { 2545 CCValAssign &VA = RVLocs[i]; 2546 MVT VT = VA.getValVT(); 2547 assert(VA.isRegLoc() && "Can only return in registers!"); 2548 Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1); 2549 ResultVals.push_back(Chain.getValue(0)); 2550 InFlag = Chain.getValue(2); 2551 } 2552 2553 // If the function returns void, just return the chain. 2554 if (RVLocs.empty()) 2555 return Chain; 2556 2557 // Otherwise, merge everything together with a MERGE_VALUES node. 2558 ResultVals.push_back(Chain); 2559 SDOperand Res = DAG.getMergeValues(Op.Val->getVTList(), &ResultVals[0], 2560 ResultVals.size()); 2561 return Res.getValue(Op.ResNo); 2562} 2563 2564SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG, 2565 TargetMachine &TM) { 2566 SmallVector<CCValAssign, 16> RVLocs; 2567 unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv(); 2568 bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); 2569 CCState CCInfo(CC, isVarArg, TM, RVLocs); 2570 CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC); 2571 2572 // If this is the first return lowered for this function, add the regs to the 2573 // liveout set for the function. 2574 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 2575 for (unsigned i = 0; i != RVLocs.size(); ++i) 2576 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 2577 } 2578 2579 SDOperand Chain = Op.getOperand(0); 2580 2581 Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL); 2582 if (Chain.getOpcode() == PPCISD::TAILCALL) { 2583 SDOperand TailCall = Chain; 2584 SDOperand TargetAddress = TailCall.getOperand(1); 2585 SDOperand StackAdjustment = TailCall.getOperand(2); 2586 2587 assert(((TargetAddress.getOpcode() == ISD::Register && 2588 cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) || 2589 TargetAddress.getOpcode() == ISD::TargetExternalSymbol || 2590 TargetAddress.getOpcode() == ISD::TargetGlobalAddress || 2591 isa<ConstantSDNode>(TargetAddress)) && 2592 "Expecting a global address, external symbol, absolute value or register"); 2593 2594 assert(StackAdjustment.getOpcode() == ISD::Constant && 2595 "Expecting a const value"); 2596 2597 SmallVector<SDOperand,8> Operands; 2598 Operands.push_back(Chain.getOperand(0)); 2599 Operands.push_back(TargetAddress); 2600 Operands.push_back(StackAdjustment); 2601 // Copy registers used by the call. Last operand is a flag so it is not 2602 // copied. 2603 for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) { 2604 Operands.push_back(Chain.getOperand(i)); 2605 } 2606 return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0], 2607 Operands.size()); 2608 } 2609 2610 SDOperand Flag; 2611 2612 // Copy the result values into the output registers. 2613 for (unsigned i = 0; i != RVLocs.size(); ++i) { 2614 CCValAssign &VA = RVLocs[i]; 2615 assert(VA.isRegLoc() && "Can only return in registers!"); 2616 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); 2617 Flag = Chain.getValue(1); 2618 } 2619 2620 if (Flag.Val) 2621 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag); 2622 else 2623 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain); 2624} 2625 2626SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, 2627 const PPCSubtarget &Subtarget) { 2628 // When we pop the dynamic allocation we need to restore the SP link. 2629 2630 // Get the correct type for pointers.
2631 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2632 2633 // Construct the stack pointer operand. 2634 bool IsPPC64 = Subtarget.isPPC64(); 2635 unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1; 2636 SDOperand StackPtr = DAG.getRegister(SP, PtrVT); 2637 2638 // Get the operands for the STACKRESTORE. 2639 SDOperand Chain = Op.getOperand(0); 2640 SDOperand SaveSP = Op.getOperand(1); 2641 2642 // Load the old link SP. 2643 SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0); 2644 2645 // Restore the stack pointer. 2646 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP); 2647 2648 // Store the old link SP. 2649 return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0); 2650} 2651 2652 2653 2654SDOperand 2655PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { 2656 MachineFunction &MF = DAG.getMachineFunction(); 2657 bool IsPPC64 = PPCSubTarget.isPPC64(); 2658 bool isMachoABI = PPCSubTarget.isMachoABI(); 2659 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2660 2661 // Get the current return address save index. The users of this index are 2662 // primarily the tail call lowering code. 2663 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2664 int RASI = FI->getReturnAddrSaveIndex(); 2665 2666 // If the return address save index hasn't been defined yet. 2667 if (!RASI) { 2668 // Find out the fixed offset of the return address save area. 2669 int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI); 2670 // Allocate the frame index for the return address save area. 2671 RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64 ? 8 : 4, LROffset); 2672 // Save the result. 2673 FI->setReturnAddrSaveIndex(RASI); 2674 } 2675 return DAG.getFrameIndex(RASI, PtrVT); 2676} 2677 2678SDOperand 2679PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { 2680 MachineFunction &MF = DAG.getMachineFunction(); 2681 bool IsPPC64 = PPCSubTarget.isPPC64(); 2682 bool isMachoABI = PPCSubTarget.isMachoABI(); 2683 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2684 2685 // Get the current frame pointer save index. The users of this index will be 2686 // primarily DYNALLOC instructions. 2687 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 2688 int FPSI = FI->getFramePointerSaveIndex(); 2689 2690 // If the frame pointer save index hasn't been defined yet. 2691 if (!FPSI) { 2692 // Find out the fixed offset of the frame pointer save area. 2693 int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); 2694 2695 // Allocate the frame index for the frame pointer save area. 2696 FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64 ? 8 : 4, FPOffset); 2697 // Save the result. 2698 FI->setFramePointerSaveIndex(FPSI); 2699 } 2700 return DAG.getFrameIndex(FPSI, PtrVT); 2701} 2702 2703SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op, 2704 SelectionDAG &DAG, 2705 const PPCSubtarget &Subtarget) { 2706 // Get the inputs. 2707 SDOperand Chain = Op.getOperand(0); 2708 SDOperand Size = Op.getOperand(1); 2709 2710 // Get the correct type for pointers. 2711 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2712 // Negate the size. 2713 SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT, 2714 DAG.getConstant(0, PtrVT), Size); 2715 // Construct a node for the frame pointer save index. 2716 SDOperand FPSIdx = getFramePointerFrameIndex(DAG); 2717 // Build a DYNALLOC node.
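 // Note: the extra FPSIdx operand gives the DYNALLOC pseudo a fixed slot tied
 // to the saved stack pointer, so the SP back link can be kept intact while
 // the stack grows; LowerSTACKRESTORE above performs the matching restore of
 // that link. (This describes the intent; the exact mechanics live in the
 // DYNALLOC pseudo's expansion.)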
2718 SDOperand Ops[3] = { Chain, NegSize, FPSIdx }; 2719 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 2720 return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3); 2721} 2722 2723SDOperand PPCTargetLowering::LowerAtomicLOAD_ADD(SDOperand Op, SelectionDAG &DAG) { 2724 MVT VT = Op.Val->getValueType(0); 2725 SDOperand Chain = Op.getOperand(0); 2726 SDOperand Ptr = Op.getOperand(1); 2727 SDOperand Incr = Op.getOperand(2); 2728 2729 // Issue a "load and reserve". 2730 std::vector<MVT> VTs; 2731 VTs.push_back(VT); 2732 VTs.push_back(MVT::Other); 2733 2734 SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32); 2735 SDOperand Ops[] = { 2736 Chain, // Chain 2737 Ptr, // Ptr 2738 Label, // Label 2739 }; 2740 SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3); 2741 Chain = Load.getValue(1); 2742 2743 // Compute new value. 2744 SDOperand NewVal = DAG.getNode(ISD::ADD, VT, Load, Incr); 2745 2746 // Issue a "store and check". 2747 SDOperand Ops2[] = { 2748 Chain, // Chain 2749 NewVal, // Value 2750 Ptr, // Ptr 2751 Label, // Label 2752 }; 2753 SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4); 2754 SDOperand OutOps[] = { Load, Store }; 2755 return DAG.getMergeValues(OutOps, 2); 2756} 2757 2758SDOperand PPCTargetLowering::LowerAtomicCMP_SWAP(SDOperand Op, SelectionDAG &DAG) { 2759 MVT VT = Op.Val->getValueType(0); 2760 SDOperand Chain = Op.getOperand(0); 2761 SDOperand Ptr = Op.getOperand(1); 2762 SDOperand NewVal = Op.getOperand(2); 2763 SDOperand OldVal = Op.getOperand(3); 2764 2765 // Issue a "load and reserve". 2766 std::vector<MVT> VTs; 2767 VTs.push_back(VT); 2768 VTs.push_back(MVT::Other); 2769 2770 SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32); 2771 SDOperand Ops[] = { 2772 Chain, // Chain 2773 Ptr, // Ptr 2774 Label, // Label 2775 }; 2776 SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3); 2777 Chain = Load.getValue(1); 2778 2779 // Compare and unreserve if not equal. 2780 SDOperand Ops2[] = { 2781 Chain, // Chain 2782 OldVal, // Old value 2783 Load, // Value in memory 2784 Label, // Label 2785 }; 2786 Chain = DAG.getNode(PPCISD::CMP_UNRESERVE, MVT::Other, Ops2, 4); 2787 2788 // Issue a "store and check". 2789 SDOperand Ops3[] = { 2790 Chain, // Chain 2791 NewVal, // Value 2792 Ptr, // Ptr 2793 Label, // Label 2794 }; 2795 SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops3, 4); 2796 SDOperand OutOps[] = { Load, Store }; 2797 return DAG.getMergeValues(OutOps, 2); 2798} 2799 2800SDOperand PPCTargetLowering::LowerAtomicSWAP(SDOperand Op, SelectionDAG &DAG) { 2801 MVT VT = Op.Val->getValueType(0); 2802 SDOperand Chain = Op.getOperand(0); 2803 SDOperand Ptr = Op.getOperand(1); 2804 SDOperand NewVal = Op.getOperand(2); 2805 2806 // Issue a "load and reserve". 2807 std::vector<MVT> VTs; 2808 VTs.push_back(VT); 2809 VTs.push_back(MVT::Other); 2810 2811 SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32); 2812 SDOperand Ops[] = { 2813 Chain, // Chain 2814 Ptr, // Ptr 2815 Label, // Label 2816 }; 2817 SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3); 2818 Chain = Load.getValue(1); 2819 2820 // Issue a "store and check". 2821 SDOperand Ops2[] = { 2822 Chain, // Chain 2823 NewVal, // Value 2824 Ptr, // Ptr 2825 Label, // Label 2826 }; 2827 SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4); 2828 SDOperand OutOps[] = { Load, Store }; 2829 return DAG.getMergeValues(OutOps, 2); 2830} 2831 2832/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 2833/// possible. 
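/// The fsel instruction computes FRA >= 0.0 ? FRC : FRB, i.e. it is natively
/// a "greater than or equal to zero" select (a NaN test value selects the
/// false operand); the cases below swap the select arms and/or subtract or
/// negate the comparison operands to reach that canonical form.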
2834 SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { 2835 // Not FP? Not a fsel. 2836 if (!Op.getOperand(0).getValueType().isFloatingPoint() || 2837 !Op.getOperand(2).getValueType().isFloatingPoint()) 2838 return SDOperand(); 2839 2840 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 2841 2842 // Cannot handle SETEQ/SETNE. 2843 if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand(); 2844 2845 MVT ResVT = Op.getValueType(); 2846 MVT CmpVT = Op.getOperand(0).getValueType(); 2847 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 2848 SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3); 2849 2850 // If the RHS of the comparison is a 0.0, we don't need to do the 2851 // subtraction at all. 2852 if (isFloatingPointZero(RHS)) 2853 switch (CC) { 2854 default: break; // SETUO etc aren't handled by fsel. 2855 case ISD::SETULT: 2856 case ISD::SETOLT: 2857 case ISD::SETLT: 2858 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 2859 case ISD::SETUGE: 2860 case ISD::SETOGE: 2861 case ISD::SETGE: 2862 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 2863 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 2864 return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV); 2865 case ISD::SETUGT: 2866 case ISD::SETOGT: 2867 case ISD::SETGT: 2868 std::swap(TV, FV); // fsel is natively setge, swap operands for setgt 2869 case ISD::SETULE: 2870 case ISD::SETOLE: 2871 case ISD::SETLE: 2872 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 2873 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 2874 return DAG.getNode(PPCISD::FSEL, ResVT, 2875 DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); 2876 } 2877 2878 SDOperand Cmp; 2879 switch (CC) { 2880 default: break; // SETUO etc aren't handled by fsel. 2881 case ISD::SETULT: 2882 case ISD::SETOLT: 2883 case ISD::SETLT: 2884 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 2885 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2886 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2887 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 2888 case ISD::SETUGE: 2889 case ISD::SETOGE: 2890 case ISD::SETGE: 2891 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 2892 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2893 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2894 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 2895 case ISD::SETUGT: 2896 case ISD::SETOGT: 2897 case ISD::SETGT: 2898 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 2899 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2900 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2901 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 2902 case ISD::SETULE: 2903 case ISD::SETOLE: 2904 case ISD::SETLE: 2905 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 2906 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 2907 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 2908 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 2909 } 2910 return SDOperand(); 2911} 2912 2913// FIXME: Split this code up when LegalizeDAGTypes lands.
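// The fctiwz/fctidz instructions used below leave the converted integer in a
// floating point register, and there is no direct FPR-to-GPR move on these
// cores, so the result is bounced through a stack slot: store the f64, then
// load it back as an integer. For an i32 result the reload address is biased
// by 4 because the 32-bit value sits in the low word of the big-endian
// 64-bit slot.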
// FIXME: Split this code up when LegalizeDAGTypes lands.
SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType().getSimpleVT()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64);

  // Emit a store to the stack slot.
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  if (Op.getValueType() == MVT::i32)
    FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
  return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
}
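
// A note on the +4 bias above (reconstructing the author's intent): fctiwz
// leaves its 32-bit result in the low word of a 64-bit FPR, and the code
// above stores all 8 bytes of that register.  On big-endian PowerPC the low
// word of the f64 stack slot is at byte offset 4, so the i32 reload must be
// biased by 4; the i64 (fctidz) result occupies the whole slot and is
// reloaded at offset 0.
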
SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
                                                 SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::ppcf128);
  SDNode *Node = Op.Val;
  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
  assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);

  // This sequence changes FPSCR to do round-to-zero, adds the two halves
  // of the long double, and puts FPSCR back the way it was.  We do not
  // actually model FPSCR.
  std::vector<MVT> NodeTys;
  SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;

  NodeTys.push_back(MVT::f64);   // Return register
  NodeTys.push_back(MVT::Flag);  // Returns a flag for later insns
  Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
  MFFSreg = Result.getValue(0);
  InFlag = Result.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);  // Returns a flag
  Ops[0] = DAG.getConstant(31, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);  // Returns a flag
  Ops[0] = DAG.getConstant(30, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  NodeTys.clear();
  NodeTys.push_back(MVT::f64);   // result of add
  NodeTys.push_back(MVT::Flag);  // Returns a flag
  Ops[0] = Lo;
  Ops[1] = Hi;
  Ops[2] = InFlag;
  Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
  FPreg = Result.getValue(0);
  InFlag = Result.getValue(1);

  NodeTys.clear();
  NodeTys.push_back(MVT::f64);
  Ops[0] = DAG.getConstant(1, MVT::i32);
  Ops[1] = MFFSreg;
  Ops[2] = FPreg;
  Ops[3] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4);
  FPreg = Result.getValue(0);

  // We know the low half is about to be thrown away, so just use something
  // convenient.
  return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg);
}

SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
    return SDOperand();

  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
  // then lfd it and fcfid it.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
  MachineMemOperand MO(PseudoSourceValue::getFixedStack(),
                       MachineMemOperand::MOStore, FrameIdx, 8, 8);
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getMemOperand(MO));
  // Load the value as a double.
  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0);

  // FCFID it and return it.
  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 30:31 of FPSCR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

   FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

   To perform the conversion, we do:
     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  MVT VT = Op.getValueType();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  std::vector<MVT> NodeTys;
  SDOperand MFFSreg, InFlag;

  // Save FP Control Word to register
  NodeTys.push_back(MVT::f64);   // return register
  NodeTys.push_back(MVT::Flag);  // unused in this context
  SDOperand Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDOperand Store = DAG.getStore(DAG.getEntryNode(), Chain,
                                 StackSlot, NULL, 0);
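
  // As in LowerFP_TO_SINT above, the interesting bits are the low word of
  // the f64 slot, which on this big-endian target sits at byte offset 4
  // (hence the +4 below).  Checking the conversion formula from the comment
  // above for each FPSCR RN value:
  //
  //   RN = 00 (nearest): (0 & 3) ^ ((~0 & 3) >> 1) = 0 ^ 1 = 1
  //   RN = 01 (to zero): (1 & 3) ^ ((~1 & 3) >> 1) = 1 ^ 1 = 0
  //   RN = 10 (to +inf): (2 & 3) ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2
  //   RN = 11 (to -inf): (3 & 3) ^ ((~3 & 3) >> 1) = 3 ^ 0 = 3
  //
  // which is exactly the FLT_ROUNDS encoding.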
  // Load FP Control Word from low 32 bits of stack slot.
  SDOperand Four = DAG.getConstant(4, PtrVT);
  SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
  SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);

  // Transform as necessary
  SDOperand CWD1 =
    DAG.getNode(ISD::AND, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDOperand CWD2 =
    DAG.getNode(ISD::SRL, MVT::i32,
                DAG.getNode(ISD::AND, MVT::i32,
                            DAG.getNode(ISD::XOR, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i8));

  SDOperand RetVal =
    DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);

  return DAG.getNode((VT.getSizeInBits() < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}

SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5);
  SDOperand OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2);
}

SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2);
}
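
// A worked example of the *_PARTS expansions (illustrative; it assumes
// PPCISD::SHL/SRL select to slw/srw, which read a 6-bit shift amount and
// yield 0 for amounts 32-63, while PPCISD::SRA (sraw) sign-fills instead).
// For SHL_PARTS with BitWidth == 32:
//
//   Amt = 8:   OutHi = (Hi << 8) | (Lo >> 24) | (Lo << (8-32)),
//              where 8-32 reads as 40 in the low 6 bits, so the last term
//              contributes 0.
//   Amt = 40:  (Hi << 40) and (Lo >> (32-40, i.e. 56 mod 64)) are both 0,
//              leaving OutHi = Lo << 8; OutLo = Lo << 40 = 0, as required.
//
// SRA_PARTS below additionally needs a select_cc because, once Amt >= 32,
// the low result must come from an arithmetic (sign-filling) shift of Hi
// rather than from the OR of logical shifts.
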
SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT AmtVT = Amt.getValueType();

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT),
                                    Tmp4, Tmp6, ISD::SETLE);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getMergeValues(OutOps, 2);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits();
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValueAPF().convertToFloat());
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {

  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];

  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different than the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different than the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  static const MVT VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];

  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;

  MVT CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDOperand Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType());
  SmallVector<SDOperand, 8> Ops;
  Ops.assign(CanonicalVT.getVectorNumElements(), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
                              &Ops[0], Ops.size());
  return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG,
                                  MVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG,
                                  MVT DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}
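
// Usage sketch for the helpers above (illustrative only; these exact calls
// do not appear elsewhere in this file):
//
//   SDOperand S = BuildSplatI(5, 4, MVT::v4i32, DAG);  // selects vspltisw 5
//   SDOperand A = BuildIntrinsicOp(Intrinsic::ppc_altivec_vadduws, S, S, DAG);
//
// builds a v4i32 splat of 5 and a saturating self-add of it.  Values outside
// [-16,15] cannot be splatted directly (see the assert in BuildSplatI) and
// instead go through the multi-instruction sequences in LowerBUILD_VECTOR
// below.
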
/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

  SDOperand Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = DAG.getConstant(i+Amt, MVT::i32);
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op,
                                               SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();  // Not a constant vector.

  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

    // First, handle single instruction cases.

    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal = int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);


    // Two instruction sequences.

    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      SDOperand Res = BuildSplatI(SextVal >> 1, SplatSize, MVT::Other, DAG);
      Res = DAG.getNode(ISD::ADD, Res.getValueType(), Res, Res);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

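      // vslw shifts each word left by the low 5 bits of the corresponding
      // word of the shift-amount operand; with both operands 0xFFFFFFFF the
      // per-lane shift is 31, so each lane becomes 0x8000_0000, and the xor
      // with the all-ones vector below flips that to 0x7FFF_FFFF.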
      // Make the VSLW intrinsic, computing 0x8000_0000.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                       OnesV, DAG);

      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const signed char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };

    for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000): scanning
      // the array in this order makes 'vsplti -1' win such ties.
      int i = SplatCsts[idx];

      // Figure out what shift amount will be used by altivec if shifted by i
      // in this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);

      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }

      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }

      // vsplti + sra self.
      // FIXME: This condition is identical to the 'srl self' case above, so
      // as written this block never fires.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }

      // vsplti + rol self.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }

    // Three instruction sequences.

    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
      LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
      LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
    }
  }

  return SDOperand();
}
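
// How the perfect-shuffle table is decoded (reconstructed from the uses in
// GeneratePerfectShuffle and LowerVECTOR_SHUFFLE below): each 32-bit PFEntry
// packs
//
//   [31:30]  cost, in instructions
//   [29:26]  operation (the OP_* enum below)
//   [25:13]  LHSID, a recursive table index for the left operand
//   [12: 0]  RHSID, a recursive table index for the right operand
//
// where an ID encodes the four 4-byte element selectors as base-9 digits
// (0-7 select an input element, 8 means undef).  E.g. the identity
// <0,1,2,3> encodes as ((0*9+1)*9+2)*9+3 = 102, the LHSID that the OP_COPY
// case recognizes.
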
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
  }
  SDOperand Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32);

  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
}

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDOperand PPCTargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op,
                                                 SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.

      unsigned ByteSource =
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be
    // computed.  For example, if the perm mask can be hoisted out of a loop
    // or is already used (perhaps because there are multiple permutes with
    // the same shuffle mask?) the vperm has a cost of 1.  OTOH, hoisting the
    // permute mask out of the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can
    // be generated in 3 or fewer operations.
    // When we have loop information available, if this block is within a
    // loop, we should avoid using vperm for 3-operation perms and use a
    // constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT EltVT = V1.getValueType().getVectorElementType();
  unsigned BytesPerElement = EltVT.getSizeInBits()/8;

  SmallVector<SDOperand, 16> ResultMask;
  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }

  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
/// with information about the intrinsic.
static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
                                  bool &isDot) {
  unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

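  // The CompareOpc values above and below appear to be the VC-form extended
  // opcode fields from the AltiVec ISA (e.g. 6 = vcmpequb, 198 = vcmpeqfp,
  // 966 = vcmpbfp); a predicate ("_p") intrinsic uses the same opcode as its
  // plain counterpart, just with the record bit set, which is why each pair
  // shares a number.
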
    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op,
                                                     SelectionDAG &DAG) {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDOperand();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
                                Op.getOperand(1), Op.getOperand(2),
                                DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  SDOperand Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, MVT::i32)
  };
  std::vector<MVT> VTs;
  VTs.push_back(Op.getOperand(2).getValueType());
  VTs.push_back(MVT::Flag);
  SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
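
  // Why 8-(3-BitNo): after MFCR the full 32-bit condition register is in a
  // GPR, and CR6 occupies bits 24-27 counting from the MSB, i.e. bits 7
  // down to 4 counting from the LSB (LT=7, GT=6, EQ=5, SO=4).  So the EQ
  // bit (BitNo == 0) needs a right shift of 5 and the LT bit (BitNo == 2)
  // a shift of 7, which is what the expression above computes.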
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}

SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op,
                                                   SelectionDAG &DAG) {
  // Create a stack slot that is 16-byte aligned.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(16, 16);
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);

  // Store the input value into Value#0 of the stack slot.
  SDOperand Store = DAG.getStore(DAG.getEntryNode(),
                                 Op.getOperand(0), FIdx, NULL, 0);
  // Load it out.
  return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0);
}

SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getValueType() == MVT::v4i32) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDOperand Zero  = BuildSplatI(  0, 1, MVT::v4i32, DAG);
    SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG);  // +16 as shift amt.

    SDOperand RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, MVT::v4i32);

    SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                        LHS, RHSSwap, Zero, DAG, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG);
    return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v8i16) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG);

    return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm,
                            LHS, RHS, Zero, DAG);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts);

    // Merge the results together.
    SDOperand Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      Ops[i*2  ] = DAG.getConstant(2*i+1,    MVT::i8);
      Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8);
    }
    return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts,
                       DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
  } else {
    assert(0 && "Unknown mul to lower!");
    abort();
  }
}
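
// The v4i32 path in LowerMUL relies on the per-lane identity (splitting
// each 32-bit lane x into 16-bit halves xh:xl, and likewise y):
//
//   x * y == xl*yl + ((xl*yh + xh*yl) << 16)   (mod 2^32)
//
// vmulouh forms the xl*yl products, vmsumuhm against the halfword-swapped
// RHS forms xl*yh + xh*yl in a single instruction, the vslw by 16 supplies
// the shift, and the final add combines the two.
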
/// LowerOperation - Provide custom lowering hooks for some operations.
///
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                        VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                      VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
                                 VarArgsStackOffset, VarArgsNumGPR,
                                 VarArgsNumFPR, PPCSubTarget);

  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget,
                                                 getTargetMachine());
  case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);

  case ISD::ATOMIC_LOAD_ADD:    return LowerAtomicLOAD_ADD(Op, DAG);
  case ISD::ATOMIC_CMP_SWAP:    return LowerAtomicCMP_SWAP(Op, DAG);
  case ISD::ATOMIC_SWAP:        return LowerAtomicSWAP(Op, DAG);

  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
  return SDOperand();
}

SDNode *PPCTargetLowering::ReplaceNodeResults(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
  }
}


//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8 ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern.  The incoming instruction knows the
  // destination vreg to set, the condition code register to branch on, the
  // true/false values to select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
  unsigned SelectPred = MI->getOperand(4).getImm();
  BuildMI(BB, TII->get(PPC::BCC))
    .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  F->insert(It, copy0MBB);
  F->insert(It, sinkMBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  sinkMBB->transferSuccessors(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  F->DeleteMachineInstr(MI);   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0 ||  //  0 >>s V -> 0.
          C->isAllOnesValue())   // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;

  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).Val->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16)) {
      SDOperand BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);

      return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
                         N->getOperand(2), N->getOperand(3),
                         DAG.getValueType(N->getOperand(1).getValueType()));
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDOperand Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      std::vector<MVT> VTs;
      VTs.push_back(MVT::i32);
      VTs.push_back(MVT::Other);
      SDOperand MO = DAG.getMemOperand(LD->getMemOperand());
      SDOperand Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        MO,                // MemOperand
        DAG.getValueType(N->getValueType(0))  // VT
      };
      SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);

      // If this is an i16 load, insert the truncate.
      SDOperand ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a
      // real chain result.  The result value is dead because the bswap is
      // dead.
      DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDOperand(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6
    // and a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI).getUser()->getOpcode() == PPCISD::VCMPo &&
            (*UI).getUser()->getOperand(1) == N->getOperand(1) &&
            (*UI).getUser()->getOperand(2) == N->getOperand(2) &&
            (*UI).getUser()->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = UI->getUser();
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use,
      // don't transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = UI->getUser();
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is an MFCR instruction, we know this is safe.  Otherwise
      // we give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do an MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the
    // predicate compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<MVT> VTs;
      SDOperand Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(const std::string &Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'b':
    case 'r':
    case 'f':
    case 'v':
    case 'y':
      return C_RegisterClass;
    }
  }
  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass*>
PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b':   // R1-R31
    case 'r':   // R0-R31
      if (VT == MVT::i64 && PPCSubTarget.isPPC64())
        return std::make_pair(0U, PPC::G8RCRegisterClass);
      return std::make_pair(0U, PPC::GPRCRegisterClass);
    case 'f':
      if (VT == MVT::f32)
        return std::make_pair(0U, PPC::F4RCRegisterClass);
      else if (VT == MVT::f64)
        return std::make_pair(0U, PPC::F8RCRegisterClass);
      break;
    case 'v':
      return std::make_pair(0U, PPC::VRRCRegisterClass);
    case 'y':   // crrc
      return std::make_pair(0U, PPC::CRRCRegisterClass);
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
}


/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter,
                                                     std::vector<SDOperand> &Ops,
                                                     SelectionDAG &DAG) const {
  SDOperand Result(0, 0);
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return;  // Must be an immediate to match.
    unsigned Value = CST->getValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if ((short)Value == (int)Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
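      // Both letters need the low-order 16 bits to be zero: e.g. 0x00010000
      // passes the check below, while 0x00018000 does not.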
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r addressing beyond that:
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // PPC allows a sign-extended 16-bit immediate field.
  return (V > -(1 << 16) && V < (1 << 16)-1);
}

bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // Just load the return address off the stack.
  SDOperand RetAddrFI = getReturnAddrFrameIndex(DAG);

  // Make sure the function really does not optimize away the store of the RA
  // to the stack.
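  // (Without this flag, the prologue/epilogue emitter could otherwise decide
  //  that LR need not be saved when nothing else in the function uses it.)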
  FuncInfo->setLRStoreRequired();
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}

SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  // Use the frame pointer R31/X31 when the function keeps one (frame pointer
  // elimination disabled, or variable-sized objects present) and has a
  // non-empty frame; otherwise the stack pointer R1/X1 is the frame address.
  bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
              && MFI->getStackSize();

  if (isPPC64)
    return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,
                              MVT::i64);
  else
    return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1,
                              MVT::i32);
}