PPCISelLowering.cpp revision 28873106309db515d58889a4c4fa3e0a92d1b60e
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/ParameterAttributes.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()),
    PPCAtomicLabelIndex(0) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);

  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc loads and stores.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg)
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
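  // (Note: ppcf128 is modeled as a pair of f64 registers -- a high double and
  //  a low double -- which is why the shortening conversions above are
  //  described as "2 regs -> 1 reg".)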
  // This is used in the ppcf128->int sequence.  Note it has different semantics
  // from FP_ROUND:  that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Expand fsqrt if the subtarget doesn't provide a hardware square root.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
  setOperationAction(ISD::CTTZ , MVT::i32, Expand);
  setOperationAction(ISD::BSWAP, MVT::i64, Expand);
  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
  setOperationAction(ISD::CTTZ , MVT::i64, Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32, Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // VAARG is custom lowered with ELF 32 ABI
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Custom);
  setOperationAction(ISD::ATOMIC_SWAP    , MVT::i32, Custom);
  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
    setOperationAction(ISD::ATOMIC_SWAP    , MVT::i64, Custom);
  }
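  // (These ATOMIC_* nodes are lowered to load-reserved/store-conditional
  //  loops -- lwarx/stwcx., or ldarx/stdcx. for the i64 forms -- which is
  //  what the PPCISD::LARX and PPCISD::STCX nodes named later in this file
  //  stand for.)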

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
      MVT VT = (MVT::SimpleValueType)i;

      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , VT, Legal);
      setOperationAction(ISD::SUB , VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , VT, Promote);
      AddPromotedToType (ISD::AND   , VT, MVT::v4i32);
      setOperationAction(ISD::OR    , VT, Promote);
      AddPromotedToType (ISD::OR    , VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , VT, Promote);
      AddPromotedToType (ISD::XOR   , VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , VT, Promote);
      AddPromotedToType (ISD::LOAD  , VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, VT, Promote);
      AddPromotedToType (ISD::SELECT, VT, MVT::v4i32);
      setOperationAction(ISD::STORE , VT, Promote);
      AddPromotedToType (ISD::STORE , VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , VT, Expand);
      setOperationAction(ISD::SDIV, VT, Expand);
      setOperationAction(ISD::SREM, VT, Expand);
      setOperationAction(ISD::UDIV, VT, Expand);
      setOperationAction(ISD::UREM, VT, Expand);
      setOperationAction(ISD::FDIV, VT, Expand);
      setOperationAction(ISD::FNEG, VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, VT, Expand);
      setOperationAction(ISD::UDIVREM, VT, Expand);
      setOperationAction(ISD::SDIVREM, VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
      setOperationAction(ISD::FPOW, VT, Expand);
      setOperationAction(ISD::CTPOP, VT, Expand);
      setOperationAction(ISD::CTLZ, VT, Expand);
      setOperationAction(ISD::CTTZ, VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
  }

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
  TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  // FIXME Elf TBD
  return 4;
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:            return "PPCISD::FSEL";
  case PPCISD::FCFID:           return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:          return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:          return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:          return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:         return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:        return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:           return "PPCISD::VPERM";
  case PPCISD::Hi:              return "PPCISD::Hi";
  case PPCISD::Lo:              return "PPCISD::Lo";
  case PPCISD::DYNALLOC:        return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg:   return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:             return "PPCISD::SRL";
  case PPCISD::SRA:             return "PPCISD::SRA";
  case PPCISD::SHL:             return "PPCISD::SHL";
  case PPCISD::EXTSW_32:        return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:          return "PPCISD::STD_32";
  case PPCISD::CALL_ELF:        return "PPCISD::CALL_ELF";
  case PPCISD::CALL_Macho:      return "PPCISD::CALL_Macho";
  case PPCISD::MTCTR:           return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Macho:     return "PPCISD::BCTRL_Macho";
  case PPCISD::BCTRL_ELF:       return "PPCISD::BCTRL_ELF";
  case PPCISD::RET_FLAG:        return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:            return "PPCISD::MFCR";
  case PPCISD::VCMP:            return "PPCISD::VCMP";
  case PPCISD::VCMPo:           return "PPCISD::VCMPo";
  case PPCISD::LBRX:            return "PPCISD::LBRX";
  case PPCISD::STBRX:           return "PPCISD::STBRX";
  case PPCISD::LARX:            return "PPCISD::LARX";
  case PPCISD::STCX:            return "PPCISD::STCX";
  case PPCISD::CMP_UNRESERVE:   return "PPCISD::CMP_UNRESERVE";
  case PPCISD::COND_BRANCH:     return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:            return "PPCISD::MFFS";
  case PPCISD::MTFSB0:          return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:          return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:         return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:           return "PPCISD::MTFSF";
  case PPCISD::TAILCALL:        return "PPCISD::TAILCALL";
  case PPCISD::TC_RETURN:       return "PPCISD::TC_RETURN";
  }
}


MVT PPCTargetLowering::getSetCCResultType(const SDOperand &) const {
  return MVT::i32;
}


//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//
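
// Note on the shuffle predicates below: at this stage every PPC shuffle mask
// is a 16-operand v16i8 BUILD_VECTOR of byte indices in [0,31], where indices
// 0-15 select bytes of the first source vector and 16-31 select bytes of the
// second (hence the "PPC only supports shuffles by bytes!" asserts).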

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),   i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}
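// (For instance, with UnitSize == 4 and isUnary == false, isVMRGLShuffleMask
//  accepts the byte mask {8,9,10,11, 24,25,26,27, 12,13,14,15, 28,29,30,31},
//  which interleaves the low-order words of the two inputs -- exactly what
//  vmrglw produces.)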
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// isAllNegativeZeroVector - Returns true if all elements of build_vector
/// are -0.0.
bool PPC::isAllNegativeZeroVector(SDNode *N) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR);
  if (PPC::isSplatShuffleMask(N, N->getNumOperands()))
    if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N))
      return CFP->getValueAPF().isNegZero();
  return false;
}
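// (Example: a v4i32 splat of element 2 shows up here as the byte mask
//  {8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11}; isSplatShuffleMask(N, 4)
//  accepts it, and getVSPLTImmediate below returns 8/4 == 2 for vspltw.)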
/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;   // Number of BV entries per spltval.
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32);  // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)                                 // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.
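
  // (Worked example of the halving loop below: asking for ByteSize == 1 on a
  //  v4i32 splat of 0x01010101 starts with ValSizeInBytes == 4; each halving
  //  finds the top half equal to the bottom half, leaving 0x01, so this
  //  build_vector can be materialized as "vspltisb 1".)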
  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = CN->getValueType(0).getSizeInBits()/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  Addressing Mode Selection
//===----------------------------------------------------------------------===//

/// isIntS16Immediate - This method tests to see if the node is either a 32-bit
/// or 64-bit immediate, and if the value can be accurately represented as a
/// sign extension from a 16-bit value.  If so, this returns true and the
/// immediate.
static bool isIntS16Immediate(SDNode *N, short &Imm) {
  if (N->getOpcode() != ISD::Constant)
    return false;

  Imm = (short)cast<ConstantSDNode>(N)->getValue();
  if (N->getValueType(0) == MVT::i32)
    return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue();
  else
    return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue();
}
static bool isIntS16Immediate(SDOperand Op, short &Imm) {
  return isIntS16Immediate(Op.Val, Imm);
}


/// SelectAddressRegReg - Given the specified address, check to see if it
/// can be represented as an indexed [r+r] operation.  Returns false if it
/// can be more efficiently represented with [r+imm].
bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base,
                                            SDOperand &Index,
                                            SelectionDAG &DAG) {
  short imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false;    // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), imm))
      return false;    // r+i; let the [r+imm] selector fold it.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    APInt LHSKnownZero, LHSKnownOne;
    APInt RHSKnownZero, RHSKnownOne;
    DAG.ComputeMaskedBits(N.getOperand(0),
                          APInt::getAllOnesValue(N.getOperand(0)
                                                 .getValueSizeInBits()),
                          LHSKnownZero, LHSKnownOne);

    if (LHSKnownZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N.getOperand(1),
                            APInt::getAllOnesValue(N.getOperand(1)
                                                   .getValueSizeInBits()),
                            RHSKnownZero, RHSKnownOne);
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnownZero | RHSKnownZero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  return false;
}
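// (Example of the disjoint-OR case above: if the low 16 bits of the LHS are
//  known zero, e.g. N = (or (shl X, 16), (and Y, 0xFFFF)), no bit position
//  can carry, so the OR behaves like an ADD and we can select the [r+r] pair
//  Base = (shl X, 16), Index = (and Y, 0xFFFF).)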

/// Returns true if the address N can be represented by a base register plus
/// a signed 16-bit displacement [r+imm], and if it is not better
/// represented as reg+reg.
bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp,
                                            SDOperand &Base, SelectionDAG &DAG){
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
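    // (For example, assuming an absolute address of 0x12348000: (short)Addr
    //  is -32768, so Base becomes (0x12348000 + 32768) >> 16 == 0x1235 and
    //  Disp is -32768; "lis" of 0x1235 followed by the -32768 displacement
    //  reproduces 0x12348000.)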
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getValue() == (int)CN->getValue()) {
      int Addr = (int)CN->getValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,
                                                SDOperand &Index,
                                                SelectionDAG &DAG) {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPC::R0, N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends.
bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
                                                 SDOperand &Base,
                                                 SelectionDAG &DAG) {
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getValue() == (int)CN->getValue()) {
        int Addr = (int)CN->getValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);

        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
        return true;
      }
    }
  }

  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}


/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
                                                  SDOperand &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) {
  // Disabled by default for now.
  if (!EnablePPCPreinc) return false;

  SDOperand Ptr;
  MVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    ST = ST;
    Ptr = ST->getBasePtr();
    VT  = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (VT.isVector())
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }
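
  // (A pre-indexed access is e.g. "lwzu r5, 4(r9)", which loads from r9+4 and
  //  writes the updated address back into r9 in the same instruction; that is
  //  the ISD::PRE_INC form being matched here.)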
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
                                               SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}
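
// (The PPCISD::Hi/Lo pair above corresponds to the usual two-instruction
//  address materialization -- e.g. on 32-bit Darwin, "lis" of the high part
//  followed by an add of the lo16 part -- with the GlobalBaseReg added in
//  first when generating PIC.)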

SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
                                                   SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDOperand(); // Not reached
}

SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
                                                SelectionDAG &DAG) {
  MVT PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return GA;
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
}

SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT.bitsLT(MVT::i32)) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(VT.getSizeInBits());
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }
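
  // (For i32, Log2b is 5, so (seteq X, 0) becomes (srl (ctlz X), 5): cntlzw
  //  yields 32 only when X == 0, and 32 >> 5 == 1; any nonzero X gives a
  //  count of at most 31, which shifts down to 0.)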
  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT LHSVT = Op.getOperand(0).getValueType();
  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

SDOperand PPCTargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {

  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
  return SDOperand(); // Not reached
}

SDOperand PPCTargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                                          int VarArgsFrameIndex,
                                          int VarArgsStackOffset,
                                          unsigned VarArgsNumGPR,
                                          unsigned VarArgsNumFPR,
                                          const PPCSubtarget &Subtarget) {

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
  }

  // For ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];
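  //
  // (So with 4-byte pointers the fields live at byte offsets 0 (gpr),
  //  1 (fpr), 4 (overflow_arg_area), and 8 (reg_save_area), which is what the
  //  FPROffset/StackOffset/FrameOffset increments below walk through.)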


  SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);


  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDOperand StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
  SDOperand ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
  SDOperand ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDOperand ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
                                      Op.getOperand(1), SV, 0);
  uint64_t nextOffset = FPROffset;
  SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDOperand secondStore =
    DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDOperand thirdStore =
    DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset);

}

#include "PPCGenCallingConv.inc"

/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// depending on which subtarget is selected.
static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
  if (Subtarget.isMachoABI()) {
    static const unsigned FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
    return FPR;
  }


  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };
  return FPR;
}

/// CalculateStackSlotSize - Calculates the size reserved for this argument on
/// the stack.
static unsigned CalculateStackSlotSize(SDOperand Arg, SDOperand Flag,
                                       bool isVarArg, unsigned PtrByteSize) {
  MVT ArgVT = Arg.getValueType();
  ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Flag)->getArgFlags();
  unsigned ArgSize = ArgVT.getSizeInBits()/8;
  if (Flags.isByVal())
    ArgSize = Flags.getByValSize();
  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;

  return ArgSize;
}

SDOperand
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
                                         SelectionDAG &DAG,
                                         int &VarArgsFrameIndex,
                                         int &VarArgsStackOffset,
                                         unsigned &VarArgsNumGPR,
                                         unsigned &VarArgsNumFPR,
                                         const PPCSubtarget &Subtarget) {
  // TODO: add description of PPC stack frame format, or at least some docs.
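  // (Rough sketch, not authoritative: in the Macho ABI the caller's frame
  //  starts with a linkage area of 6 pointer-sized words -- 24 bytes on
  //  PPC32, 48 on PPC64 -- and the argument words begin right after it,
  //  which is what PPCFrameInfo::getLinkageSize() accounts for below.)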
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();
  // Potential tail calls could cause overwriting of argument stack slots.
  unsigned CC = MF.getFunction()->getCallingConv();
  bool isImmutable = !(PerformTailCallOpt && (CC==CallingConv::Fast));
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  // Area that is at least reserved in caller of this function.
  unsigned MinReservedArea = ArgOffset;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };

  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR_32);
  const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8;
  const unsigned Num_VR_Regs  = array_lengthof( VR);

  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // In 32-bit non-varargs functions, the stack space for vectors is after the
  // stack space for non-vectors.  We do not use this space unless we have
  // too many vectors to fit in registers, something that only occurs in
  // constructed examples:), but we have to walk the arglist to figure
  // that out...for the pathological case, compute VecArgOffset as the
  // start of the vector parameter area.  Computing VecArgOffset is the
  // entire point of the following loop.
  // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying
  // to handle Elf here.
  unsigned VecArgOffset = ArgOffset;
  if (!isVarArg && !isPPC64) {
    for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e;
         ++ArgNo) {
      MVT ObjectVT = Op.getValue(ArgNo).getValueType();
      unsigned ObjSize = ObjectVT.getSizeInBits()/8;
      ISD::ArgFlagsTy Flags =
        cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags();

      if (Flags.isByVal()) {
        // ObjSize is the true size, ArgSize rounded up to multiple of regs.
        ObjSize = Flags.getByValSize();
        unsigned ArgSize =
          ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
        VecArgOffset += ArgSize;
        continue;
      }

      switch(ObjectVT.getSimpleVT()) {
      default: assert(0 && "Unhandled argument type!");
      case MVT::i32:
      case MVT::f32:
        VecArgOffset += isPPC64 ? 8 : 4;
        break;
      case MVT::i64:  // PPC64
      case MVT::f64:
        VecArgOffset += 8;
        break;
      case MVT::v4f32:
      case MVT::v4i32:
      case MVT::v8i16:
      case MVT::v16i8:
        // Nothing to do, we're only looking at Nonvector args here.
1434 break; 1435 } 1436 } 1437 } 1438 // We've found where the vector parameter area in memory is. Skip the 1439 // first 12 parameters; these don't use that memory. 1440 VecArgOffset = ((VecArgOffset+15)/16)*16; 1441 VecArgOffset += 12*16; 1442 1443 // Add DAG nodes to load the arguments or copy them out of registers. On 1444 // entry to a function on PPC, the arguments start after the linkage area, 1445 // although the first ones are often in registers. 1446 // 1447 // In the ELF 32 ABI, GPRs and stack are double word align: an argument 1448 // represented with two words (long long or double) must be copied to an 1449 // even GPR_idx value or to an even ArgOffset value. 1450 1451 SmallVector<SDOperand, 8> MemOps; 1452 unsigned nAltivecParamsAtEnd = 0; 1453 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { 1454 SDOperand ArgVal; 1455 bool needsLoad = false; 1456 MVT ObjectVT = Op.getValue(ArgNo).getValueType(); 1457 unsigned ObjSize = ObjectVT.getSizeInBits()/8; 1458 unsigned ArgSize = ObjSize; 1459 ISD::ArgFlagsTy Flags = 1460 cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); 1461 // See if next argument requires stack alignment in ELF 1462 bool Align = Flags.isSplit(); 1463 1464 unsigned CurArgOffset = ArgOffset; 1465 1466 // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. 1467 if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || 1468 ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8) { 1469 if (isVarArg || isPPC64) { 1470 MinReservedArea = ((MinReservedArea+15)/16)*16; 1471 MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), 1472 Op.getOperand(ArgNo+3), 1473 isVarArg, 1474 PtrByteSize); 1475 } else nAltivecParamsAtEnd++; 1476 } else 1477 // Calculate min reserved area. 1478 MinReservedArea += CalculateStackSlotSize(Op.getValue(ArgNo), 1479 Op.getOperand(ArgNo+3), 1480 isVarArg, 1481 PtrByteSize); 1482 1483 // FIXME alignment for ELF may not be right 1484 // FIXME the codegen can be much improved in some cases. 1485 // We do not have to keep everything in memory. 1486 if (Flags.isByVal()) { 1487 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 1488 ObjSize = Flags.getByValSize(); 1489 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1490 // Double word align in ELF 1491 if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1492 // Objects of size 1 and 2 are right justified, everything else is 1493 // left justified. This means the memory address is adjusted forwards. 1494 if (ObjSize==1 || ObjSize==2) { 1495 CurArgOffset = CurArgOffset + (4 - ObjSize); 1496 } 1497 // The value of the object is its address. 1498 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); 1499 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1500 ArgValues.push_back(FIN); 1501 if (ObjSize==1 || ObjSize==2) { 1502 if (GPR_idx != Num_GPR_Regs) { 1503 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1504 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1505 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1506 SDOperand Store = DAG.getTruncStore(Val.getValue(1), Val, FIN, 1507 NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); 1508 MemOps.push_back(Store); 1509 ++GPR_idx; 1510 if (isMachoABI) ArgOffset += PtrByteSize; 1511 } else { 1512 ArgOffset += PtrByteSize; 1513 } 1514 continue; 1515 } 1516 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 1517 // Store whatever pieces of the object are in registers 1518 // to memory. ArgVal will be address of the beginning of 1519 // the object. 
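        // Hypothetical example (PPC32 Macho): a 12-byte byval struct whose
        // pieces arrived in three consecutive GPRs is written back with
        // three word stores at ArgOffset, ArgOffset+4 and ArgOffset+8, and
        // the "value" handed to the function body is the address of the
        // first slot.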
        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
          SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
          SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
          SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
          MemOps.push_back(Store);
          ++GPR_idx;
          if (isMachoABI) ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ArgSize - (ArgOffset-CurArgOffset);
          break;
        }
      }
      continue;
    }

    switch (ObjectVT.getSimpleVT()) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      if (!isPPC64) {
        // Double-word align in ELF.
        if (Align && isELF32_ABI) GPR_idx += (GPR_idx % 2);

        if (GPR_idx != Num_GPR_Regs) {
          unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
          RegInfo.addLiveIn(GPR[GPR_idx], VReg);
          ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
          ++GPR_idx;
        } else {
          needsLoad = true;
          ArgSize = PtrByteSize;
        }
        // Stack align in ELF.
        if (needsLoad && Align && isELF32_ABI)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        // All int arguments reserve stack space in the Macho ABI.
        if (isMachoABI || needsLoad) ArgOffset += PtrByteSize;
        break;
      }
      // FALLTHROUGH
    case MVT::i64:  // PPC64
      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
        RegInfo.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64);

        if (ObjectVT == MVT::i32) {
          // PPC64 passes i8, i16, and i32 values in i64 registers.  Promote
          // the value to MVT::i64 and then truncate to the correct register
          // size.
          if (Flags.isSExt())
            ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));
          else if (Flags.isZExt())
            ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal,
                                 DAG.getValueType(ObjectVT));

          ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal);
        }

        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      // All int arguments reserve stack space in the Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // Every 4 bytes of argument space consumes one of the GPRs available
      // for argument passing.
      if (GPR_idx != Num_GPR_Regs && isMachoABI) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }

      // Stack align in ELF.
      if (needsLoad && Align && isELF32_ABI)
        ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
      // All FP arguments reserve stack space in the Macho ABI.
      if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize;
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space,
      // except in varargs functions.
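      // Sketch of the varargs case below: the offset is first padded out to
      // a 16-byte boundary (skipping a GPR for each pointer-size word of
      // padding), then advanced by 16, and up to four GPRs are treated as
      // shadowed by the vector.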
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass);
        RegInfo.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        if (isVarArg) {
          while ((ArgOffset % 16) != 0) {
            ArgOffset += PtrByteSize;
            if (GPR_idx != Num_GPR_Regs)
              GPR_idx++;
          }
          ArgOffset += 16;
          GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs);
        }
        ++VR_idx;
      } else {
        if (!isVarArg && !isPPC64) {
          // Vectors go after all the non-vectors.
          CurArgOffset = VecArgOffset;
          VecArgOffset += 16;
        } else {
          // Vectors are aligned.
          ArgOffset = ((ArgOffset+15)/16)*16;
          CurArgOffset = ArgOffset;
          ArgOffset += 16;
        }
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      int FI = MFI->CreateFixedObject(ObjSize,
                                      CurArgOffset + (ArgSize - ObjSize),
                                      isImmutable);
      SDOperand FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0);
    }

    ArgValues.push_back(ArgVal);
  }

  // Set the size that is at least reserved in the caller of this function.
  // A tail-call-optimized function's reserved stack space needs to be
  // aligned so that taking the difference between two stack areas yields an
  // aligned stack.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  // Add the Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    MinReservedArea = ((MinReservedArea+15)/16)*16;
    MinReservedArea += 16*nAltivecParamsAtEnd;
  }
  MinReservedArea =
    std::max(MinReservedArea,
             PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));
  unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
    getStackAlignment();
  unsigned AlignMask = TargetAlign-1;
  MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask;
  FI->setMinReservedArea(MinReservedArea);

  // If the function takes a variable number of arguments, make a frame index
  // for the start of the first vararg value... for expansion of
  // llvm.va_start.
  if (isVarArg) {

    int depth;
    if (isELF32_ABI) {
      VarArgsNumGPR = GPR_idx;
      VarArgsNumFPR = FPR_idx;

      // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame
      // pointer.
      depth = -(Num_GPR_Regs * PtrVT.getSizeInBits()/8 +
                Num_FPR_Regs * MVT(MVT::f64).getSizeInBits()/8 +
                PtrVT.getSizeInBits()/8);

      VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                                  ArgOffset);

    }
    else
      depth = ArgOffset;

    VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
                                               depth);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

    // In the ELF 32 ABI, the fixed integer arguments of a variadic function
    // are stored to the VarArgsFrameIndex on the stack.
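    // Roughly, the register save area rooted at VarArgsFrameIndex then looks
    // like
    //   [ r3 ][ r4 ] ... [ r10 ][ f1 ][ f2 ] ... [ f8 ]
    // so va_arg can index into it using the gpr/fpr counts that va_start
    // recorded above.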
    if (isELF32_ABI) {
      for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) {
        SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT);
        SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by four for the next argument to store.
        SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg;
      if (isPPC64)
        VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
      else
        VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);

      RegInfo.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT);
      SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store.
      SDOperand PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, PtrVT);
      FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
    }

    // In the ELF 32 ABI, the double arguments are stored to the
    // VarArgsFrameIndex on the stack.
    if (isELF32_ABI) {
      for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) {
        SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64);
        SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store.
        SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                           PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }

      for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) {
        unsigned VReg;
        VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);

        RegInfo.addLiveIn(FPR[FPR_idx], VReg);
        SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64);
        SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0);
        MemOps.push_back(Store);
        // Increment the address by eight for the next argument to store.
        SDOperand PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8,
                                           PtrVT);
        FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff);
      }
    }
  }

  if (!MemOps.empty())
    Root = DAG.getNode(ISD::TokenFactor, MVT::Other,
                       &MemOps[0], MemOps.size());

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT> RetVT(Op.Val->value_begin(),
                         Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0],
                     ArgValues.size());
}

/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus
/// linkage area.
static unsigned
CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG,
                                     bool isPPC64,
                                     bool isMachoABI,
                                     bool isVarArg,
                                     unsigned CC,
                                     SDOperand Call,
                                     unsigned &nAltivecParamsAtEnd) {
  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
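  // A rough worked example, assuming 32-bit Macho and a non-varargs call
  // f(int, double, <4 x i32>): linkage 24, +4 (int), +8 (double) = 36; the
  // vector is deferred, so 36 is padded to 48 and grows by 16 to 64, which
  // also clears the assumed 56-byte minimum (linkage plus 8 GPR save words).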
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned NumOps = (Call.getNumOperands() - 5) / 2;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end;
  // usually they all go in registers, but we must reserve stack space for
  // them for possible use by the caller.  In varargs or 64-bit calls,
  // parameters are assigned stack space in order, with padding so Altivec
  // parameters are 16-byte aligned.
  nAltivecParamsAtEnd = 0;
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Call.getOperand(5+2*i);
    SDOperand Flag = Call.getOperand(5+2*i+1);
    MVT ArgVT = Arg.getValueType();
    // Varargs Altivec parameters are padded to a 16 byte boundary.
    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec
        // parameters; handle those later so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      }
      // Varargs and 64-bit Altivec parameters are padded to a 16 byte
      // boundary.
      NumBytes = ((NumBytes+15)/16)*16;
    }
    NumBytes += CalculateStackSlotSize(Arg, Flag, isVarArg, PtrByteSize);
  }

  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it's
  // varargs.  Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // A tail call needs the stack to be aligned.
  if (CC==CallingConv::Fast && PerformTailCallOpt) {
    unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameInfo()->
      getStackAlignment();
    unsigned AlignMask = TargetAlign-1;
    NumBytes = (NumBytes + AlignMask) & ~AlignMask;
  }

  return NumBytes;
}

/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tail call.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool IsTailCall,
                                   unsigned ParamSize) {

  if (!IsTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}

/// IsEligibleForTailCallOptimization - Check to see whether the next
/// instruction following the call is a return.  A function is eligible if
/// caller/callee calling conventions match, currently only fastcc supports
/// tail calls, and the function CALL is immediately followed by a RET.
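/// For instance, a fastcc caller ending in "return f(x);" where f is also
/// fastcc (and, when compiling PIC, is hidden or protected in the same
/// module) qualifies; calls with byval arguments or varargs never do.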
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDOperand Call,
                                                     SDOperand Ret,
                                                     SelectionDAG& DAG) const {
  // Variable argument functions are not supported.
  if (!PerformTailCallOpt ||
      cast<ConstantSDNode>(Call.getOperand(2))->getValue() != 0) return false;

  if (CheckTailCallReturnConstraints(Call, Ret)) {
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned CallerCC = MF.getFunction()->getCallingConv();
    unsigned CalleeCC = cast<ConstantSDNode>(Call.getOperand(1))->getValue();
    if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
      // Functions containing byval parameters are not supported.
      for (unsigned i = 0; i != ((Call.getNumOperands()-5)/2); i++) {
        ISD::ArgFlagsTy Flags = cast<ARG_FLAGSSDNode>(Call.getOperand(5+2*i+1))
          ->getArgFlags();
        if (Flags.isByVal()) return false;
      }

      SDOperand Callee = Call.getOperand(4);
      // Non-PIC/GOT tail calls are supported.
      if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
        return true;

      // At the moment we can only do local tail calls (in the same module,
      // hidden or protected) if we are generating PIC.
      if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
        return G->getGlobal()->hasHiddenVisibility()
            || G->getGlobal()->hasProtectedVisibility();
    }
  }

  return false;
}

/// isBLACompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||      // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr)
    return 0;                 // Top 6 bits have to be sext of immediate.

  return DAG.getConstant((int)C->getValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).Val;
}

namespace {

struct TailCallArgumentInfo {
  SDOperand Arg;
  SDOperand FrameIdxOp;
  int       FrameIdx;

  TailCallArgumentInfo() : FrameIdx(0) {}
};

}

/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static void
StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG,
                                  SDOperand Chain,
                                  const SmallVector<TailCallArgumentInfo, 8> &TailCallArgs,
                                  SmallVector<SDOperand, 8> &MemOpChains) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDOperand Arg = TailCallArgs[i].Arg;
    SDOperand FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to the frame pointer.
    MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN,
                                       PseudoSourceValue::getFixedStack(),
                                       FI));
  }
}

/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address
/// to the appropriate stack slot for the tail call optimized function call.
static SDOperand EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
                                               MachineFunction &MF,
                                               SDOperand Chain,
                                               SDOperand OldRetAddr,
                                               SDOperand OldFP,
                                               int SPDiff,
                                               bool isPPC64,
                                               bool isMachoABI) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
                                                                   isMachoABI);
    int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
                                                          NewRetAddrLoc);
    int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64,
                                                                    isMachoABI);
    int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);

    MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDOperand NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, OldRetAddr, NewRetAddrFrIdx,
                         PseudoSourceValue::getFixedStack(), NewRetAddr);
    SDOperand NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
    Chain = DAG.getStore(Chain, OldFP, NewFramePtrIdx,
                         PseudoSourceValue::getFixedStack(), NewFPIdx);
  }
  return Chain;
}

/// CalculateTailCallArgDest - Remember the argument for later processing.
/// Calculate the position of the argument.
static void
CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
                         SDOperand Arg, int SPDiff, unsigned ArgOffset,
                         SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  int Offset = ArgOffset + SPDiff;
  uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
  int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
  MVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDOperand FIN = DAG.getFrameIndex(FI, VT);
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}

/// EmitTailCallLoadFPAndRetAddr - Emit loads of the frame pointer and return
/// address from their stack slots.  Returns the chain as result and the
/// loaded values in LROpOut/FPOpOut.  Used when tail calling.
SDOperand PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG,
                                                          int SPDiff,
                                                          SDOperand Chain,
                                                          SDOperand &LROpOut,
                                                          SDOperand &FPOpOut) {
  if (SPDiff) {
    // Load the LR and FP stack slots for later adjusting.
    MVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32;
    LROpOut = getReturnAddrFrameIndex(DAG);
    LROpOut = DAG.getLoad(VT, Chain, LROpOut, NULL, 0);
    Chain = SDOperand(LROpOut.Val, 1);
    FPOpOut = getFramePointerFrameIndex(DAG);
    FPOpOut = DAG.getLoad(VT, Chain, FPOpOut, NULL, 0);
    Chain = SDOperand(FPOpOut.Val, 1);
  }
  return Chain;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at the address
/// specified by "Src" to address "Dst" of size "Size".  Alignment information
/// is specified by the specific parameter attribute.  The copy will be passed
/// as a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          unsigned Size) {
  SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, Flags.getByValAlign(), false,
                       NULL, 0, NULL, 0);
}

/// LowerMemOpCallTo - Store the argument to the stack or remember it in case
/// of tail calls.
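/// Non-tail calls simply emit the store (vectors first recompute PtrOff from
/// the final, 16-byte-aligned ArgOffset); tail calls only record where the
/// argument must eventually live relative to the adjusted stack pointer, so
/// the stores can be emitted after the old argument area has been read.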
static void
LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDOperand Chain,
                 SDOperand Arg, SDOperand PtrOff, int SPDiff,
                 unsigned ArgOffset, bool isPPC64, bool isTailCall,
                 bool isVector, SmallVector<SDOperand, 8> &MemOpChains,
                 SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  if (!isTailCall) {
    if (isVector) {
      SDOperand StackPtr;
      if (isPPC64)
        StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
      else
        StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
      PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                           DAG.getConstant(ArgOffset, PtrVT));
    }
    MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
  } else
    // Calculate and remember the argument location.
    CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
                             TailCallArguments);
}

SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget,
                                       TargetMachine &TM) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  unsigned CC = cast<ConstantSDNode>(Op.getOperand(1))->getValue();
  bool isTailCall = cast<ConstantSDNode>(Op.getOperand(3))->getValue() != 0 &&
                    CC == CallingConv::Fast && PerformTailCallOpt;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  MachineFunction &MF = DAG.getMachineFunction();

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Mark this function as potentially containing a function that contains a
  // tail call.  As a consequence the frame pointer will be used for dynamic
  // allocas and for restoring the caller's stack pointer in this function's
  // epilog.  This is done because a tail-called function might overwrite the
  // value in this function's (MF) stack pointer stack slot 0(SP).
  if (PerformTailCallOpt && CC==CallingConv::Fast)
    MF.getInfo<PPCFunctionInfo>()->setHasFastCall();

  unsigned nAltivecParamsAtEnd = 0;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area and the parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes =
    CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isMachoABI, isVarArg,
                                         CC, Op, nAltivecParamsAtEnd);

  // Calculate by how many bytes the stack has to be adjusted in case of a
  // tail call optimization.
  int SPDiff = CalculateTailCallSPDiff(DAG, isTailCall, NumBytes);

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass.
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));
  SDOperand CallSeqStart = Chain;

  // Load the return address and frame pointer so they can be moved somewhere
  // else later.
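  // (This only does work for tail calls, i.e. SPDiff != 0; the old LR/FP
  // must be read before the outgoing arguments overwrite their slots.)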
  SDOperand LROp, FPOp;
  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp);

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = isMachoABI ? 13 : 8;
  const unsigned NumVRs  = array_lengthof(VR);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;

  SmallVector<SDOperand, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    bool inMem = false;
    SDOperand Arg = Op.getOperand(5+2*i);
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
    // See if the next argument requires stack alignment in ELF.
    bool Align = Flags.isSplit();

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff;

    // Stack align in ELF 32.
    if (isELF32_ABI && Align)
      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
                               StackPtr.getValueType());
    else
      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    // FIXME: ELF untested, what are the alignment rules?
    // FIXME: memcpy is used way more than necessary.  Correctness first.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (Size==1 || Size==2) {
        // Very small objects are passed right-justified.
        // Everything else is passed left-justified.
        MVT VT = (Size==1) ? MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,
                                          NULL, 0, VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
          SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
          SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
                                CallSeqStart.Val->getOperand(0),
                                Flags, DAG, Size);
          // This must go outside the CALLSEQ_START..END.
          SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                               CallSeqStart.Val->getOperand(1));
          DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
          Chain = CallSeqStart = NewCallSeqStart;
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy the entire object into memory.  There are cases where gcc-
      // generated code assumes it is there, even if it could be put entirely
      // into registers.  (This is not what the doc says.)
      SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                            CallSeqStart.Val->getOperand(0),
                            Flags, DAG, Size);
      // This must go outside the CALLSEQ_START..END.
      SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.Val->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
      Chain = CallSeqStart = NewCallSeqStart;
      // And copy the pieces of it that fit into registers.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDOperand Const = DAG.getConstant(j, PtrOff.getValueType());
        SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType().getSimpleVT()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      // Double-word align in ELF.
      if (isELF32_ABI && Align) GPR_idx += (GPR_idx % 2);
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF.
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

        ArgOffset += PtrByteSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers.
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI)
              RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
              !isPPC64) {
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI)
              RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (isMachoABI) {
            if (GPR_idx != NumGPRs)
              ++GPR_idx;
            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
                !isPPC64)   // PPC64 has 64-bit GPRs obviously :)
              ++GPR_idx;
          }
        }
      } else {
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, false, MemOpChains,
                         TailCallArguments);
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF.
        if (isELF32_ABI && Align)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        if (isPPC64)
          ArgOffset += 8;
        else
          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ellipsis (...).  We do it
        // for all arguments; it seems to work.
        while (ArgOffset % 16 != 0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                             DAG.getConstant(ArgOffset, PtrVT));
        SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          SDOperand Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDOperand Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff,
                                     DAG.getConstant(i, PtrVT));
          SDOperand Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }

      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                         isPPC64, isTailCall, true, MemOpChains,
                         TailCallArguments);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers, emit
  // the stores here.
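  // Sketch: with 14 vector arguments, v2..v13 take the first twelve; the
  // loop below walks the argument list again and stores numbers 13 and 14
  // into the 16-byte-aligned area that sits past the 12*16 bytes reserved
  // for the register-allocated ones.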
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip the first 12 params, which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDOperand Arg = Op.getOperand(5+2*i);
      MVT ArgType = Arg.getValueType();
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDOperand PtrOff;
          // We are emitting Altivec params in order.
          LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
                           isPPC64, isTailCall, true, MemOpChains,
                           TailCallArguments);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
  if (isVarArg && isELF32_ABI) {
    SDOperand SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0);
    Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag);
    InFlag = Chain.getValue(1);
  }

  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  if (isTailCall) {
    SmallVector<SDOperand, 8> MemOpChains2;
    // Do not flag preceding copytoreg stuff together with the following
    // stuff.
    InFlag = SDOperand();
    StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                      MemOpChains2);
    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                          &MemOpChains2[0], MemOpChains2.size());

    // Store the return address to the appropriate stack slot.
    Chain = EmitTailCallStoreFPAndRetAddr(DAG, MF, Chain, LROp, FPOp, SPDiff,
                                          isPPC64, isMachoABI);
  }

  // Emit callseq_end just before the tailcall node.
  if (isTailCall) {
    SmallVector<SDOperand, 8> CallSeqOps;
    SDVTList CallSeqNodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
    CallSeqOps.push_back(Chain);
    CallSeqOps.push_back(DAG.getIntPtrConstant(NumBytes));
    CallSeqOps.push_back(DAG.getIntPtrConstant(0));
    if (InFlag.Val)
      CallSeqOps.push_back(InFlag);
    Chain = DAG.getNode(ISD::CALLSEQ_END, CallSeqNodeTys, &CallSeqOps[0],
                        CallSeqOps.size());
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = isMachoABI ? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
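  // Direct calls keep a Callee operand; for indirect calls the callee is
  // moved into CTR (plus R12/X12 on Darwin) and Callee is cleared below, so
  // the BCTRL node carries no callee operand at all.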
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call; we can't use PPCISD::CALL.
    SDOperand MTCTROps[] = {Chain, Callee, InFlag};
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12/X12 on Darwin.
    if (isMachoABI) {
      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
      Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag);
      InFlag = Chain.getValue(1);
    }

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.push_back(Chain);
    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
    Callee.Val = 0;
    // Add the CTR register as the callee so a bctr can be emitted later.
    if (isTailCall)
      Ops.push_back(DAG.getRegister(PPC::CTR, getPointerTy()));
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }
  // If this is a tail call, add the stack pointer delta.
  if (isTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // When performing tail call optimization the callee pops its arguments off
  // the stack.  Account for this here so these bytes can be pushed back on in
  // PPCRegisterInfo::eliminateCallFramePseudoInstr.
  int BytesCalleePops =
    (CC==CallingConv::Fast && PerformTailCallOpt) ? NumBytes : 0;

  if (InFlag.Val)
    Ops.push_back(InFlag);

  // Emit the tail call.
  if (isTailCall) {
    assert(InFlag.Val &&
           "Flag must be set. Depend on flag being set in LowerRET");
    Chain = DAG.getNode(PPCISD::TAILCALL,
                        Op.Val->getVTList(), &Ops[0], Ops.size());
    return SDOperand(Chain.Val, Op.ResNo);
  }

  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT),
                             DAG.getConstant(BytesCalleePops, PtrVT),
                             InFlag);
  if (Op.Val->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SmallVector<SDOperand, 16> ResultVals;
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CallerCC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CallerCC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC);

  // Copy all of the result registers out of their specified physregs.
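  // Each CopyFromReg below is threaded onto both the chain and the flag
  // result of its predecessor, which keeps the copies glued immediately
  // after the call so nothing can clobber the return registers in between.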
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT VT = VA.getValVT();
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    InFlag = Chain.getValue(2);
  }

  // If the function returns void, just return the chain.
  if (RVLocs.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
                                      TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to
  // the liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);

  Chain = GetPossiblePreceedingTailCall(Chain, PPCISD::TAILCALL);
  if (Chain.getOpcode() == PPCISD::TAILCALL) {
    SDOperand TailCall = Chain;
    SDOperand TargetAddress = TailCall.getOperand(1);
    SDOperand StackAdjustment = TailCall.getOperand(2);

    assert(((TargetAddress.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(TargetAddress)->getReg() == PPC::CTR) ||
            TargetAddress.getOpcode() == ISD::TargetExternalSymbol ||
            TargetAddress.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(TargetAddress)) &&
           "Expecting a global address, external symbol, absolute value or "
           "register");

    assert(StackAdjustment.getOpcode() == ISD::Constant &&
           "Expecting a const value");

    SmallVector<SDOperand,8> Operands;
    Operands.push_back(Chain.getOperand(0));
    Operands.push_back(TargetAddress);
    Operands.push_back(StackAdjustment);
    // Copy registers used by the call.  The last operand is a flag, so it is
    // not copied.
    for (unsigned i=3; i < TailCall.getNumOperands()-1; i++) {
      Operands.push_back(Chain.getOperand(i));
    }
    return DAG.getNode(PPCISD::TC_RETURN, MVT::Other, &Operands[0],
                       Operands.size());
  }

  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
}

SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
                                               const PPCSubtarget &Subtarget) {
  // When we pop the dynamic allocation we need to restore the SP link.

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool IsPPC64 = Subtarget.isPPC64();
  unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1;
  SDOperand StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDOperand Chain = Op.getOperand(0);
  SDOperand SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
}

SDOperand
PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current return address save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int RASI = FI->getReturnAddrSaveIndex();

  // If the return address save index hasn't been defined yet:
  if (!RASI) {
    // Find out the fixed offset of the return address save area.
    int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isMachoABI);
    // Allocate the frame index for the return address save area.
    RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
    // Save the result.
    FI->setReturnAddrSaveIndex(RASI);
  }
  return DAG.getFrameIndex(RASI, PtrVT);
}

SDOperand
PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = PPCSubTarget.isPPC64();
  bool isMachoABI = PPCSubTarget.isMachoABI();
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Get the current frame pointer save index.  The users of this index will
  // be primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet:
  if (!FPSI) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);

    // Allocate the frame index for the frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }
  return DAG.getFrameIndex(FPSI, PtrVT);
}

SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                                     SelectionDAG &DAG,
                                                     const PPCSubtarget &Subtarget) {
  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size  = Op.getOperand(1);

  // Get the correct type for pointers.
  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size.
  SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDOperand FPSIdx = getFramePointerFrameIndex(DAG);
  // Build a DYNALLOC node.
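  // Roughly: DYNALLOC moves SP down by Size, rewrites the back-chain word at
  // the new SP, and yields the address of the fresh allocation; FPSIdx names
  // the frame-pointer save slot its expansion may use.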
  SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
}

SDOperand PPCTargetLowering::LowerAtomicLOAD_ADD(SDOperand Op,
                                                 SelectionDAG &DAG) {
  MVT VT = Op.Val->getValueType(0);
  SDOperand Chain = Op.getOperand(0);
  SDOperand Ptr   = Op.getOperand(1);
  SDOperand Incr  = Op.getOperand(2);

  // Issue a "load and reserve".
  std::vector<MVT> VTs;
  VTs.push_back(VT);
  VTs.push_back(MVT::Other);

  SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
  SDOperand Ops[] = {
    Chain,               // Chain
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
  Chain = Load.getValue(1);

  // Compute the new value.
  SDOperand NewVal = DAG.getNode(ISD::ADD, VT, Load, Incr);

  // Issue a "store and check".
  SDOperand Ops2[] = {
    Chain,               // Chain
    NewVal,              // Value
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
  SDOperand OutOps[] = { Load, Store };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
                     OutOps, 2);
}

SDOperand PPCTargetLowering::LowerAtomicCMP_SWAP(SDOperand Op,
                                                 SelectionDAG &DAG) {
  MVT VT = Op.Val->getValueType(0);
  SDOperand Chain  = Op.getOperand(0);
  SDOperand Ptr    = Op.getOperand(1);
  SDOperand NewVal = Op.getOperand(2);
  SDOperand OldVal = Op.getOperand(3);

  // Issue a "load and reserve".
  std::vector<MVT> VTs;
  VTs.push_back(VT);
  VTs.push_back(MVT::Other);

  SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
  SDOperand Ops[] = {
    Chain,               // Chain
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
  Chain = Load.getValue(1);

  // Compare and unreserve if not equal.
  SDOperand Ops2[] = {
    Chain,               // Chain
    OldVal,              // Old value
    Load,                // Value in memory
    Label,               // Label
  };
  Chain = DAG.getNode(PPCISD::CMP_UNRESERVE, MVT::Other, Ops2, 4);

  // Issue a "store and check".
  SDOperand Ops3[] = {
    Chain,               // Chain
    NewVal,              // Value
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops3, 4);
  SDOperand OutOps[] = { Load, Store };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
                     OutOps, 2);
}

SDOperand PPCTargetLowering::LowerAtomicSWAP(SDOperand Op, SelectionDAG &DAG) {
  MVT VT = Op.Val->getValueType(0);
  SDOperand Chain  = Op.getOperand(0);
  SDOperand Ptr    = Op.getOperand(1);
  SDOperand NewVal = Op.getOperand(2);

  // Issue a "load and reserve".
  std::vector<MVT> VTs;
  VTs.push_back(VT);
  VTs.push_back(MVT::Other);

  SDOperand Label = DAG.getConstant(PPCAtomicLabelIndex++, MVT::i32);
  SDOperand Ops[] = {
    Chain,               // Chain
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Load = DAG.getNode(PPCISD::LARX, VTs, Ops, 3);
  Chain = Load.getValue(1);

  // Issue a "store and check".
  SDOperand Ops2[] = {
    Chain,               // Chain
    NewVal,              // Value
    Ptr,                 // Ptr
    Label,               // Label
  };
  SDOperand Store = DAG.getNode(PPCISD::STCX, MVT::Other, Ops2, 4);
  SDOperand OutOps[] = { Load, Store };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, MVT::Other),
                     OutOps, 2);
}

/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
/// when possible.
SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not an fsel.
  if (!Op.getOperand(0).getValueType().isFloatingPoint() ||
      !Op.getOperand(2).getValueType().isFloatingPoint())
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT ResVT = Op.getValueType();
  MVT CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETOLT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge; swap operands for setlt.
    case ISD::SETUGE:
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETOGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge; swap operands for setgt.
    case ISD::SETULE:
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETOLT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETOGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

// FIXME: Split this code up when LegalizeDAGTypes lands.
SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getOperand(0).getValueType().isFloatingPoint());
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType().getSimpleVT()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64);

  // Emit a store to the stack slot.
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0);

  // The result is a load from the stack slot.  If loading 4 bytes, make sure
  // to add in a bias: the i32 lives in the high-addressed word of the
  // big-endian f64 slot.
  if (Op.getValueType() == MVT::i32)
    FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
  return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
}

SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
                                                 SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::ppcf128);
  SDNode *Node = Op.Val;
  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
  assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);

  // This sequence changes FPSCR to do round-to-zero, adds the two halves
  // of the long double, and puts FPSCR back the way it was.  We do not
  // actually model FPSCR.
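  // FPSCR bits 30:31 form the rounding-mode field RN.  The MTFSB1(31) /
  // MTFSB0(30) pair below leaves RN = 01, i.e. round toward zero, for the
  // FADDRTZ, and the surrounding MFFS/MTFSF save and restore the original
  // control word.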
2961 std::vector<MVT> NodeTys; 2962 SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg; 2963 2964 NodeTys.push_back(MVT::f64); // Return register 2965 NodeTys.push_back(MVT::Flag); // Returns a flag for later insns 2966 Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); 2967 MFFSreg = Result.getValue(0); 2968 InFlag = Result.getValue(1); 2969 2970 NodeTys.clear(); 2971 NodeTys.push_back(MVT::Flag); // Returns a flag 2972 Ops[0] = DAG.getConstant(31, MVT::i32); 2973 Ops[1] = InFlag; 2974 Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2); 2975 InFlag = Result.getValue(0); 2976 2977 NodeTys.clear(); 2978 NodeTys.push_back(MVT::Flag); // Returns a flag 2979 Ops[0] = DAG.getConstant(30, MVT::i32); 2980 Ops[1] = InFlag; 2981 Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2); 2982 InFlag = Result.getValue(0); 2983 2984 NodeTys.clear(); 2985 NodeTys.push_back(MVT::f64); // result of add 2986 NodeTys.push_back(MVT::Flag); // Returns a flag 2987 Ops[0] = Lo; 2988 Ops[1] = Hi; 2989 Ops[2] = InFlag; 2990 Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3); 2991 FPreg = Result.getValue(0); 2992 InFlag = Result.getValue(1); 2993 2994 NodeTys.clear(); 2995 NodeTys.push_back(MVT::f64); 2996 Ops[0] = DAG.getConstant(1, MVT::i32); 2997 Ops[1] = MFFSreg; 2998 Ops[2] = FPreg; 2999 Ops[3] = InFlag; 3000 Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); 3001 FPreg = Result.getValue(0); 3002 3003 // We know the low half is about to be thrown away, so just use something 3004 // convenient. 3005 return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg); 3006} 3007 3008SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 3009 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 3010 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 3011 return SDOperand(); 3012 3013 if (Op.getOperand(0).getValueType() == MVT::i64) { 3014 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 3015 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 3016 if (Op.getValueType() == MVT::f32) 3017 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 3018 return FP; 3019 } 3020 3021 assert(Op.getOperand(0).getValueType() == MVT::i32 && 3022 "Unhandled SINT_TO_FP type in custom expander!"); 3023 // Since we only generate this in 64-bit mode, we can take advantage of 3024 // 64-bit registers. In particular, sign extend the input value into the 3025 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 3026 // then lfd it and fcfid it. 3027 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 3028 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 3029 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3030 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 3031 3032 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 3033 Op.getOperand(0)); 3034 3035 // STD the extended value into the stack slot. 3036 MachineMemOperand MO(PseudoSourceValue::getFixedStack(), 3037 MachineMemOperand::MOStore, FrameIdx, 8, 8); 3038 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 3039 DAG.getEntryNode(), Ext64, FIdx, 3040 DAG.getMemOperand(MO)); 3041 // Load the value as a double. 3042 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0); 3043 3044 // FCFID it and return it. 
3045   SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
3046   if (Op.getValueType() == MVT::f32)
3047     FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
3048   return FP;
3049 }
3050
3051 SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
3052   /*
3053    The rounding mode is in bits 30:31 of FPSCR, and has the following
3054    settings:
3055      00 Round to nearest
3056      01 Round to 0
3057      10 Round to +inf
3058      11 Round to -inf
3059
3060   FLT_ROUNDS, on the other hand, expects the following:
3061     -1 Undefined
3062      0 Round to 0
3063      1 Round to nearest
3064      2 Round to +inf
3065      3 Round to -inf
3066
3067   To perform the conversion, we do:
3068     ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
      For example, FPSCR rounding bits 0b10 (round to +inf) give
      (2 & 3) ^ ((~2 & 3) >> 1) = 2 ^ 0 = 2, which is FLT_ROUNDS' "+inf".
3069   */
3070
3071   MachineFunction &MF = DAG.getMachineFunction();
3072   MVT VT = Op.getValueType();
3073   MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
3074   std::vector<MVT> NodeTys;
3075   SDOperand MFFSreg, InFlag;
3076
3077   // Save FP Control Word to register
3078   NodeTys.push_back(MVT::f64);    // return register
3079   NodeTys.push_back(MVT::Flag);   // unused in this context
3080   SDOperand Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
3081
3082   // Save FP register to stack slot
3083   int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
3084   SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
3085   SDOperand Store = DAG.getStore(DAG.getEntryNode(), Chain,
3086                                  StackSlot, NULL, 0);
3087
3088   // Load FP Control Word from low 32 bits of stack slot.
3089   SDOperand Four = DAG.getConstant(4, PtrVT);
3090   SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
3091   SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);
3092
3093   // Transform as necessary
3094   SDOperand CWD1 =
3095     DAG.getNode(ISD::AND, MVT::i32,
3096                 CWD, DAG.getConstant(3, MVT::i32));
3097   SDOperand CWD2 =
3098     DAG.getNode(ISD::SRL, MVT::i32,
3099                 DAG.getNode(ISD::AND, MVT::i32,
3100                             DAG.getNode(ISD::XOR, MVT::i32,
3101                                         CWD, DAG.getConstant(3, MVT::i32)),
3102                             DAG.getConstant(3, MVT::i32)),
3103                 DAG.getConstant(1, MVT::i8));
3104
3105   SDOperand RetVal =
3106     DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);
3107
3108   return DAG.getNode((VT.getSizeInBits() < 16 ?
3109                       ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
3110 }
3111
3112 SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
3113   MVT VT = Op.getValueType();
3114   unsigned BitWidth = VT.getSizeInBits();
3115   assert(Op.getNumOperands() == 3 &&
3116          VT == Op.getOperand(1).getValueType() &&
3117          "Unexpected SHL!");
3118
3119   // Expand into a bunch of logical ops.  Note that these ops
3120   // depend on the PPC behavior for oversized shift amounts.
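  // A sketch of the expansion (illustrative): for the 2*BW-bit pair {Hi,Lo}
  // shifted left by Amt in [0, 2*BW):
  //   OutLo = Lo << Amt
  //   OutHi = (Hi << Amt) | (Lo >> (BW - Amt)) | (Lo << (Amt - BW))
  // At most one of the last two terms is nonzero, because PPC shifts by an
  // amount in [BW, 2*BW) produce zero rather than wrapping; that is the
  // "PPC behavior" referred to above.  LowerSRL_PARTS and LowerSRA_PARTS
  // below are the analogous right-shift expansions.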
3121 SDOperand Lo = Op.getOperand(0); 3122 SDOperand Hi = Op.getOperand(1); 3123 SDOperand Amt = Op.getOperand(2); 3124 MVT AmtVT = Amt.getValueType(); 3125 3126 SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3127 DAG.getConstant(BitWidth, AmtVT), Amt); 3128 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt); 3129 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1); 3130 SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3131 SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3132 DAG.getConstant(-BitWidth, AmtVT)); 3133 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5); 3134 SDOperand OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); 3135 SDOperand OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt); 3136 SDOperand OutOps[] = { OutLo, OutHi }; 3137 return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT), 3138 OutOps, 2); 3139} 3140 3141SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) { 3142 MVT VT = Op.getValueType(); 3143 unsigned BitWidth = VT.getSizeInBits(); 3144 assert(Op.getNumOperands() == 3 && 3145 VT == Op.getOperand(1).getValueType() && 3146 "Unexpected SRL!"); 3147 3148 // Expand into a bunch of logical ops. Note that these ops 3149 // depend on the PPC behavior for oversized shift amounts. 3150 SDOperand Lo = Op.getOperand(0); 3151 SDOperand Hi = Op.getOperand(1); 3152 SDOperand Amt = Op.getOperand(2); 3153 MVT AmtVT = Amt.getValueType(); 3154 3155 SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3156 DAG.getConstant(BitWidth, AmtVT), Amt); 3157 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); 3158 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); 3159 SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3160 SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3161 DAG.getConstant(-BitWidth, AmtVT)); 3162 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5); 3163 SDOperand OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6); 3164 SDOperand OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt); 3165 SDOperand OutOps[] = { OutLo, OutHi }; 3166 return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT), 3167 OutOps, 2); 3168} 3169 3170SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) { 3171 MVT VT = Op.getValueType(); 3172 unsigned BitWidth = VT.getSizeInBits(); 3173 assert(Op.getNumOperands() == 3 && 3174 VT == Op.getOperand(1).getValueType() && 3175 "Unexpected SRA!"); 3176 3177 // Expand into a bunch of logical ops, followed by a select_cc. 3178 SDOperand Lo = Op.getOperand(0); 3179 SDOperand Hi = Op.getOperand(1); 3180 SDOperand Amt = Op.getOperand(2); 3181 MVT AmtVT = Amt.getValueType(); 3182 3183 SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT, 3184 DAG.getConstant(BitWidth, AmtVT), Amt); 3185 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt); 3186 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1); 3187 SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3); 3188 SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt, 3189 DAG.getConstant(-BitWidth, AmtVT)); 3190 SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5); 3191 SDOperand OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt); 3192 SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT), 3193 Tmp4, Tmp6, ISD::SETLE); 3194 SDOperand OutOps[] = { OutLo, OutHi }; 3195 return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT), 3196 OutOps, 2); 3197} 3198 3199//===----------------------------------------------------------------------===// 3200// Vector related lowering. 
3201// 3202 3203// If this is a vector of constants or undefs, get the bits. A bit in 3204// UndefBits is set if the corresponding element of the vector is an 3205// ISD::UNDEF value. For undefs, the corresponding VectorBits values are 3206// zero. Return true if this is not an array of constants, false if it is. 3207// 3208static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2], 3209 uint64_t UndefBits[2]) { 3210 // Start with zero'd results. 3211 VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0; 3212 3213 unsigned EltBitSize = BV->getOperand(0).getValueType().getSizeInBits(); 3214 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 3215 SDOperand OpVal = BV->getOperand(i); 3216 3217 unsigned PartNo = i >= e/2; // In the upper 128 bits? 3218 unsigned SlotNo = e/2 - (i & (e/2-1))-1; // Which subpiece of the uint64_t. 3219 3220 uint64_t EltBits = 0; 3221 if (OpVal.getOpcode() == ISD::UNDEF) { 3222 uint64_t EltUndefBits = ~0U >> (32-EltBitSize); 3223 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); 3224 continue; 3225 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 3226 EltBits = CN->getValue() & (~0U >> (32-EltBitSize)); 3227 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 3228 assert(CN->getValueType(0) == MVT::f32 && 3229 "Only one legal FP vector type!"); 3230 EltBits = FloatToBits(CN->getValueAPF().convertToFloat()); 3231 } else { 3232 // Nonconstant element. 3233 return true; 3234 } 3235 3236 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); 3237 } 3238 3239 //printf("%llx %llx %llx %llx\n", 3240 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); 3241 return false; 3242} 3243 3244// If this is a splat (repetition) of a value across the whole vector, return 3245// the smallest size that splats it. For example, "0x01010101010101..." is a 3246// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 3247// SplatSize = 1 byte. 3248static bool isConstantSplat(const uint64_t Bits128[2], 3249 const uint64_t Undef128[2], 3250 unsigned &SplatBits, unsigned &SplatUndef, 3251 unsigned &SplatSize) { 3252 3253 // Don't let undefs prevent splats from matching. See if the top 64-bits are 3254 // the same as the lower 64-bits, ignoring undefs. 3255 if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0])) 3256 return false; // Can't be a splat if two pieces don't match. 3257 3258 uint64_t Bits64 = Bits128[0] | Bits128[1]; 3259 uint64_t Undef64 = Undef128[0] & Undef128[1]; 3260 3261 // Check that the top 32-bits are the same as the lower 32-bits, ignoring 3262 // undefs. 3263 if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64)) 3264 return false; // Can't be a splat if two pieces don't match. 3265 3266 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); 3267 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); 3268 3269 // If the top 16-bits are different than the lower 16-bits, ignoring 3270 // undefs, we have an i32 splat. 3271 if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) { 3272 SplatBits = Bits32; 3273 SplatUndef = Undef32; 3274 SplatSize = 4; 3275 return true; 3276 } 3277 3278 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); 3279 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); 3280 3281 // If the top 8-bits are different than the lower 8-bits, ignoring 3282 // undefs, we have an i16 splat. 
3283 if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) { 3284 SplatBits = Bits16; 3285 SplatUndef = Undef16; 3286 SplatSize = 2; 3287 return true; 3288 } 3289 3290 // Otherwise, we have an 8-bit splat. 3291 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); 3292 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); 3293 SplatSize = 1; 3294 return true; 3295} 3296 3297/// BuildSplatI - Build a canonical splati of Val with an element size of 3298/// SplatSize. Cast the result to VT. 3299static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT VT, 3300 SelectionDAG &DAG) { 3301 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 3302 3303 static const MVT VTys[] = { // canonical VT to use for each size. 3304 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 3305 }; 3306 3307 MVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 3308 3309 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 3310 if (Val == -1) 3311 SplatSize = 1; 3312 3313 MVT CanonicalVT = VTys[SplatSize-1]; 3314 3315 // Build a canonical splat for this value. 3316 SDOperand Elt = DAG.getConstant(Val, CanonicalVT.getVectorElementType()); 3317 SmallVector<SDOperand, 8> Ops; 3318 Ops.assign(CanonicalVT.getVectorNumElements(), Elt); 3319 SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, 3320 &Ops[0], Ops.size()); 3321 return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res); 3322} 3323 3324/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 3325/// specified intrinsic ID. 3326static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS, 3327 SelectionDAG &DAG, 3328 MVT DestVT = MVT::Other) { 3329 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 3330 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 3331 DAG.getConstant(IID, MVT::i32), LHS, RHS); 3332} 3333 3334/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 3335/// specified intrinsic ID. 3336static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1, 3337 SDOperand Op2, SelectionDAG &DAG, 3338 MVT DestVT = MVT::Other) { 3339 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 3340 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 3341 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 3342} 3343 3344 3345/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 3346/// amount. The result has the specified value type. 3347static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt, 3348 MVT VT, SelectionDAG &DAG) { 3349 // Force LHS/RHS to be the right type. 3350 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS); 3351 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS); 3352 3353 SDOperand Ops[16]; 3354 for (unsigned i = 0; i != 16; ++i) 3355 Ops[i] = DAG.getConstant(i+Amt, MVT::i32); 3356 SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS, 3357 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16)); 3358 return DAG.getNode(ISD::BIT_CONVERT, VT, T); 3359} 3360 3361// If this is a case we can't handle, return null and let the default 3362// expansion code take care of it. If we CAN select this case, and if it 3363// selects to a single instruction, return Op. Otherwise, if we can codegen 3364// this case more efficiently than a constant pool load, lower it to the 3365// sequence of ops that should be used. 3366SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op, 3367 SelectionDAG &DAG) { 3368 // If this is a vector of constants or undefs, get the bits. 
A bit in
3369   // UndefBits is set if the corresponding element of the vector is an
3370   // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
3371   // zero.
3372   uint64_t VectorBits[2];
3373   uint64_t UndefBits[2];
3374   if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
3375     return SDOperand();   // Not a constant vector.
3376
3377   // If this is a splat (repetition) of a value across the whole vector, return
3378   // the smallest size that splats it.  For example, "0x01010101010101..." is a
3379   // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
3380   // SplatSize = 1 byte.
3381   unsigned SplatBits, SplatUndef, SplatSize;
3382   if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
3383     bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
3384
3385     // First, handle single instruction cases.
3386
3387     // All zeros?
3388     if (SplatBits == 0) {
3389       // Canonicalize all zero vectors to be v4i32.
3390       if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
3391         SDOperand Z = DAG.getConstant(0, MVT::i32);
3392         Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
3393         Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
3394       }
3395       return Op;
3396     }
3397
3398     // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
3399     int32_t SextVal = int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
3400     if (SextVal >= -16 && SextVal <= 15)
3401       return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
3402
3403
3404     // Two instruction sequences.
3405
3406     // If this value is in the range [-32,30] and is even, use:
3407     //    tmp = VSPLTI[bhw], result = add tmp, tmp
3408     if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
3409       Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
3410       return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
3411     }
3412
3413     // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
3414     // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
3415     // for fneg/fabs.
3416     if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
3417       // Make -1 and vspltisw -1:
3418       SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
3419
3420       // Make the VSLW intrinsic, computing 0x8000_0000.
3421       SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
3422                                        OnesV, DAG);
3423
3424       // xor by OnesV to invert it.
3425       Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
3426       return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
3427     }
3428
3429     // Check to see if this is a wide variety of vsplti*, binop self cases.
3430     unsigned SplatBitSize = SplatSize*8;
3431     static const signed char SplatCsts[] = {
3432       -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
3433       -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
3434     };
3435
3436     for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
3437       // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
3438       // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
       // can be formed at any element size, since BuildSplatI canonicalizes
       // -1 splats to vspltisb -1.
3439       int i = SplatCsts[idx];
3440
3441       // Figure out what shift amount will be used by altivec if shifted by i in
3442       // this splat size.
3443       unsigned TypeShiftAmt = i & (SplatBitSize-1);
3444
3445       // vsplti + shl self.
3446       if (SextVal == (i << (int)TypeShiftAmt)) {
3447         SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
3448         static const unsigned IIDs[] = { // Intrinsic to use for each size.
3449           Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
3450           Intrinsic::ppc_altivec_vslw
3451         };
3452         Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
3453         return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
3454       }
3455
3456       // vsplti + srl self.
3457       if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
3458         SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
3459         static const unsigned IIDs[] = { // Intrinsic to use for each size.
3460           Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
3461           Intrinsic::ppc_altivec_vsrw
3462         };
3463         Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
3464         return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
3465       }
3466
3467       // vsplti + sra self.  (Note the arithmetic shift here: with a logical
       // shift this test would duplicate the srl case above and never fire.)
3468       if (SextVal == (i >> (int)TypeShiftAmt)) {
3469         SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
3470         static const unsigned IIDs[] = { // Intrinsic to use for each size.
3471           Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
3472           Intrinsic::ppc_altivec_vsraw
3473         };
3474         Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
3475         return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
3476       }
3477
3478       // vsplti + rol self.
3479       if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
3480                            ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
3481         SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
3482         static const unsigned IIDs[] = { // Intrinsic to use for each size.
3483           Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
3484           Intrinsic::ppc_altivec_vrlw
3485         };
3486         Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
3487         return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
3488       }
3489
       // (In the three checks below, the OR term supplies the sign-filled
       // bytes that the byte rotation brings around when i is negative.)
3490       // t = vsplti c, result = vsldoi t, t, 1
3491       if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
3492         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
3493         return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
3494       }
3495       // t = vsplti c, result = vsldoi t, t, 2
3496       if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
3497         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
3498         return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
3499       }
3500       // t = vsplti c, result = vsldoi t, t, 3
3501       if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
3502         SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
3503         return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
3504       }
3505     }
3506
3507     // Three instruction sequences.
3508
3509     // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
3510     if (SextVal >= 0 && SextVal <= 31) {
3511       SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
3512       SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
3513       LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);
3514       return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
3515     }
3516     // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
3517     if (SextVal >= -31 && SextVal <= 0) {
3518       SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
3519       SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
3520       LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
3521       return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
3522     }
3523   }
3524
3525   return SDOperand();
3526 }
3527
3528 /// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
3529 /// the specified operations to build the shuffle.
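/// Each 32-bit PFEntry is packed as follows (this layout is inferred from
/// the decoding logic below, so treat it as descriptive rather than
/// normative):
///   bits 31-30: cost of the sequence, in instructions
///   bits 29-26: operation tag (the OP_* enum inside the function)
///   bits 25-13: LHSID, perfect-shuffle index describing the left operand
///   bits 12-0:  RHSID, perfect-shuffle index describing the right operand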
3530static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS, 3531 SDOperand RHS, SelectionDAG &DAG) { 3532 unsigned OpNum = (PFEntry >> 26) & 0x0F; 3533 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 3534 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 3535 3536 enum { 3537 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 3538 OP_VMRGHW, 3539 OP_VMRGLW, 3540 OP_VSPLTISW0, 3541 OP_VSPLTISW1, 3542 OP_VSPLTISW2, 3543 OP_VSPLTISW3, 3544 OP_VSLDOI4, 3545 OP_VSLDOI8, 3546 OP_VSLDOI12 3547 }; 3548 3549 if (OpNum == OP_COPY) { 3550 if (LHSID == (1*9+2)*9+3) return LHS; 3551 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 3552 return RHS; 3553 } 3554 3555 SDOperand OpLHS, OpRHS; 3556 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); 3557 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); 3558 3559 unsigned ShufIdxs[16]; 3560 switch (OpNum) { 3561 default: assert(0 && "Unknown i32 permute!"); 3562 case OP_VMRGHW: 3563 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 3564 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 3565 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 3566 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 3567 break; 3568 case OP_VMRGLW: 3569 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 3570 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 3571 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 3572 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 3573 break; 3574 case OP_VSPLTISW0: 3575 for (unsigned i = 0; i != 16; ++i) 3576 ShufIdxs[i] = (i&3)+0; 3577 break; 3578 case OP_VSPLTISW1: 3579 for (unsigned i = 0; i != 16; ++i) 3580 ShufIdxs[i] = (i&3)+4; 3581 break; 3582 case OP_VSPLTISW2: 3583 for (unsigned i = 0; i != 16; ++i) 3584 ShufIdxs[i] = (i&3)+8; 3585 break; 3586 case OP_VSPLTISW3: 3587 for (unsigned i = 0; i != 16; ++i) 3588 ShufIdxs[i] = (i&3)+12; 3589 break; 3590 case OP_VSLDOI4: 3591 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 3592 case OP_VSLDOI8: 3593 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 3594 case OP_VSLDOI12: 3595 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 3596 } 3597 SDOperand Ops[16]; 3598 for (unsigned i = 0; i != 16; ++i) 3599 Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32); 3600 3601 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 3602 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 3603} 3604 3605/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 3606/// is a shuffle we can handle in a single instruction, return it. Otherwise, 3607/// return the code it can be lowered into. Worst case, it can always be 3608/// lowered into a vperm. 3609SDOperand PPCTargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, 3610 SelectionDAG &DAG) { 3611 SDOperand V1 = Op.getOperand(0); 3612 SDOperand V2 = Op.getOperand(1); 3613 SDOperand PermMask = Op.getOperand(2); 3614 3615 // Cases that are handled by instructions that take permute immediates 3616 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 3617 // selected by the instruction selector. 
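  // For example (illustrative): splatting word 2 of a v4i32 input whose
  // second operand is undef shows up here as the 16-entry byte mask
  // <8,9,10,11, 8,9,10,11, 8,9,10,11, 8,9,10,11>; isSplatShuffleMask(.., 4)
  // accepts it, and leaving the node intact lets the selector emit a single
  // vspltw rather than a vperm with a constant-pool mask.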
3618 if (V2.getOpcode() == ISD::UNDEF) { 3619 if (PPC::isSplatShuffleMask(PermMask.Val, 1) || 3620 PPC::isSplatShuffleMask(PermMask.Val, 2) || 3621 PPC::isSplatShuffleMask(PermMask.Val, 4) || 3622 PPC::isVPKUWUMShuffleMask(PermMask.Val, true) || 3623 PPC::isVPKUHUMShuffleMask(PermMask.Val, true) || 3624 PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 || 3625 PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) || 3626 PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) || 3627 PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) || 3628 PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) || 3629 PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) || 3630 PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) { 3631 return Op; 3632 } 3633 } 3634 3635 // Altivec has a variety of "shuffle immediates" that take two vector inputs 3636 // and produce a fixed permutation. If any of these match, do not lower to 3637 // VPERM. 3638 if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) || 3639 PPC::isVPKUHUMShuffleMask(PermMask.Val, false) || 3640 PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 || 3641 PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) || 3642 PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) || 3643 PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) || 3644 PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) || 3645 PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) || 3646 PPC::isVMRGHShuffleMask(PermMask.Val, 4, false)) 3647 return Op; 3648 3649 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 3650 // perfect shuffle table to emit an optimal matching sequence. 3651 unsigned PFIndexes[4]; 3652 bool isFourElementShuffle = true; 3653 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 3654 unsigned EltNo = 8; // Start out undef. 3655 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 3656 if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) 3657 continue; // Undef, ignore it. 3658 3659 unsigned ByteSource = 3660 cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue(); 3661 if ((ByteSource & 3) != j) { 3662 isFourElementShuffle = false; 3663 break; 3664 } 3665 3666 if (EltNo == 8) { 3667 EltNo = ByteSource/4; 3668 } else if (EltNo != ByteSource/4) { 3669 isFourElementShuffle = false; 3670 break; 3671 } 3672 } 3673 PFIndexes[i] = EltNo; 3674 } 3675 3676 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 3677 // perfect shuffle vector to determine if it is cost effective to do this as 3678 // discrete instructions, or whether we should use a vperm. 3679 if (isFourElementShuffle) { 3680 // Compute the index in the perfect shuffle table. 3681 unsigned PFTableIndex = 3682 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 3683 3684 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 3685 unsigned Cost = (PFEntry >> 30); 3686 3687 // Determining when to avoid vperm is tricky. Many things affect the cost 3688 // of vperm, particularly how many times the perm mask needs to be computed. 3689 // For example, if the perm mask can be hoisted out of a loop or is already 3690 // used (perhaps because there are multiple permutes with the same shuffle 3691 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 3692 // the loop requires an extra register. 3693 // 3694 // As a compromise, we only emit discrete instructions if the shuffle can be 3695 // generated in 3 or fewer operations. 
When we have loop information 3696 // available, if this block is within a loop, we should avoid using vperm 3697 // for 3-operation perms and use a constant pool load instead. 3698 if (Cost < 3) 3699 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG); 3700 } 3701 3702 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 3703 // vector that will get spilled to the constant pool. 3704 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 3705 3706 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 3707 // that it is in input element units, not in bytes. Convert now. 3708 MVT EltVT = V1.getValueType().getVectorElementType(); 3709 unsigned BytesPerElement = EltVT.getSizeInBits()/8; 3710 3711 SmallVector<SDOperand, 16> ResultMask; 3712 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 3713 unsigned SrcElt; 3714 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) 3715 SrcElt = 0; 3716 else 3717 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 3718 3719 for (unsigned j = 0; j != BytesPerElement; ++j) 3720 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 3721 MVT::i8)); 3722 } 3723 3724 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, 3725 &ResultMask[0], ResultMask.size()); 3726 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 3727} 3728 3729/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 3730/// altivec comparison. If it is, return true and fill in Opc/isDot with 3731/// information about the intrinsic. 3732static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc, 3733 bool &isDot) { 3734 unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue(); 3735 CompareOpc = -1; 3736 isDot = false; 3737 switch (IntrinsicID) { 3738 default: return false; 3739 // Comparison predicates. 3740 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 3741 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 3742 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 3743 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 3744 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 3745 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 3746 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 3747 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 3748 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 3749 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 3750 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 3751 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 3752 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 3753 3754 // Normal Comparisons. 
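  // (These produce only the full vector comparison result; unlike the *_p
  // predicate forms above, they do not set CR6, which is why isDot is 0.)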
3755 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 3756 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 3757 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 3758 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 3759 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 3760 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 3761 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 3762 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 3763 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 3764 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 3765 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 3766 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 3767 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 3768 } 3769 return true; 3770} 3771 3772/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 3773/// lower, do it, otherwise return null. 3774SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, 3775 SelectionDAG &DAG) { 3776 // If this is a lowered altivec predicate compare, CompareOpc is set to the 3777 // opcode number of the comparison. 3778 int CompareOpc; 3779 bool isDot; 3780 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 3781 return SDOperand(); // Don't custom lower most intrinsics. 3782 3783 // If this is a non-dot comparison, make the VCMP node and we are done. 3784 if (!isDot) { 3785 SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 3786 Op.getOperand(1), Op.getOperand(2), 3787 DAG.getConstant(CompareOpc, MVT::i32)); 3788 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 3789 } 3790 3791 // Create the PPCISD altivec 'dot' comparison node. 3792 SDOperand Ops[] = { 3793 Op.getOperand(2), // LHS 3794 Op.getOperand(3), // RHS 3795 DAG.getConstant(CompareOpc, MVT::i32) 3796 }; 3797 std::vector<MVT> VTs; 3798 VTs.push_back(Op.getOperand(2).getValueType()); 3799 VTs.push_back(MVT::Flag); 3800 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); 3801 3802 // Now that we have the comparison, emit a copy from the CR to a GPR. 3803 // This is flagged to the above dot comparison. 3804 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 3805 DAG.getRegister(PPC::CR6, MVT::i32), 3806 CompNode.getValue(1)); 3807 3808 // Unpack the result based on how the target uses it. 3809 unsigned BitNo; // Bit # of CR6. 3810 bool InvertBit; // Invert result? 3811 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 3812 default: // Can't happen, don't crash on invalid number though. 3813 case 0: // Return the value of the EQ bit of CR6. 3814 BitNo = 0; InvertBit = false; 3815 break; 3816 case 1: // Return the inverted value of the EQ bit of CR6. 3817 BitNo = 0; InvertBit = true; 3818 break; 3819 case 2: // Return the value of the LT bit of CR6. 3820 BitNo = 2; InvertBit = false; 3821 break; 3822 case 3: // Return the inverted value of the LT bit of CR6. 3823 BitNo = 2; InvertBit = true; 3824 break; 3825 } 3826 3827 // Shift the bit into the low position. 3828 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 3829 DAG.getConstant(8-(3-BitNo), MVT::i32)); 3830 // Isolate the bit. 
3831 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 3832 DAG.getConstant(1, MVT::i32)); 3833 3834 // If we are supposed to, toggle the bit. 3835 if (InvertBit) 3836 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 3837 DAG.getConstant(1, MVT::i32)); 3838 return Flags; 3839} 3840 3841SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, 3842 SelectionDAG &DAG) { 3843 // Create a stack slot that is 16-byte aligned. 3844 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 3845 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 3846 MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3847 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 3848 3849 // Store the input value into Value#0 of the stack slot. 3850 SDOperand Store = DAG.getStore(DAG.getEntryNode(), 3851 Op.getOperand(0), FIdx, NULL, 0); 3852 // Load it out. 3853 return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); 3854} 3855 3856SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) { 3857 if (Op.getValueType() == MVT::v4i32) { 3858 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3859 3860 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 3861 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 3862 3863 SDOperand RHSSwap = // = vrlw RHS, 16 3864 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 3865 3866 // Shrinkify inputs to v8i16. 3867 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 3868 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 3869 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 3870 3871 // Low parts multiplied together, generating 32-bit results (we ignore the 3872 // top parts). 3873 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 3874 LHS, RHS, DAG, MVT::v4i32); 3875 3876 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 3877 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 3878 // Shift the high parts up 16 bits. 3879 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 3880 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 3881 } else if (Op.getValueType() == MVT::v8i16) { 3882 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3883 3884 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 3885 3886 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 3887 LHS, RHS, Zero, DAG); 3888 } else if (Op.getValueType() == MVT::v16i8) { 3889 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3890 3891 // Multiply the even 8-bit parts, producing 16-bit sums. 3892 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 3893 LHS, RHS, DAG, MVT::v8i16); 3894 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 3895 3896 // Multiply the odd 8-bit parts, producing 16-bit sums. 3897 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 3898 LHS, RHS, DAG, MVT::v8i16); 3899 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 3900 3901 // Merge the results together. 3902 SDOperand Ops[16]; 3903 for (unsigned i = 0; i != 8; ++i) { 3904 Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); 3905 Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); 3906 } 3907 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 3908 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 3909 } else { 3910 assert(0 && "Unknown mul to lower!"); 3911 abort(); 3912 } 3913} 3914 3915/// LowerOperation - Provide custom lowering hooks for some operations. 
3916/// 3917SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 3918 switch (Op.getOpcode()) { 3919 default: assert(0 && "Wasn't expecting to be able to lower this!"); 3920 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 3921 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 3922 case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); 3923 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 3924 case ISD::SETCC: return LowerSETCC(Op, DAG); 3925 case ISD::VASTART: 3926 return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, 3927 VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); 3928 3929 case ISD::VAARG: 3930 return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset, 3931 VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget); 3932 3933 case ISD::FORMAL_ARGUMENTS: 3934 return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex, 3935 VarArgsStackOffset, VarArgsNumGPR, 3936 VarArgsNumFPR, PPCSubTarget); 3937 3938 case ISD::CALL: return LowerCALL(Op, DAG, PPCSubTarget, 3939 getTargetMachine()); 3940 case ISD::RET: return LowerRET(Op, DAG, getTargetMachine()); 3941 case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); 3942 case ISD::DYNAMIC_STACKALLOC: 3943 return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); 3944 3945 case ISD::ATOMIC_LOAD_ADD: return LowerAtomicLOAD_ADD(Op, DAG); 3946 case ISD::ATOMIC_CMP_SWAP: return LowerAtomicCMP_SWAP(Op, DAG); 3947 case ISD::ATOMIC_SWAP: return LowerAtomicSWAP(Op, DAG); 3948 3949 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 3950 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 3951 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 3952 case ISD::FP_ROUND_INREG: return LowerFP_ROUND_INREG(Op, DAG); 3953 case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); 3954 3955 // Lower 64-bit shifts. 3956 case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG); 3957 case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG); 3958 case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG); 3959 3960 // Vector-related lowering. 3961 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 3962 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 3963 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 3964 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 3965 case ISD::MUL: return LowerMUL(Op, DAG); 3966 3967 // Frame & Return address. 
3968   case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
3969   case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
3970   }
3971   return SDOperand();
3972 }
3973
3974 SDNode *PPCTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
3975   switch (N->getOpcode()) {
3976   default: assert(0 && "Wasn't expecting to be able to lower this!");
3977   case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
3978   }
3979 }
3980
3981
3982 //===----------------------------------------------------------------------===//
3983 //  Other Lowering Code
3984 //===----------------------------------------------------------------------===//
3985
3986 MachineBasicBlock *
3987 PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
3988                                                MachineBasicBlock *BB) {
3989   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
3990   assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
3991           MI->getOpcode() == PPC::SELECT_CC_I8 ||
3992           MI->getOpcode() == PPC::SELECT_CC_F4 ||
3993           MI->getOpcode() == PPC::SELECT_CC_F8 ||
3994           MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
3995          "Unexpected instr type to insert");
3996
3997   // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
3998   // control-flow pattern.  The incoming instruction knows the destination vreg
3999   // to set, the condition code register to branch on, the true/false values to
4000   // select between, and a branch opcode to use.
4001   const BasicBlock *LLVM_BB = BB->getBasicBlock();
4002   ilist<MachineBasicBlock>::iterator It = BB;
4003   ++It;
4004
4005   //  thisMBB:
4006   //  ...
4007   //   TrueVal = ...
4008   //   cmpTY ccX, r1, r2
4009   //   bCC sinkMBB
4010   //   fallthrough --> copy0MBB
4011   MachineBasicBlock *thisMBB = BB;
4012   MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
4013   MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
4014   unsigned SelectPred = MI->getOperand(4).getImm();
4015   BuildMI(BB, TII->get(PPC::BCC))
4016     .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
4017   MachineFunction *F = BB->getParent();
4018   F->getBasicBlockList().insert(It, copy0MBB);
4019   F->getBasicBlockList().insert(It, sinkMBB);
4020   // Update machine-CFG edges by transferring all successors of the current
4021   // block to the new block which will contain the Phi node for the select.
4022   sinkMBB->transferSuccessors(BB);
4023   // Next, add the true and fallthrough blocks as its successors.
4024   BB->addSuccessor(copy0MBB);
4025   BB->addSuccessor(sinkMBB);
4026
4027   //  copy0MBB:
4028   //   %FalseValue = ...
4029   //   # fallthrough to sinkMBB
4030   BB = copy0MBB;
4031
4032   // Update machine-CFG edges
4033   BB->addSuccessor(sinkMBB);
4034
4035   //  sinkMBB:
4036   //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
4037   //  ...
4038   BB = sinkMBB;
4039   BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
4040     .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
4041     .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);
4042
4043   delete MI;   // The pseudo instruction is gone now.
4044 return BB; 4045} 4046 4047//===----------------------------------------------------------------------===// 4048// Target Optimization Hooks 4049//===----------------------------------------------------------------------===// 4050 4051SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 4052 DAGCombinerInfo &DCI) const { 4053 TargetMachine &TM = getTargetMachine(); 4054 SelectionDAG &DAG = DCI.DAG; 4055 switch (N->getOpcode()) { 4056 default: break; 4057 case PPCISD::SHL: 4058 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 4059 if (C->getValue() == 0) // 0 << V -> 0. 4060 return N->getOperand(0); 4061 } 4062 break; 4063 case PPCISD::SRL: 4064 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 4065 if (C->getValue() == 0) // 0 >>u V -> 0. 4066 return N->getOperand(0); 4067 } 4068 break; 4069 case PPCISD::SRA: 4070 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 4071 if (C->getValue() == 0 || // 0 >>s V -> 0. 4072 C->isAllOnesValue()) // -1 >>s V -> -1. 4073 return N->getOperand(0); 4074 } 4075 break; 4076 4077 case ISD::SINT_TO_FP: 4078 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 4079 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 4080 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 4081 // We allow the src/dst to be either f32/f64, but the intermediate 4082 // type must be i64. 4083 if (N->getOperand(0).getValueType() == MVT::i64 && 4084 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 4085 SDOperand Val = N->getOperand(0).getOperand(0); 4086 if (Val.getValueType() == MVT::f32) { 4087 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 4088 DCI.AddToWorklist(Val.Val); 4089 } 4090 4091 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); 4092 DCI.AddToWorklist(Val.Val); 4093 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); 4094 DCI.AddToWorklist(Val.Val); 4095 if (N->getValueType(0) == MVT::f32) { 4096 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val, 4097 DAG.getIntPtrConstant(0)); 4098 DCI.AddToWorklist(Val.Val); 4099 } 4100 return Val; 4101 } else if (N->getOperand(0).getValueType() == MVT::i32) { 4102 // If the intermediate type is i32, we can avoid the load/store here 4103 // too. 4104 } 4105 } 4106 } 4107 break; 4108 case ISD::STORE: 4109 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 4110 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 4111 !cast<StoreSDNode>(N)->isTruncatingStore() && 4112 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 4113 N->getOperand(1).getValueType() == MVT::i32 && 4114 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 4115 SDOperand Val = N->getOperand(1).getOperand(0); 4116 if (Val.getValueType() == MVT::f32) { 4117 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 4118 DCI.AddToWorklist(Val.Val); 4119 } 4120 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); 4121 DCI.AddToWorklist(Val.Val); 4122 4123 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, 4124 N->getOperand(2), N->getOperand(3)); 4125 DCI.AddToWorklist(Val.Val); 4126 return Val; 4127 } 4128 4129 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 4130 if (N->getOperand(1).getOpcode() == ISD::BSWAP && 4131 N->getOperand(1).Val->hasOneUse() && 4132 (N->getOperand(1).getValueType() == MVT::i32 || 4133 N->getOperand(1).getValueType() == MVT::i16)) { 4134 SDOperand BSwapOp = N->getOperand(1).getOperand(0); 4135 // Do an any-extend to 32-bits if this is a half-word input. 
4136       if (BSwapOp.getValueType() == MVT::i16)
4137         BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);
4138
4139       return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
4140                          N->getOperand(2), N->getOperand(3),
4141                          DAG.getValueType(N->getOperand(1).getValueType()));
4142     }
4143     break;
4144   case ISD::BSWAP:
4145     // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
4146     if (ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
4147         N->getOperand(0).hasOneUse() &&
4148         (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
4149       SDOperand Load = N->getOperand(0);
4150       LoadSDNode *LD = cast<LoadSDNode>(Load);
4151       // Create the byte-swapping load.
4152       std::vector<MVT> VTs;
4153       VTs.push_back(MVT::i32);
4154       VTs.push_back(MVT::Other);
4155       SDOperand MO = DAG.getMemOperand(LD->getMemOperand());
4156       SDOperand Ops[] = {
4157         LD->getChain(),    // Chain
4158         LD->getBasePtr(),  // Ptr
4159         MO,                // MemOperand
4160         DAG.getValueType(N->getValueType(0)) // VT
4161       };
4162       SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);
4163
4164       // If this is an i16 load, insert the truncate.
4165       SDOperand ResVal = BSLoad;
4166       if (N->getValueType(0) == MVT::i16)
4167         ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);
4168
4169       // First, combine the bswap away.  This makes the value produced by the
4170       // load dead.
4171       DCI.CombineTo(N, ResVal);
4172
4173       // Next, combine the load away; we give it a bogus result value but a real
4174       // chain result.  The result value is dead because the bswap is dead.
4175       DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));
4176
4177       // Return N so it doesn't get rechecked!
4178       return SDOperand(N, 0);
4179     }
4180
4181     break;
4182   case PPCISD::VCMP: {
4183     // If a VCMPo node already exists with exactly the same operands as this
4184     // node, use its result instead of this node (VCMPo computes both a CR6 and
4185     // a normal output).
4186     //
4187     if (!N->getOperand(0).hasOneUse() &&
4188         !N->getOperand(1).hasOneUse() &&
4189         !N->getOperand(2).hasOneUse()) {
4190
4191       // Scan all of the users of the LHS, looking for VCMPo's that match.
4192       SDNode *VCMPoNode = 0;
4193
4194       SDNode *LHSN = N->getOperand(0).Val;
4195       for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
4196            UI != E; ++UI)
4197         if ((*UI).getUser()->getOpcode() == PPCISD::VCMPo &&
4198             (*UI).getUser()->getOperand(1) == N->getOperand(1) &&
4199             (*UI).getUser()->getOperand(2) == N->getOperand(2) &&
4200             (*UI).getUser()->getOperand(0) == N->getOperand(0)) {
4201           VCMPoNode = UI->getUser();
4202           break;
4203         }
4204
4205       // If there is no VCMPo node, or if its flag result is unused, don't
4206       // transform this.
4207       if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
4208         break;
4209
4210       // Look at the (necessarily single) use of the flag value.  If it has a
4211       // chain, this transformation is more complex.  Note that multiple things
4212       // could use the value result, which we should ignore.
4213       SDNode *FlagUser = 0;
4214       for (SDNode::use_iterator UI = VCMPoNode->use_begin();
4215            FlagUser == 0; ++UI) {
4216         assert(UI != VCMPoNode->use_end() && "Didn't find user!");
4217         SDNode *User = UI->getUser();
4218         for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
4219           if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
4220             FlagUser = User;
4221             break;
4222           }
4223         }
4224       }
4225
4226       // If the user is an MFCR instruction, we know this is safe.  Otherwise we
4227       // give up for right now.
4228 if (FlagUser->getOpcode() == PPCISD::MFCR) 4229 return SDOperand(VCMPoNode, 0); 4230 } 4231 break; 4232 } 4233 case ISD::BR_CC: { 4234 // If this is a branch on an altivec predicate comparison, lower this so 4235 // that we don't have to do a MFCR: instead, branch directly on CR6. This 4236 // lowering is done pre-legalize, because the legalizer lowers the predicate 4237 // compare down to code that is difficult to reassemble. 4238 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 4239 SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3); 4240 int CompareOpc; 4241 bool isDot; 4242 4243 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 4244 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 4245 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 4246 assert(isDot && "Can't compare against a vector result!"); 4247 4248 // If this is a comparison against something other than 0/1, then we know 4249 // that the condition is never/always true. 4250 unsigned Val = cast<ConstantSDNode>(RHS)->getValue(); 4251 if (Val != 0 && Val != 1) { 4252 if (CC == ISD::SETEQ) // Cond never true, remove branch. 4253 return N->getOperand(0); 4254 // Always !=, turn it into an unconditional branch. 4255 return DAG.getNode(ISD::BR, MVT::Other, 4256 N->getOperand(0), N->getOperand(4)); 4257 } 4258 4259 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 4260 4261 // Create the PPCISD altivec 'dot' comparison node. 4262 std::vector<MVT> VTs; 4263 SDOperand Ops[] = { 4264 LHS.getOperand(2), // LHS of compare 4265 LHS.getOperand(3), // RHS of compare 4266 DAG.getConstant(CompareOpc, MVT::i32) 4267 }; 4268 VTs.push_back(LHS.getOperand(2).getValueType()); 4269 VTs.push_back(MVT::Flag); 4270 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); 4271 4272 // Unpack the result based on how the target uses it. 4273 PPC::Predicate CompOpc; 4274 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) { 4275 default: // Can't happen, don't crash on invalid number though. 4276 case 0: // Branch on the value of the EQ bit of CR6. 4277 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 4278 break; 4279 case 1: // Branch on the inverted value of the EQ bit of CR6. 4280 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 4281 break; 4282 case 2: // Branch on the value of the LT bit of CR6. 4283 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 4284 break; 4285 case 3: // Branch on the inverted value of the LT bit of CR6. 4286 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 4287 break; 4288 } 4289 4290 return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0), 4291 DAG.getConstant(CompOpc, MVT::i32), 4292 DAG.getRegister(PPC::CR6, MVT::i32), 4293 N->getOperand(4), CompNode.getValue(1)); 4294 } 4295 break; 4296 } 4297 } 4298 4299 return SDOperand(); 4300} 4301 4302//===----------------------------------------------------------------------===// 4303// Inline Assembly Support 4304//===----------------------------------------------------------------------===// 4305 4306void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 4307 const APInt &Mask, 4308 APInt &KnownZero, 4309 APInt &KnownOne, 4310 const SelectionDAG &DAG, 4311 unsigned Depth) const { 4312 KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0); 4313 switch (Op.getOpcode()) { 4314 default: break; 4315 case PPCISD::LBRX: { 4316 // lhbrx is known to have the top bits cleared out. 
4317 if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) 4318 KnownZero = 0xFFFF0000; 4319 break; 4320 } 4321 case ISD::INTRINSIC_WO_CHAIN: { 4322 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 4323 default: break; 4324 case Intrinsic::ppc_altivec_vcmpbfp_p: 4325 case Intrinsic::ppc_altivec_vcmpeqfp_p: 4326 case Intrinsic::ppc_altivec_vcmpequb_p: 4327 case Intrinsic::ppc_altivec_vcmpequh_p: 4328 case Intrinsic::ppc_altivec_vcmpequw_p: 4329 case Intrinsic::ppc_altivec_vcmpgefp_p: 4330 case Intrinsic::ppc_altivec_vcmpgtfp_p: 4331 case Intrinsic::ppc_altivec_vcmpgtsb_p: 4332 case Intrinsic::ppc_altivec_vcmpgtsh_p: 4333 case Intrinsic::ppc_altivec_vcmpgtsw_p: 4334 case Intrinsic::ppc_altivec_vcmpgtub_p: 4335 case Intrinsic::ppc_altivec_vcmpgtuh_p: 4336 case Intrinsic::ppc_altivec_vcmpgtuw_p: 4337 KnownZero = ~1U; // All bits but the low one are known to be zero. 4338 break; 4339 } 4340 } 4341 } 4342} 4343 4344 4345/// getConstraintType - Given a constraint, return the type of 4346/// constraint it is for this target. 4347PPCTargetLowering::ConstraintType 4348PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 4349 if (Constraint.size() == 1) { 4350 switch (Constraint[0]) { 4351 default: break; 4352 case 'b': 4353 case 'r': 4354 case 'f': 4355 case 'v': 4356 case 'y': 4357 return C_RegisterClass; 4358 } 4359 } 4360 return TargetLowering::getConstraintType(Constraint); 4361} 4362 4363std::pair<unsigned, const TargetRegisterClass*> 4364PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 4365 MVT VT) const { 4366 if (Constraint.size() == 1) { 4367 // GCC RS6000 Constraint Letters 4368 switch (Constraint[0]) { 4369 case 'b': // R1-R31 4370 case 'r': // R0-R31 4371 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 4372 return std::make_pair(0U, PPC::G8RCRegisterClass); 4373 return std::make_pair(0U, PPC::GPRCRegisterClass); 4374 case 'f': 4375 if (VT == MVT::f32) 4376 return std::make_pair(0U, PPC::F4RCRegisterClass); 4377 else if (VT == MVT::f64) 4378 return std::make_pair(0U, PPC::F8RCRegisterClass); 4379 break; 4380 case 'v': 4381 return std::make_pair(0U, PPC::VRRCRegisterClass); 4382 case 'y': // crrc 4383 return std::make_pair(0U, PPC::CRRCRegisterClass); 4384 } 4385 } 4386 4387 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 4388} 4389 4390 4391/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 4392/// vector. If it is invalid, don't add anything to Ops. 4393void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter, 4394 std::vector<SDOperand>&Ops, 4395 SelectionDAG &DAG) const { 4396 SDOperand Result(0,0); 4397 switch (Letter) { 4398 default: break; 4399 case 'I': 4400 case 'J': 4401 case 'K': 4402 case 'L': 4403 case 'M': 4404 case 'N': 4405 case 'O': 4406 case 'P': { 4407 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 4408 if (!CST) return; // Must be an immediate to match. 4409 unsigned Value = CST->getValue(); 4410 switch (Letter) { 4411 default: assert(0 && "Unknown constraint letter!"); 4412 case 'I': // "I" is a signed 16-bit constant. 4413 if ((short)Value == (int)Value) 4414 Result = DAG.getTargetConstant(Value, Op.getValueType()); 4415 break; 4416 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 4417 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 
      if ((short)Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if ((Value >> 16) == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if ((int)Value > 0 && isPowerOf2_32(Value))
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if ((short)-Value == (int)-Value)
        Result = DAG.getTargetConstant(Value, Op.getValueType());
      break;
    }
    break;
  }
  }

  if (Result.Val) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG);
}

// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM,
                                              const Type *Ty) const {
  // FIXME: PPC does not allow r+i addressing modes for vectors!

  // PPC allows a sign-extended 16-bit immediate field.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only supports r+r and r+i addressing:
  switch (AM.Scale) {
  case 0:  // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}

/// isLegalAddressImmediate - Return true if the integer value can be used
/// as the offset of the target addressing mode for load / store of the
/// given type.
bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,
                                                const Type *Ty) const {
  // PPC allows a sign-extended 16-bit immediate field.
  return (V > -(1 << 16) && V < (1 << 16)-1);
}

bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const {
  return false;
}

SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  MachineFunction &MF = DAG.getMachineFunction();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  // Just load the return address off the stack.
  SDOperand RetAddrFI = getReturnAddrFrameIndex(DAG);

  // Make sure the function really does not optimize away the store of the RA
  // to the stack.
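  // (setLRStoreRequired flags the function so that prologue/epilogue emission
  // keeps the store of LR to its stack slot even if LR otherwise appears
  // dead; without it the load emitted below could read a stale value.)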
  FuncInfo->setLRStoreRequired();
  return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0);
}

SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) {
  // Depths > 0 not supported yet!
  if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0)
    return SDOperand();

  MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;

  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects())
              && MFI->getStackSize();

  if (isPPC64)
    return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1,
                              MVT::i64);
  else
    return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1,
                              MVT::i32);
}