PPCISelLowering.cpp revision ddf89566a93081cb230bb9406a72ab2d3eada4a7
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PPCISelLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "PPCISelLowering.h" 15#include "PPCMachineFunctionInfo.h" 16#include "PPCPredicates.h" 17#include "PPCTargetMachine.h" 18#include "PPCPerfectShuffle.h" 19#include "llvm/ADT/STLExtras.h" 20#include "llvm/ADT/VectorExtras.h" 21#include "llvm/Analysis/ScalarEvolutionExpressions.h" 22#include "llvm/CodeGen/CallingConvLower.h" 23#include "llvm/CodeGen/MachineFrameInfo.h" 24#include "llvm/CodeGen/MachineFunction.h" 25#include "llvm/CodeGen/MachineInstrBuilder.h" 26#include "llvm/CodeGen/MachineRegisterInfo.h" 27#include "llvm/CodeGen/SelectionDAG.h" 28#include "llvm/Constants.h" 29#include "llvm/Function.h" 30#include "llvm/Intrinsics.h" 31#include "llvm/Support/MathExtras.h" 32#include "llvm/Target/TargetOptions.h" 33#include "llvm/Support/CommandLine.h" 34using namespace llvm; 35 36static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc", 37cl::desc("enable preincrement load/store generation on PPC (experimental)"), 38 cl::Hidden); 39 40PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) 41 : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) { 42 43 setPow2DivIsCheap(); 44 45 // Use _setjmp/_longjmp instead of setjmp/longjmp. 46 setUseUnderscoreSetJmp(true); 47 setUseUnderscoreLongJmp(true); 48 49 // Set up the register classes. 
50 addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); 51 addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); 52 addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); 53 54 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 55 setLoadXAction(ISD::SEXTLOAD, MVT::i1, Expand); 56 setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand); 57 58 setTruncStoreAction(MVT::f64, MVT::f32, Expand); 59 60 // PowerPC has pre-inc load and store's. 61 setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal); 62 setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal); 63 setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal); 64 setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal); 65 setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal); 66 setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal); 67 setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal); 68 setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal); 69 setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); 70 setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); 71 72 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 73 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 74 75 // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg) 76 setConvertAction(MVT::ppcf128, MVT::f64, Expand); 77 setConvertAction(MVT::ppcf128, MVT::f32, Expand); 78 // This is used in the ppcf128->int sequence. Note it has different semantics 79 // from FP_ROUND: that rounds to nearest, this rounds to zero. 
80 setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom); 81 82 // PowerPC has no intrinsics for these particular operations 83 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); 84 setOperationAction(ISD::MEMSET, MVT::Other, Expand); 85 setOperationAction(ISD::MEMCPY, MVT::Other, Expand); 86 87 // PowerPC has no SREM/UREM instructions 88 setOperationAction(ISD::SREM, MVT::i32, Expand); 89 setOperationAction(ISD::UREM, MVT::i32, Expand); 90 setOperationAction(ISD::SREM, MVT::i64, Expand); 91 setOperationAction(ISD::UREM, MVT::i64, Expand); 92 93 // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. 94 setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); 95 setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); 96 setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); 97 setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); 98 setOperationAction(ISD::UDIVREM, MVT::i32, Expand); 99 setOperationAction(ISD::SDIVREM, MVT::i32, Expand); 100 setOperationAction(ISD::UDIVREM, MVT::i64, Expand); 101 setOperationAction(ISD::SDIVREM, MVT::i64, Expand); 102 103 // We don't support sin/cos/sqrt/fmod/pow 104 setOperationAction(ISD::FSIN , MVT::f64, Expand); 105 setOperationAction(ISD::FCOS , MVT::f64, Expand); 106 setOperationAction(ISD::FREM , MVT::f64, Expand); 107 setOperationAction(ISD::FPOW , MVT::f64, Expand); 108 setOperationAction(ISD::FSIN , MVT::f32, Expand); 109 setOperationAction(ISD::FCOS , MVT::f32, Expand); 110 setOperationAction(ISD::FREM , MVT::f32, Expand); 111 setOperationAction(ISD::FPOW , MVT::f32, Expand); 112 113 // If we're enabling GP optimizations, use hardware square root 114 if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { 115 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 116 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 117 } 118 119 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 120 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 121 122 // PowerPC does not have BSWAP, CTPOP or CTTZ 123 
setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 124 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 125 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 126 setOperationAction(ISD::BSWAP, MVT::i64 , Expand); 127 setOperationAction(ISD::CTPOP, MVT::i64 , Expand); 128 setOperationAction(ISD::CTTZ , MVT::i64 , Expand); 129 130 // PowerPC does not have ROTR 131 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 132 133 // PowerPC does not have Select 134 setOperationAction(ISD::SELECT, MVT::i32, Expand); 135 setOperationAction(ISD::SELECT, MVT::i64, Expand); 136 setOperationAction(ISD::SELECT, MVT::f32, Expand); 137 setOperationAction(ISD::SELECT, MVT::f64, Expand); 138 139 // PowerPC wants to turn select_cc of FP into fsel when possible. 140 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 141 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 142 143 // PowerPC wants to optimize integer setcc a bit 144 setOperationAction(ISD::SETCC, MVT::i32, Custom); 145 146 // PowerPC does not have BRCOND which requires SetCC 147 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 148 149 setOperationAction(ISD::BR_JT, MVT::Other, Expand); 150 151 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 152 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 153 154 // PowerPC does not have [U|S]INT_TO_FP 155 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 156 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 157 158 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); 159 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); 160 setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand); 161 setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand); 162 163 // We cannot sextinreg(i1). Expand to shifts. 164 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); 165 166 // Support label based line numbers. 
167 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 168 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 169 170 setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); 171 setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); 172 setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); 173 setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); 174 175 176 // We want to legalize GlobalAddress and ConstantPool nodes into the 177 // appropriate instructions to materialize the address. 178 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 179 setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom); 180 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 181 setOperationAction(ISD::JumpTable, MVT::i32, Custom); 182 setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); 183 setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); 184 setOperationAction(ISD::ConstantPool, MVT::i64, Custom); 185 setOperationAction(ISD::JumpTable, MVT::i64, Custom); 186 187 // RET must be custom lowered, to meet ABI requirements 188 setOperationAction(ISD::RET , MVT::Other, Custom); 189 190 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 191 setOperationAction(ISD::VASTART , MVT::Other, Custom); 192 193 // VAARG is custom lowered with ELF 32 ABI 194 if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI()) 195 setOperationAction(ISD::VAARG, MVT::Other, Custom); 196 else 197 setOperationAction(ISD::VAARG, MVT::Other, Expand); 198 199 // Use the default implementation. 200 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 201 setOperationAction(ISD::VAEND , MVT::Other, Expand); 202 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 203 setOperationAction(ISD::STACKRESTORE , MVT::Other, Custom); 204 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); 205 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64 , Custom); 206 207 // We want to custom lower some of our intrinsics. 
208 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 209 210 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 211 // They also have instructions for converting between i64 and fp. 212 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 213 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); 214 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 215 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); 216 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 217 218 // FIXME: disable this lowered code. This generates 64-bit register values, 219 // and we don't model the fact that the top part is clobbered by calls. We 220 // need to flag these together so that the value isn't live across a call. 221 //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 222 223 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 224 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 225 } else { 226 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 227 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 228 } 229 230 if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) { 231 // 64-bit PowerPC implementations can support i64 types directly 232 addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); 233 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 234 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 235 } else { 236 // 32-bit PowerPC wants to expand i64 shifts itself. 237 setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); 238 setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); 239 setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); 240 } 241 242 if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { 243 // First set operation action for all vector types to expand. Then we 244 // will selectively turn on ones that can be effectively codegen'd. 
245 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 246 VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 247 // add/sub are legal for all supported vector VT's. 248 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal); 249 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal); 250 251 // We promote all shuffles to v16i8. 252 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote); 253 AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8); 254 255 // We promote all non-typed operations to v4i32. 256 setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote); 257 AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32); 258 setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote); 259 AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32); 260 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote); 261 AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32); 262 setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote); 263 AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32); 264 setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote); 265 AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32); 266 setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote); 267 AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32); 268 269 // No other operations are legal. 
270 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 271 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); 272 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); 273 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); 274 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); 275 setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand); 276 setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand); 277 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 278 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 279 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand); 280 setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand); 281 setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand); 282 setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand); 283 setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand); 284 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand); 285 setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand); 286 setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand); 287 setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand); 288 setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand); 289 } 290 291 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 292 // with merges, splats, etc. 
293 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 294 295 setOperationAction(ISD::AND , MVT::v4i32, Legal); 296 setOperationAction(ISD::OR , MVT::v4i32, Legal); 297 setOperationAction(ISD::XOR , MVT::v4i32, Legal); 298 setOperationAction(ISD::LOAD , MVT::v4i32, Legal); 299 setOperationAction(ISD::SELECT, MVT::v4i32, Expand); 300 setOperationAction(ISD::STORE , MVT::v4i32, Legal); 301 302 addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); 303 addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); 304 addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); 305 addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); 306 307 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 308 setOperationAction(ISD::MUL, MVT::v4i32, Custom); 309 setOperationAction(ISD::MUL, MVT::v8i16, Custom); 310 setOperationAction(ISD::MUL, MVT::v16i8, Custom); 311 312 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 313 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 314 315 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 316 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 317 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 318 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 319 } 320 321 setSetCCResultType(MVT::i32); 322 setShiftAmountType(MVT::i32); 323 setSetCCResultContents(ZeroOrOneSetCCResult); 324 325 if (TM.getSubtarget<PPCSubtarget>().isPPC64()) { 326 setStackPointerRegisterToSaveRestore(PPC::X1); 327 setExceptionPointerRegister(PPC::X3); 328 setExceptionSelectorRegister(PPC::X4); 329 } else { 330 setStackPointerRegisterToSaveRestore(PPC::R1); 331 setExceptionPointerRegister(PPC::R3); 332 setExceptionSelectorRegister(PPC::R4); 333 } 334 335 // We have target-specific dag combine patterns for the following nodes: 336 setTargetDAGCombine(ISD::SINT_TO_FP); 337 setTargetDAGCombine(ISD::STORE); 338 setTargetDAGCombine(ISD::BR_CC); 339 setTargetDAGCombine(ISD::BSWAP); 340 341 // Darwin long double math library 
functions have $LDBL128 appended. 342 if (TM.getSubtarget<PPCSubtarget>().isDarwin()) { 343 setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); 344 setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); 345 setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); 346 setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128"); 347 setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128"); 348 } 349 350 computeRegisterProperties(); 351} 352 353const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 354 switch (Opcode) { 355 default: return 0; 356 case PPCISD::FSEL: return "PPCISD::FSEL"; 357 case PPCISD::FCFID: return "PPCISD::FCFID"; 358 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 359 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 360 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 361 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 362 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 363 case PPCISD::VPERM: return "PPCISD::VPERM"; 364 case PPCISD::Hi: return "PPCISD::Hi"; 365 case PPCISD::Lo: return "PPCISD::Lo"; 366 case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; 367 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 368 case PPCISD::SRL: return "PPCISD::SRL"; 369 case PPCISD::SRA: return "PPCISD::SRA"; 370 case PPCISD::SHL: return "PPCISD::SHL"; 371 case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; 372 case PPCISD::STD_32: return "PPCISD::STD_32"; 373 case PPCISD::CALL_ELF: return "PPCISD::CALL_ELF"; 374 case PPCISD::CALL_Macho: return "PPCISD::CALL_Macho"; 375 case PPCISD::MTCTR: return "PPCISD::MTCTR"; 376 case PPCISD::BCTRL_Macho: return "PPCISD::BCTRL_Macho"; 377 case PPCISD::BCTRL_ELF: return "PPCISD::BCTRL_ELF"; 378 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 379 case PPCISD::MFCR: return "PPCISD::MFCR"; 380 case PPCISD::VCMP: return "PPCISD::VCMP"; 381 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 382 case PPCISD::LBRX: return "PPCISD::LBRX"; 383 case PPCISD::STBRX: return "PPCISD::STBRX"; 384 case PPCISD::COND_BRANCH: return 
"PPCISD::COND_BRANCH"; 385 } 386} 387 388//===----------------------------------------------------------------------===// 389// Node matching predicates, for use by the tblgen matching code. 390//===----------------------------------------------------------------------===// 391 392/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 393static bool isFloatingPointZero(SDOperand Op) { 394 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 395 return CFP->getValueAPF().isZero(); 396 else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) { 397 // Maybe this has already been legalized into the constant pool? 398 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 399 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal())) 400 return CFP->getValueAPF().isZero(); 401 } 402 return false; 403} 404 405/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return 406/// true if Op is undef or if it matches the specified value. 407static bool isConstantOrUndef(SDOperand Op, unsigned Val) { 408 return Op.getOpcode() == ISD::UNDEF || 409 cast<ConstantSDNode>(Op)->getValue() == Val; 410} 411 412/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 413/// VPKUHUM instruction. 414bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { 415 if (!isUnary) { 416 for (unsigned i = 0; i != 16; ++i) 417 if (!isConstantOrUndef(N->getOperand(i), i*2+1)) 418 return false; 419 } else { 420 for (unsigned i = 0; i != 8; ++i) 421 if (!isConstantOrUndef(N->getOperand(i), i*2+1) || 422 !isConstantOrUndef(N->getOperand(i+8), i*2+1)) 423 return false; 424 } 425 return true; 426} 427 428/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 429/// VPKUWUM instruction. 
430bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { 431 if (!isUnary) { 432 for (unsigned i = 0; i != 16; i += 2) 433 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 434 !isConstantOrUndef(N->getOperand(i+1), i*2+3)) 435 return false; 436 } else { 437 for (unsigned i = 0; i != 8; i += 2) 438 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 439 !isConstantOrUndef(N->getOperand(i+1), i*2+3) || 440 !isConstantOrUndef(N->getOperand(i+8), i*2+2) || 441 !isConstantOrUndef(N->getOperand(i+9), i*2+3)) 442 return false; 443 } 444 return true; 445} 446 447/// isVMerge - Common function, used to match vmrg* shuffles. 448/// 449static bool isVMerge(SDNode *N, unsigned UnitSize, 450 unsigned LHSStart, unsigned RHSStart) { 451 assert(N->getOpcode() == ISD::BUILD_VECTOR && 452 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 453 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 454 "Unsupported merge size!"); 455 456 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 457 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 458 if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), 459 LHSStart+j+i*UnitSize) || 460 !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), 461 RHSStart+j+i*UnitSize)) 462 return false; 463 } 464 return true; 465} 466 467/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 468/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 469bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 470 if (!isUnary) 471 return isVMerge(N, UnitSize, 8, 24); 472 return isVMerge(N, UnitSize, 8, 8); 473} 474 475/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 476/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 
477bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 478 if (!isUnary) 479 return isVMerge(N, UnitSize, 0, 16); 480 return isVMerge(N, UnitSize, 0, 0); 481} 482 483 484/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 485/// amount, otherwise return -1. 486int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { 487 assert(N->getOpcode() == ISD::BUILD_VECTOR && 488 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 489 // Find the first non-undef value in the shuffle mask. 490 unsigned i; 491 for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) 492 /*search*/; 493 494 if (i == 16) return -1; // all undef. 495 496 // Otherwise, check to see if the rest of the elements are consequtively 497 // numbered from this value. 498 unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue(); 499 if (ShiftAmt < i) return -1; 500 ShiftAmt -= i; 501 502 if (!isUnary) { 503 // Check the rest of the elements to see if they are consequtive. 504 for (++i; i != 16; ++i) 505 if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) 506 return -1; 507 } else { 508 // Check the rest of the elements to see if they are consequtive. 509 for (++i; i != 16; ++i) 510 if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) 511 return -1; 512 } 513 514 return ShiftAmt; 515} 516 517/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 518/// specifies a splat of a single element that is suitable for input to 519/// VSPLTB/VSPLTH/VSPLTW. 520bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { 521 assert(N->getOpcode() == ISD::BUILD_VECTOR && 522 N->getNumOperands() == 16 && 523 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 524 525 // This is a splat operation if each element of the permute is the same, and 526 // if the value doesn't reference the second vector. 
527 unsigned ElementBase = 0; 528 SDOperand Elt = N->getOperand(0); 529 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) 530 ElementBase = EltV->getValue(); 531 else 532 return false; // FIXME: Handle UNDEF elements too! 533 534 if (cast<ConstantSDNode>(Elt)->getValue() >= 16) 535 return false; 536 537 // Check that they are consequtive. 538 for (unsigned i = 1; i != EltSize; ++i) { 539 if (!isa<ConstantSDNode>(N->getOperand(i)) || 540 cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase) 541 return false; 542 } 543 544 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 545 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 546 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 547 assert(isa<ConstantSDNode>(N->getOperand(i)) && 548 "Invalid VECTOR_SHUFFLE mask!"); 549 for (unsigned j = 0; j != EltSize; ++j) 550 if (N->getOperand(i+j) != N->getOperand(j)) 551 return false; 552 } 553 554 return true; 555} 556 557/// isAllNegativeZeroVector - Returns true if all elements of build_vector 558/// are -0.0. 559bool PPC::isAllNegativeZeroVector(SDNode *N) { 560 assert(N->getOpcode() == ISD::BUILD_VECTOR); 561 if (PPC::isSplatShuffleMask(N, N->getNumOperands())) 562 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N)) 563 return CFP->getValueAPF().isNegZero(); 564 return false; 565} 566 567/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 568/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 569unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 570 assert(isSplatShuffleMask(N, EltSize)); 571 return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize; 572} 573 574/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 575/// by using a vspltis[bhw] instruction of the specified element size, return 576/// the constant being splatted. The ByteSize field indicates the number of 577/// bytes of each element [124] -> [bhw]. 
578SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 579 SDOperand OpVal(0, 0); 580 581 // If ByteSize of the splat is bigger than the element size of the 582 // build_vector, then we have a case where we are checking for a splat where 583 // multiple elements of the buildvector are folded together into a single 584 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 585 unsigned EltSize = 16/N->getNumOperands(); 586 if (EltSize < ByteSize) { 587 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 588 SDOperand UniquedVals[4]; 589 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 590 591 // See if all of the elements in the buildvector agree across. 592 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 593 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 594 // If the element isn't a constant, bail fully out. 595 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand(); 596 597 598 if (UniquedVals[i&(Multiple-1)].Val == 0) 599 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 600 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 601 return SDOperand(); // no match. 602 } 603 604 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 605 // either constant or undef values that are identical for each chunk. See 606 // if these chunks can form into a larger vspltis*. 607 608 // Check to see if all of the leading entries are either 0 or -1. If 609 // neither, then this won't fit into the immediate field. 610 bool LeadingZero = true; 611 bool LeadingOnes = true; 612 for (unsigned i = 0; i != Multiple-1; ++i) { 613 if (UniquedVals[i].Val == 0) continue; // Must have been undefs. 614 615 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 616 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 617 } 618 // Finally, check the least significant entry. 
619 if (LeadingZero) { 620 if (UniquedVals[Multiple-1].Val == 0) 621 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 622 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue(); 623 if (Val < 16) 624 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 625 } 626 if (LeadingOnes) { 627 if (UniquedVals[Multiple-1].Val == 0) 628 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 629 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended(); 630 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 631 return DAG.getTargetConstant(Val, MVT::i32); 632 } 633 634 return SDOperand(); 635 } 636 637 // Check to see if this buildvec has a single non-undef value in its elements. 638 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 639 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 640 if (OpVal.Val == 0) 641 OpVal = N->getOperand(i); 642 else if (OpVal != N->getOperand(i)) 643 return SDOperand(); 644 } 645 646 if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def. 647 648 unsigned ValSizeInBytes = 0; 649 uint64_t Value = 0; 650 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 651 Value = CN->getValue(); 652 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8; 653 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 654 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 655 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 656 ValSizeInBytes = 4; 657 } 658 659 // If the splat value is larger than the element value, then we can never do 660 // this splat. The only case that we could fit the replicated bits into our 661 // immediate field for would be zero, and we prefer to use vxor for it. 662 if (ValSizeInBytes < ByteSize) return SDOperand(); 663 664 // If the element value is larger than the splat value, cut it in half and 665 // check to see if the two halves are equal. 
Continue doing this until we 666 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 667 while (ValSizeInBytes > ByteSize) { 668 ValSizeInBytes >>= 1; 669 670 // If the top half equals the bottom half, we're still ok. 671 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 672 (Value & ((1 << (8*ValSizeInBytes))-1))) 673 return SDOperand(); 674 } 675 676 // Properly sign extend the value. 677 int ShAmt = (4-ByteSize)*8; 678 int MaskVal = ((int)Value << ShAmt) >> ShAmt; 679 680 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 681 if (MaskVal == 0) return SDOperand(); 682 683 // Finally, if this value fits in a 5 bit sext field, return it 684 if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) 685 return DAG.getTargetConstant(MaskVal, MVT::i32); 686 return SDOperand(); 687} 688 689//===----------------------------------------------------------------------===// 690// Addressing Mode Selection 691//===----------------------------------------------------------------------===// 692 693/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 694/// or 64-bit immediate, and if the value can be accurately represented as a 695/// sign extension from a 16-bit value. If so, this returns true and the 696/// immediate. 697static bool isIntS16Immediate(SDNode *N, short &Imm) { 698 if (N->getOpcode() != ISD::Constant) 699 return false; 700 701 Imm = (short)cast<ConstantSDNode>(N)->getValue(); 702 if (N->getValueType(0) == MVT::i32) 703 return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue(); 704 else 705 return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue(); 706} 707static bool isIntS16Immediate(SDOperand Op, short &Imm) { 708 return isIntS16Immediate(Op.Val, Imm); 709} 710 711 712/// SelectAddressRegReg - Given the specified addressed, check to see if it 713/// can be represented as an indexed [r+r] operation. Returns false if it 714/// can be more efficiently represented with [r+imm]. 
715bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base, 716 SDOperand &Index, 717 SelectionDAG &DAG) { 718 short imm = 0; 719 if (N.getOpcode() == ISD::ADD) { 720 if (isIntS16Immediate(N.getOperand(1), imm)) 721 return false; // r+i 722 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 723 return false; // r+i 724 725 Base = N.getOperand(0); 726 Index = N.getOperand(1); 727 return true; 728 } else if (N.getOpcode() == ISD::OR) { 729 if (isIntS16Immediate(N.getOperand(1), imm)) 730 return false; // r+i can fold it if we can. 731 732 // If this is an or of disjoint bitfields, we can codegen this as an add 733 // (for better address arithmetic) if the LHS and RHS of the OR are provably 734 // disjoint. 735 uint64_t LHSKnownZero, LHSKnownOne; 736 uint64_t RHSKnownZero, RHSKnownOne; 737 DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); 738 739 if (LHSKnownZero) { 740 DAG.ComputeMaskedBits(N.getOperand(1), ~0U, RHSKnownZero, RHSKnownOne); 741 // If all of the bits are known zero on the LHS or RHS, the add won't 742 // carry. 743 if ((LHSKnownZero | RHSKnownZero) == ~0U) { 744 Base = N.getOperand(0); 745 Index = N.getOperand(1); 746 return true; 747 } 748 } 749 } 750 751 return false; 752} 753 754/// Returns true if the address N can be represented by a base register plus 755/// a signed 16-bit displacement [r+imm], and if it is not better 756/// represented as reg+reg. 757bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp, 758 SDOperand &Base, SelectionDAG &DAG){ 759 // If this can be more profitably realized as r+r, fail. 
760 if (SelectAddressRegReg(N, Disp, Base, DAG)) 761 return false; 762 763 if (N.getOpcode() == ISD::ADD) { 764 short imm = 0; 765 if (isIntS16Immediate(N.getOperand(1), imm)) { 766 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 767 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 768 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 769 } else { 770 Base = N.getOperand(0); 771 } 772 return true; // [r+i] 773 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 774 // Match LOAD (ADD (X, Lo(G))). 775 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue() 776 && "Cannot handle constant offsets yet!"); 777 Disp = N.getOperand(1).getOperand(0); // The global address. 778 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 779 Disp.getOpcode() == ISD::TargetConstantPool || 780 Disp.getOpcode() == ISD::TargetJumpTable); 781 Base = N.getOperand(0); 782 return true; // [&g+r] 783 } 784 } else if (N.getOpcode() == ISD::OR) { 785 short imm = 0; 786 if (isIntS16Immediate(N.getOperand(1), imm)) { 787 // If this is an or of disjoint bitfields, we can codegen this as an add 788 // (for better address arithmetic) if the LHS and RHS of the OR are 789 // provably disjoint. 790 uint64_t LHSKnownZero, LHSKnownOne; 791 DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); 792 if ((LHSKnownZero|~(unsigned)imm) == ~0U) { 793 // If all of the bits are known zero on the LHS or RHS, the add won't 794 // carry. 795 Base = N.getOperand(0); 796 Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32); 797 return true; 798 } 799 } 800 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 801 // Loading from a constant address. 
802 803 // If this address fits entirely in a 16-bit sext immediate field, codegen 804 // this as "d, 0" 805 short Imm; 806 if (isIntS16Immediate(CN, Imm)) { 807 Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); 808 Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); 809 return true; 810 } 811 812 // Handle 32-bit sext immediates with LIS + addr mode. 813 if (CN->getValueType(0) == MVT::i32 || 814 (int64_t)CN->getValue() == (int)CN->getValue()) { 815 int Addr = (int)CN->getValue(); 816 817 // Otherwise, break this down into an LIS + disp. 818 Disp = DAG.getTargetConstant((short)Addr, MVT::i32); 819 820 Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32); 821 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 822 Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); 823 return true; 824 } 825 } 826 827 Disp = DAG.getTargetConstant(0, getPointerTy()); 828 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 829 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 830 else 831 Base = N; 832 return true; // [r+0] 833} 834 835/// SelectAddressRegRegOnly - Given the specified addressed, force it to be 836/// represented as an indexed [r+r] operation. 837bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base, 838 SDOperand &Index, 839 SelectionDAG &DAG) { 840 // Check to see if we can easily represent this as an [r+r] address. This 841 // will fail if it thinks that the address is more profitably represented as 842 // reg+imm, e.g. where imm = 0. 843 if (SelectAddressRegReg(N, Base, Index, DAG)) 844 return true; 845 846 // If the operand is an addition, always emit this as [r+r], since this is 847 // better (for code size, and execution, as the memop does the add for free) 848 // than emitting an explicit add. 
849 if (N.getOpcode() == ISD::ADD) { 850 Base = N.getOperand(0); 851 Index = N.getOperand(1); 852 return true; 853 } 854 855 // Otherwise, do it the hard way, using R0 as the base register. 856 Base = DAG.getRegister(PPC::R0, N.getValueType()); 857 Index = N; 858 return true; 859} 860 861/// SelectAddressRegImmShift - Returns true if the address N can be 862/// represented by a base register plus a signed 14-bit displacement 863/// [r+imm*4]. Suitable for use by STD and friends. 864bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp, 865 SDOperand &Base, 866 SelectionDAG &DAG) { 867 // If this can be more profitably realized as r+r, fail. 868 if (SelectAddressRegReg(N, Disp, Base, DAG)) 869 return false; 870 871 if (N.getOpcode() == ISD::ADD) { 872 short imm = 0; 873 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 874 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 875 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) { 876 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 877 } else { 878 Base = N.getOperand(0); 879 } 880 return true; // [r+i] 881 } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) { 882 // Match LOAD (ADD (X, Lo(G))). 883 assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue() 884 && "Cannot handle constant offsets yet!"); 885 Disp = N.getOperand(1).getOperand(0); // The global address. 886 assert(Disp.getOpcode() == ISD::TargetGlobalAddress || 887 Disp.getOpcode() == ISD::TargetConstantPool || 888 Disp.getOpcode() == ISD::TargetJumpTable); 889 Base = N.getOperand(0); 890 return true; // [&g+r] 891 } 892 } else if (N.getOpcode() == ISD::OR) { 893 short imm = 0; 894 if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) { 895 // If this is an or of disjoint bitfields, we can codegen this as an add 896 // (for better address arithmetic) if the LHS and RHS of the OR are 897 // provably disjoint. 
898 uint64_t LHSKnownZero, LHSKnownOne; 899 DAG.ComputeMaskedBits(N.getOperand(0), ~0U, LHSKnownZero, LHSKnownOne); 900 if ((LHSKnownZero|~(unsigned)imm) == ~0U) { 901 // If all of the bits are known zero on the LHS or RHS, the add won't 902 // carry. 903 Base = N.getOperand(0); 904 Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32); 905 return true; 906 } 907 } 908 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) { 909 // Loading from a constant address. Verify low two bits are clear. 910 if ((CN->getValue() & 3) == 0) { 911 // If this address fits entirely in a 14-bit sext immediate field, codegen 912 // this as "d, 0" 913 short Imm; 914 if (isIntS16Immediate(CN, Imm)) { 915 Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy()); 916 Base = DAG.getRegister(PPC::R0, CN->getValueType(0)); 917 return true; 918 } 919 920 // Fold the low-part of 32-bit absolute addresses into addr mode. 921 if (CN->getValueType(0) == MVT::i32 || 922 (int64_t)CN->getValue() == (int)CN->getValue()) { 923 int Addr = (int)CN->getValue(); 924 925 // Otherwise, break this down into an LIS + disp. 926 Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32); 927 928 Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32); 929 unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8; 930 Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0); 931 return true; 932 } 933 } 934 } 935 936 Disp = DAG.getTargetConstant(0, getPointerTy()); 937 if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) 938 Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType()); 939 else 940 Base = N; 941 return true; // [r+0] 942} 943 944 945/// getPreIndexedAddressParts - returns true by value, base pointer and 946/// offset pointer and addressing mode by reference if the node's address 947/// can be legally represented as pre-indexed load / store address. 
948bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base, 949 SDOperand &Offset, 950 ISD::MemIndexedMode &AM, 951 SelectionDAG &DAG) { 952 // Disabled by default for now. 953 if (!EnablePPCPreinc) return false; 954 955 SDOperand Ptr; 956 MVT::ValueType VT; 957 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 958 Ptr = LD->getBasePtr(); 959 VT = LD->getLoadedVT(); 960 961 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 962 ST = ST; 963 Ptr = ST->getBasePtr(); 964 VT = ST->getStoredVT(); 965 } else 966 return false; 967 968 // PowerPC doesn't have preinc load/store instructions for vectors. 969 if (MVT::isVector(VT)) 970 return false; 971 972 // TODO: Check reg+reg first. 973 974 // LDU/STU use reg+imm*4, others use reg+imm. 975 if (VT != MVT::i64) { 976 // reg + imm 977 if (!SelectAddressRegImm(Ptr, Offset, Base, DAG)) 978 return false; 979 } else { 980 // reg + imm * 4. 981 if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG)) 982 return false; 983 } 984 985 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 986 // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of 987 // sext i32 to i64 when addr mode is r+i. 
988 if (LD->getValueType(0) == MVT::i64 && LD->getLoadedVT() == MVT::i32 && 989 LD->getExtensionType() == ISD::SEXTLOAD && 990 isa<ConstantSDNode>(Offset)) 991 return false; 992 } 993 994 AM = ISD::PRE_INC; 995 return true; 996} 997 998//===----------------------------------------------------------------------===// 999// LowerOperation implementation 1000//===----------------------------------------------------------------------===// 1001 1002static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 1003 MVT::ValueType PtrVT = Op.getValueType(); 1004 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 1005 Constant *C = CP->getConstVal(); 1006 SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment()); 1007 SDOperand Zero = DAG.getConstant(0, PtrVT); 1008 1009 const TargetMachine &TM = DAG.getTarget(); 1010 1011 SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero); 1012 SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero); 1013 1014 // If this is a non-darwin platform, we don't support non-static relo models 1015 // yet. 1016 if (TM.getRelocationModel() == Reloc::Static || 1017 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 1018 // Generate non-pic code that has direct accesses to the constant pool. 1019 // The address of the global is just (hi(&g)+lo(&g)). 1020 return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1021 } 1022 1023 if (TM.getRelocationModel() == Reloc::PIC_) { 1024 // With PIC, the first instruction is actually "GR+hi(&G)". 
1025 Hi = DAG.getNode(ISD::ADD, PtrVT, 1026 DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); 1027 } 1028 1029 Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1030 return Lo; 1031} 1032 1033static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 1034 MVT::ValueType PtrVT = Op.getValueType(); 1035 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 1036 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); 1037 SDOperand Zero = DAG.getConstant(0, PtrVT); 1038 1039 const TargetMachine &TM = DAG.getTarget(); 1040 1041 SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero); 1042 SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero); 1043 1044 // If this is a non-darwin platform, we don't support non-static relo models 1045 // yet. 1046 if (TM.getRelocationModel() == Reloc::Static || 1047 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 1048 // Generate non-pic code that has direct accesses to the constant pool. 1049 // The address of the global is just (hi(&g)+lo(&g)). 1050 return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1051 } 1052 1053 if (TM.getRelocationModel() == Reloc::PIC_) { 1054 // With PIC, the first instruction is actually "GR+hi(&G)". 
1055 Hi = DAG.getNode(ISD::ADD, PtrVT, 1056 DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); 1057 } 1058 1059 Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1060 return Lo; 1061} 1062 1063static SDOperand LowerGlobalTLSAddress(SDOperand Op, SelectionDAG &DAG) { 1064 assert(0 && "TLS not implemented for PPC."); 1065} 1066 1067static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 1068 MVT::ValueType PtrVT = Op.getValueType(); 1069 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 1070 GlobalValue *GV = GSDN->getGlobal(); 1071 SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset()); 1072 SDOperand Zero = DAG.getConstant(0, PtrVT); 1073 1074 const TargetMachine &TM = DAG.getTarget(); 1075 1076 SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero); 1077 SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero); 1078 1079 // If this is a non-darwin platform, we don't support non-static relo models 1080 // yet. 1081 if (TM.getRelocationModel() == Reloc::Static || 1082 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 1083 // Generate non-pic code that has direct accesses to globals. 1084 // The address of the global is just (hi(&g)+lo(&g)). 1085 return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1086 } 1087 1088 if (TM.getRelocationModel() == Reloc::PIC_) { 1089 // With PIC, the first instruction is actually "GR+hi(&G)". 1090 Hi = DAG.getNode(ISD::ADD, PtrVT, 1091 DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi); 1092 } 1093 1094 Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo); 1095 1096 if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV)) 1097 return Lo; 1098 1099 // If the global is weak or external, we have to go through the lazy 1100 // resolution stub. 
1101 return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0); 1102} 1103 1104static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 1105 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 1106 1107 // If we're comparing for equality to zero, expose the fact that this is 1108 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 1109 // fold the new nodes. 1110 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1111 if (C->isNullValue() && CC == ISD::SETEQ) { 1112 MVT::ValueType VT = Op.getOperand(0).getValueType(); 1113 SDOperand Zext = Op.getOperand(0); 1114 if (VT < MVT::i32) { 1115 VT = MVT::i32; 1116 Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0)); 1117 } 1118 unsigned Log2b = Log2_32(MVT::getSizeInBits(VT)); 1119 SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext); 1120 SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz, 1121 DAG.getConstant(Log2b, MVT::i32)); 1122 return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc); 1123 } 1124 // Leave comparisons against 0 and -1 alone for now, since they're usually 1125 // optimized. FIXME: revisit this when we can custom lower all setcc 1126 // optimizations. 1127 if (C->isAllOnesValue() || C->isNullValue()) 1128 return SDOperand(); 1129 } 1130 1131 // If we have an integer seteq/setne, turn it into a compare against zero 1132 // by xor'ing the rhs with the lhs, which is faster than setting a 1133 // condition register, reading it back out, and masking the correct bit. The 1134 // normal approach here uses sub to do this instead of xor. Using xor exposes 1135 // the result to other bit-twiddling opportunities. 
1136 MVT::ValueType LHSVT = Op.getOperand(0).getValueType(); 1137 if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 1138 MVT::ValueType VT = Op.getValueType(); 1139 SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0), 1140 Op.getOperand(1)); 1141 return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC); 1142 } 1143 return SDOperand(); 1144} 1145 1146static SDOperand LowerVAARG(SDOperand Op, SelectionDAG &DAG, 1147 int VarArgsFrameIndex, 1148 int VarArgsStackOffset, 1149 unsigned VarArgsNumGPR, 1150 unsigned VarArgsNumFPR, 1151 const PPCSubtarget &Subtarget) { 1152 1153 assert(0 && "VAARG in ELF32 ABI not implemented yet!"); 1154} 1155 1156static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG, 1157 int VarArgsFrameIndex, 1158 int VarArgsStackOffset, 1159 unsigned VarArgsNumGPR, 1160 unsigned VarArgsNumFPR, 1161 const PPCSubtarget &Subtarget) { 1162 1163 if (Subtarget.isMachoABI()) { 1164 // vastart just stores the address of the VarArgsFrameIndex slot into the 1165 // memory location argument. 1166 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1167 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1168 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 1169 return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV->getValue(), 1170 SV->getOffset()); 1171 } 1172 1173 // For ELF 32 ABI we follow the layout of the va_list struct. 1174 // We suppose the given va_list is already allocated. 1175 // 1176 // typedef struct { 1177 // char gpr; /* index into the array of 8 GPRs 1178 // * stored in the register save area 1179 // * gpr=0 corresponds to r3, 1180 // * gpr=1 to r4, etc. 1181 // */ 1182 // char fpr; /* index into the array of 8 FPRs 1183 // * stored in the register save area 1184 // * fpr=0 corresponds to f1, 1185 // * fpr=1 to f2, etc. 
1186 // */ 1187 // char *overflow_arg_area; 1188 // /* location on stack that holds 1189 // * the next overflow argument 1190 // */ 1191 // char *reg_save_area; 1192 // /* where r3:r10 and f1:f8 (if saved) 1193 // * are stored 1194 // */ 1195 // } va_list[1]; 1196 1197 1198 SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8); 1199 SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8); 1200 1201 1202 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1203 1204 SDOperand StackOffset = DAG.getFrameIndex(VarArgsStackOffset, PtrVT); 1205 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1206 1207 SDOperand ConstFrameOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, 1208 PtrVT); 1209 SDOperand ConstStackOffset = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8 - 1, 1210 PtrVT); 1211 SDOperand ConstFPROffset = DAG.getConstant(1, PtrVT); 1212 1213 SrcValueSDNode *SV = cast<SrcValueSDNode>(Op.getOperand(2)); 1214 1215 // Store first byte : number of int regs 1216 SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR, 1217 Op.getOperand(1), SV->getValue(), 1218 SV->getOffset()); 1219 SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1), 1220 ConstFPROffset); 1221 1222 // Store second byte : number of float regs 1223 SDOperand secondStore = DAG.getStore(firstStore, ArgFPR, nextPtr, 1224 SV->getValue(), SV->getOffset()); 1225 nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset); 1226 1227 // Store second word : arguments given on stack 1228 SDOperand thirdStore = DAG.getStore(secondStore, StackOffset, nextPtr, 1229 SV->getValue(), SV->getOffset()); 1230 nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset); 1231 1232 // Store third word : arguments given in registers 1233 return DAG.getStore(thirdStore, FR, nextPtr, SV->getValue(), 1234 SV->getOffset()); 1235 1236} 1237 1238#include "PPCGenCallingConv.inc" 1239 1240/// GetFPR - Get the set of FP registers that should be allocated for arguments, 
1241/// depending on which subtarget is selected. 1242static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { 1243 if (Subtarget.isMachoABI()) { 1244 static const unsigned FPR[] = { 1245 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1246 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1247 }; 1248 return FPR; 1249 } 1250 1251 1252 static const unsigned FPR[] = { 1253 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1254 PPC::F8 1255 }; 1256 return FPR; 1257} 1258 1259static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG, 1260 int &VarArgsFrameIndex, 1261 int &VarArgsStackOffset, 1262 unsigned &VarArgsNumGPR, 1263 unsigned &VarArgsNumFPR, 1264 const PPCSubtarget &Subtarget) { 1265 // TODO: add description of PPC stack frame format, or at least some docs. 1266 // 1267 MachineFunction &MF = DAG.getMachineFunction(); 1268 MachineFrameInfo *MFI = MF.getFrameInfo(); 1269 MachineRegisterInfo &RegInfo = MF.getRegInfo(); 1270 SmallVector<SDOperand, 8> ArgValues; 1271 SDOperand Root = Op.getOperand(0); 1272 1273 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1274 bool isPPC64 = PtrVT == MVT::i64; 1275 bool isMachoABI = Subtarget.isMachoABI(); 1276 bool isELF32_ABI = Subtarget.isELF32_ABI(); 1277 unsigned PtrByteSize = isPPC64 ? 8 : 4; 1278 1279 unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI); 1280 1281 static const unsigned GPR_32[] = { // 32-bit registers. 1282 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1283 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1284 }; 1285 static const unsigned GPR_64[] = { // 64-bit registers. 
1286 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 1287 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 1288 }; 1289 1290 static const unsigned *FPR = GetFPR(Subtarget); 1291 1292 static const unsigned VR[] = { 1293 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 1294 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 1295 }; 1296 1297 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 1298 const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8; 1299 const unsigned Num_VR_Regs = array_lengthof( VR); 1300 1301 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 1302 1303 const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; 1304 1305 // Add DAG nodes to load the arguments or copy them out of registers. On 1306 // entry to a function on PPC, the arguments start after the linkage area, 1307 // although the first ones are often in registers. 1308 // 1309 // In the ELF 32 ABI, GPRs and stack are double word align: an argument 1310 // represented with two words (long long or double) must be copied to an 1311 // even GPR_idx value or to an even ArgOffset value. 
1312 1313 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { 1314 SDOperand ArgVal; 1315 bool needsLoad = false; 1316 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); 1317 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; 1318 unsigned ArgSize = ObjSize; 1319 unsigned Flags = cast<ConstantSDNode>(Op.getOperand(ArgNo+3))->getValue(); 1320 unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs; 1321 // See if next argument requires stack alignment in ELF 1322 bool Expand = (ObjectVT == MVT::f64) || ((ArgNo + 1 < e) && 1323 (cast<ConstantSDNode>(Op.getOperand(ArgNo+4))->getValue() & AlignFlag) && 1324 (!(Flags & AlignFlag))); 1325 1326 unsigned CurArgOffset = ArgOffset; 1327 switch (ObjectVT) { 1328 default: assert(0 && "Unhandled argument type!"); 1329 case MVT::i32: 1330 // Double word align in ELF 1331 if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1332 if (GPR_idx != Num_GPR_Regs) { 1333 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1334 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1335 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); 1336 ++GPR_idx; 1337 } else { 1338 needsLoad = true; 1339 ArgSize = PtrByteSize; 1340 } 1341 // Stack align in ELF 1342 if (needsLoad && Expand && isELF32_ABI) 1343 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1344 // All int arguments reserve stack space in Macho ABI. 1345 if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; 1346 break; 1347 1348 case MVT::i64: // PPC64 1349 if (GPR_idx != Num_GPR_Regs) { 1350 unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1351 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1352 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1353 ++GPR_idx; 1354 } else { 1355 needsLoad = true; 1356 } 1357 // All int arguments reserve stack space in Macho ABI. 
1358 if (isMachoABI || needsLoad) ArgOffset += 8; 1359 break; 1360 1361 case MVT::f32: 1362 case MVT::f64: 1363 // Every 4 bytes of argument space consumes one of the GPRs available for 1364 // argument passing. 1365 if (GPR_idx != Num_GPR_Regs && isMachoABI) { 1366 ++GPR_idx; 1367 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 1368 ++GPR_idx; 1369 } 1370 if (FPR_idx != Num_FPR_Regs) { 1371 unsigned VReg; 1372 if (ObjectVT == MVT::f32) 1373 VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); 1374 else 1375 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1376 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1377 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1378 ++FPR_idx; 1379 } else { 1380 needsLoad = true; 1381 } 1382 1383 // Stack align in ELF 1384 if (needsLoad && Expand && isELF32_ABI) 1385 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1386 // All FP arguments reserve stack space in Macho ABI. 1387 if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; 1388 break; 1389 case MVT::v4f32: 1390 case MVT::v4i32: 1391 case MVT::v8i16: 1392 case MVT::v16i8: 1393 // Note that vector arguments in registers don't reserve stack space. 1394 if (VR_idx != Num_VR_Regs) { 1395 unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); 1396 RegInfo.addLiveIn(VR[VR_idx], VReg); 1397 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1398 ++VR_idx; 1399 } else { 1400 // This should be simple, but requires getting 16-byte aligned stack 1401 // values. 1402 assert(0 && "Loading VR argument not implemented yet!"); 1403 needsLoad = true; 1404 } 1405 break; 1406 } 1407 1408 // We need to load the argument to a virtual register if we determined above 1409 // that we ran out of physical registers of the appropriate type 1410 if (needsLoad) { 1411 // If the argument is actually used, emit a load from the right stack 1412 // slot. 
1413 if (!Op.Val->hasNUsesOfValue(0, ArgNo)) { 1414 int FI = MFI->CreateFixedObject(ObjSize, 1415 CurArgOffset + (ArgSize - ObjSize)); 1416 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1417 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); 1418 } else { 1419 // Don't emit a dead load. 1420 ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT); 1421 } 1422 } 1423 1424 ArgValues.push_back(ArgVal); 1425 } 1426 1427 // If the function takes variable number of arguments, make a frame index for 1428 // the start of the first vararg value... for expansion of llvm.va_start. 1429 bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0; 1430 if (isVarArg) { 1431 1432 int depth; 1433 if (isELF32_ABI) { 1434 VarArgsNumGPR = GPR_idx; 1435 VarArgsNumFPR = FPR_idx; 1436 1437 // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame 1438 // pointer. 1439 depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 + 1440 Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 + 1441 MVT::getSizeInBits(PtrVT)/8); 1442 1443 VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, 1444 ArgOffset); 1445 1446 } 1447 else 1448 depth = ArgOffset; 1449 1450 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, 1451 depth); 1452 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1453 1454 SmallVector<SDOperand, 8> MemOps; 1455 1456 // In ELF 32 ABI, the fixed integer arguments of a variadic function are 1457 // stored to the VarArgsFrameIndex on the stack. 
1458 if (isELF32_ABI) { 1459 for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { 1460 SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT); 1461 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1462 MemOps.push_back(Store); 1463 // Increment the address by four for the next argument to store 1464 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); 1465 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1466 } 1467 } 1468 1469 // If this function is vararg, store any remaining integer argument regs 1470 // to their spots on the stack so that they may be loaded by deferencing the 1471 // result of va_next. 1472 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 1473 unsigned VReg; 1474 if (isPPC64) 1475 VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1476 else 1477 VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1478 1479 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1480 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1481 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1482 MemOps.push_back(Store); 1483 // Increment the address by four for the next argument to store 1484 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); 1485 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1486 } 1487 1488 // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex 1489 // on the stack. 
1490 if (isELF32_ABI) { 1491 for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { 1492 SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); 1493 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1494 MemOps.push_back(Store); 1495 // Increment the address by eight for the next argument to store 1496 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8, 1497 PtrVT); 1498 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1499 } 1500 1501 for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { 1502 unsigned VReg; 1503 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1504 1505 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1506 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64); 1507 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1508 MemOps.push_back(Store); 1509 // Increment the address by eight for the next argument to store 1510 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8, 1511 PtrVT); 1512 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1513 } 1514 } 1515 1516 if (!MemOps.empty()) 1517 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); 1518 } 1519 1520 ArgValues.push_back(Root); 1521 1522 // Return the new list of results. 1523 std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(), 1524 Op.Val->value_end()); 1525 return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size()); 1526} 1527 1528/// isCallCompatibleAddress - Return the immediate to use if the specified 1529/// 32-bit value is representable in the immediate field of a BxA instruction. 1530static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) { 1531 ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op); 1532 if (!C) return 0; 1533 1534 int Addr = C->getValue(); 1535 if ((Addr & 3) != 0 || // Low 2 bits are implicitly zero. 1536 (Addr << 6 >> 6) != Addr) 1537 return 0; // Top 6 bits have to be sext of immediate. 
  return DAG.getConstant((int)C->getValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).Val;
}


/// LowerCALL - Lower an outgoing call node.
///
/// Operand layout read by this function: operand 0 is the incoming chain,
/// operand 2 is the "is vararg" constant, operand 4 is the callee, and the
/// remaining operands come in (argument, flags) pairs starting at index 5.
///
/// The lowering (1) sizes the outgoing frame, (2) assigns each argument to a
/// GPR/FPR/VR or a stack slot, (3) emits the call (direct, or MTCTR + BCTRL
/// for indirect callees), and (4) copies any results out of the return
/// registers.  For a void call the chain is returned; otherwise the result is
/// value Op.ResNo of a MERGE_VALUES node whose last value is the chain.
static SDOperand LowerCALL(SDOperand Op, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget) {
  SDOperand Chain  = Op.getOperand(0);
  bool isVarArg    = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  unsigned NumOps  = (Op.getNumOperands() - 5) / 2;

  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI  = Subtarget.isELF32_ABI();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  // NOTE(review): this vector is never referenced again in this function.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

  // Add up all the space actually used.  Every argument occupies at least one
  // pointer-sized slot.
  for (unsigned i = 0; i != NumOps; ++i) {
    unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
    ArgSize = std::max(ArgSize, PtrByteSize);
    NumBytes += ArgSize;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if it is
  // varargs.  Because we cannot tell if this is needed on the caller side, we
  // have to conservatively assume that it is needed.  As such, make sure we
  // have at least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  // NOTE(review): being 'static', FPR is initialised from the Subtarget of the
  // first call only; later calls reuse that table even if Subtarget differs.
  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  const unsigned NumFPRs = isMachoABI ? 13 : 8;
  const unsigned NumVRs  = array_lengthof( VR);

  const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32;

  // RegsToPass collects (physical register, value) pairs; MemOpChains collects
  // the chains of every store/load emitted while placing arguments.
  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  for (unsigned i = 0; i != NumOps; ++i) {
    bool inMem = false;
    SDOperand Arg = Op.getOperand(5+2*i);
    unsigned Flags = cast<ConstantSDNode>(Op.getOperand(5+2*i+1))->getValue();
    unsigned AlignFlag = 1 << ISD::ParamFlags::OrigAlignmentOffs;
    // See if next argument requires stack alignment in ELF.  'next' indexes
    // the flags operand of argument i+1 and is only read when i+1 < NumOps.
    unsigned next = 5+2*(i+1)+1;
    bool Expand = (Arg.getValueType() == MVT::f64) || ((i + 1 < NumOps) &&
      (cast<ConstantSDNode>(Op.getOperand(next))->getValue() & AlignFlag) &&
                                                       (!(Flags & AlignFlag)));

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff;

    // Stack align in ELF 32: round ArgOffset up to an even multiple of 4-byte
    // words before using it.
    if (isELF32_ABI && Expand)
      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
                               StackPtr.getValueType());
    else
      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.  Bit 0 of Flags selects
    // sign- versus zero-extension.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      unsigned ExtOp = (Flags & 1) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;

      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      // Double word align in ELF
      if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        inMem = true;
      }
      // Stack space is consumed for in-memory arguments, and for every
      // argument under the Macho ABI.
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Expand)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

        ArgOffset += PtrByteSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (isVarArg) {
        // Float varargs need to be promoted to double.
        if (Arg.getValueType() == MVT::f32)
          Arg = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Arg);
      }

      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
          // On 32-bit targets an f64 shadows a second GPR, loaded from the
          // following 4 bytes of the slot.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff, ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (isMachoABI) {
            if (GPR_idx != NumGPRs)
              ++GPR_idx;
            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
                !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
              ++GPR_idx;
          }
        }
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Expand)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        if (isPPC64)
          ArgOffset += 8;
        else
          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      assert(!isVarArg && "Don't support passing vectors to varargs yet!");
      assert(VR_idx != NumVRs &&
             "Don't support passing more than 12 vector args yet!");
      RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      break;
    }
  }
  // Join all argument stores/loads into one chain before the register copies.
  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
  if (isVarArg && isELF32_ABI) {
    SDOperand SetCR(DAG.getTargetNode(PPC::SETCR, MVT::i32), 0);
    Chain = DAG.getCopyToReg(Chain, PPC::CR6, SetCR, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.  InFlag is only passed as an
    // operand when it is non-null.
    SDOperand MTCTROps[] = {Chain, Callee, InFlag};
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12 on darwin.
    if (isMachoABI) {
      Chain = DAG.getCopyToReg(Chain, PPC::R12, Callee, InFlag);
      InFlag = Chain.getValue(1);
    }

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.push_back(Chain);
    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
    // A null Callee marks the indirect case for the check just below.
    Callee.Val = 0;
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  // Only pick up the CALLSEQ_END flag when there are results to copy out.
  if (Op.Val->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  SDOperand ResultVals[3];
  unsigned NumResults = 0;
  NodeTys.clear();

  // If the call has results, copy the values out of the ret val registers.
  switch (Op.Val->getValueType(0)) {
  default: assert(0 && "Unexpected ret value!");
  case MVT::Other: break;
  case MVT::i32:
    if (Op.Val->getValueType(1) == MVT::i32) {
      // Two i32 results: copied from R3 then R4.
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, PPC::R4, MVT::i32,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::i32);
    } else {
      Chain = DAG.getCopyFromReg(Chain, PPC::R3, MVT::i32, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      NumResults = 1;
    }
    NodeTys.push_back(MVT::i32);
    break;
  case MVT::i64:
    Chain = DAG.getCopyFromReg(Chain, PPC::X3, MVT::i64, InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(MVT::i64);
    break;
  case MVT::f64:
    if (Op.Val->getValueType(1) == MVT::f64) {
      // Two f64 results: copied from F1 then F2.
      Chain = DAG.getCopyFromReg(Chain, PPC::F1, MVT::f64, InFlag).getValue(1);
      ResultVals[0] = Chain.getValue(0);
      Chain = DAG.getCopyFromReg(Chain, PPC::F2, MVT::f64,
                                 Chain.getValue(2)).getValue(1);
      ResultVals[1] = Chain.getValue(0);
      NumResults = 2;
      NodeTys.push_back(MVT::f64);
      NodeTys.push_back(MVT::f64);
      break;
    }
    // else fall through
  case MVT::f32:
    Chain = DAG.getCopyFromReg(Chain, PPC::F1, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
    Chain = DAG.getCopyFromReg(Chain, PPC::V2, Op.Val->getValueType(0),
                               InFlag).getValue(1);
    ResultVals[0] = Chain.getValue(0);
    NumResults = 1;
    NodeTys.push_back(Op.Val->getValueType(0));
    break;
  }

  NodeTys.push_back(MVT::Other);

  // If the function returns void, just return the chain.
  if (NumResults == 0)
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals[NumResults++] = Chain;
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, NodeTys,
                              ResultVals, NumResults);
  return Res.getValue(Op.ResNo);
}

/// LowerRET - Lower a return node.  The return-value locations are computed
/// with RetCC_PPC; operand 0 of Op is the chain and the returned values sit at
/// the odd operand indices (i*2+1).
static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG, TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
1909 if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { 1910 for (unsigned i = 0; i != RVLocs.size(); ++i) 1911 DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); 1912 } 1913 1914 SDOperand Chain = Op.getOperand(0); 1915 SDOperand Flag; 1916 1917 // Copy the result values into the output registers. 1918 for (unsigned i = 0; i != RVLocs.size(); ++i) { 1919 CCValAssign &VA = RVLocs[i]; 1920 assert(VA.isRegLoc() && "Can only return in registers!"); 1921 Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag); 1922 Flag = Chain.getValue(1); 1923 } 1924 1925 if (Flag.Val) 1926 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag); 1927 else 1928 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain); 1929} 1930 1931static SDOperand LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG, 1932 const PPCSubtarget &Subtarget) { 1933 // When we pop the dynamic allocation we need to restore the SP link. 1934 1935 // Get the corect type for pointers. 1936 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1937 1938 // Construct the stack pointer operand. 1939 bool IsPPC64 = Subtarget.isPPC64(); 1940 unsigned SP = IsPPC64 ? PPC::X1 : PPC::R1; 1941 SDOperand StackPtr = DAG.getRegister(SP, PtrVT); 1942 1943 // Get the operands for the STACKRESTORE. 1944 SDOperand Chain = Op.getOperand(0); 1945 SDOperand SaveSP = Op.getOperand(1); 1946 1947 // Load the old link SP. 1948 SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0); 1949 1950 // Restore the stack pointer. 1951 Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP); 1952 1953 // Store the old link SP. 
1954 return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0); 1955} 1956 1957static SDOperand LowerDYNAMIC_STACKALLOC(SDOperand Op, SelectionDAG &DAG, 1958 const PPCSubtarget &Subtarget) { 1959 MachineFunction &MF = DAG.getMachineFunction(); 1960 bool IsPPC64 = Subtarget.isPPC64(); 1961 bool isMachoABI = Subtarget.isMachoABI(); 1962 1963 // Get current frame pointer save index. The users of this index will be 1964 // primarily DYNALLOC instructions. 1965 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1966 int FPSI = FI->getFramePointerSaveIndex(); 1967 1968 // If the frame pointer save index hasn't been defined yet. 1969 if (!FPSI) { 1970 // Find out what the fix offset of the frame pointer save area. 1971 int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI); 1972 1973 // Allocate the frame index for frame pointer save area. 1974 FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset); 1975 // Save the result. 1976 FI->setFramePointerSaveIndex(FPSI); 1977 } 1978 1979 // Get the inputs. 1980 SDOperand Chain = Op.getOperand(0); 1981 SDOperand Size = Op.getOperand(1); 1982 1983 // Get the corect type for pointers. 1984 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 1985 // Negate the size. 1986 SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT, 1987 DAG.getConstant(0, PtrVT), Size); 1988 // Construct a node for the frame pointer save index. 1989 SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT); 1990 // Build a DYNALLOC node. 1991 SDOperand Ops[3] = { Chain, NegSize, FPSIdx }; 1992 SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other); 1993 return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3); 1994} 1995 1996 1997/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 1998/// possible. 1999static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { 2000 // Not FP? Not a fsel. 
  // Both the compared operand (0) and the selected value (2) must be FP.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV  = Op.getOperand(2), FV  = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.  The "swap" cases below intentionally fall through
  // into the following case group.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETOLT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // fall through
    case ISD::SETUGE:
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETOGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel on -LHS is natively setle, swap for setgt
      // fall through
    case ISD::SETULE:
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      // LHS <= 0 is tested as -LHS >= 0.
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  // General case: feed fsel the difference of the operands.  The operand
  // order of the FSUB and of (TV, FV) encodes the condition.
  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETOLT:
  case ISD::SETLT:
    // LHS - RHS >= 0 selects FV, otherwise TV.
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETGE:
    // LHS - RHS >= 0 selects TV.
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETOGT:
  case ISD::SETGT:
    // RHS - LHS >= 0 selects FV, otherwise TV.
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETOLE:
  case ISD::SETLE:
    // RHS - LHS >= 0 selects TV.
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
      return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  // Unhandled condition code: return a null operand.
  return SDOperand();
}

/// LowerFP_TO_SINT - Custom lowering of FP_TO_SINT to i32 or i64.  An f32
/// source is first extended to f64, then converted with FCTIWZ (i32 result)
/// or FCTIDZ (i64 result), both of which produce an f64-typed node.
// FIXME: Split this code up when LegalizeDAGTypes lands.
static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
2097 SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64); 2098 2099 // Emit a store to the stack slot. 2100 SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0); 2101 2102 // Result is a load from the stack slot. If loading 4 bytes, make sure to 2103 // add in a bias. 2104 if (Op.getValueType() == MVT::i32) 2105 FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr, 2106 DAG.getConstant(4, FIPtr.getValueType())); 2107 return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0); 2108} 2109 2110static SDOperand LowerFP_ROUND_INREG(SDOperand Op, SelectionDAG &DAG) { 2111 assert(Op.getValueType() == MVT::ppcf128); 2112 SDNode *Node = Op.Val; 2113 assert(Node->getOperand(0).getValueType() == MVT::ppcf128); 2114 assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR); 2115 SDOperand Lo = Node->getOperand(0).Val->getOperand(0); 2116 SDOperand Hi = Node->getOperand(0).Val->getOperand(1); 2117 2118 // This sequence changes FPSCR to do round-to-zero, adds the two halves 2119 // of the long double, and puts FPSCR back the way it was. We do not 2120 // actually model FPSCR. 
2121 std::vector<MVT::ValueType> NodeTys; 2122 SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg; 2123 2124 NodeTys.push_back(MVT::f64); // Return register 2125 NodeTys.push_back(MVT::Flag); // Returns a flag for later insns 2126 Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0); 2127 MFFSreg = Result.getValue(0); 2128 InFlag = Result.getValue(1); 2129 2130 NodeTys.clear(); 2131 NodeTys.push_back(MVT::Flag); // Returns a flag 2132 Ops[0] = DAG.getConstant(31, MVT::i32); 2133 Ops[1] = InFlag; 2134 Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2); 2135 InFlag = Result.getValue(0); 2136 2137 NodeTys.clear(); 2138 NodeTys.push_back(MVT::Flag); // Returns a flag 2139 Ops[0] = DAG.getConstant(30, MVT::i32); 2140 Ops[1] = InFlag; 2141 Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2); 2142 InFlag = Result.getValue(0); 2143 2144 NodeTys.clear(); 2145 NodeTys.push_back(MVT::f64); // result of add 2146 NodeTys.push_back(MVT::Flag); // Returns a flag 2147 Ops[0] = Lo; 2148 Ops[1] = Hi; 2149 Ops[2] = InFlag; 2150 Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3); 2151 FPreg = Result.getValue(0); 2152 InFlag = Result.getValue(1); 2153 2154 NodeTys.clear(); 2155 NodeTys.push_back(MVT::f64); 2156 Ops[0] = DAG.getConstant(1, MVT::i32); 2157 Ops[1] = MFFSreg; 2158 Ops[2] = FPreg; 2159 Ops[3] = InFlag; 2160 Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); 2161 FPreg = Result.getValue(0); 2162 2163 // We know the low half is about to be thrown away, so just use something 2164 // convenient. 
2165 return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg); 2166} 2167 2168static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 2169 if (Op.getOperand(0).getValueType() == MVT::i64) { 2170 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 2171 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 2172 if (Op.getValueType() == MVT::f32) 2173 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 2174 return FP; 2175 } 2176 2177 assert(Op.getOperand(0).getValueType() == MVT::i32 && 2178 "Unhandled SINT_TO_FP type in custom expander!"); 2179 // Since we only generate this in 64-bit mode, we can take advantage of 2180 // 64-bit registers. In particular, sign extend the input value into the 2181 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 2182 // then lfd it and fcfid it. 2183 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 2184 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 2185 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2186 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 2187 2188 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 2189 Op.getOperand(0)); 2190 2191 // STD the extended value into the stack slot. 2192 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 2193 DAG.getEntryNode(), Ext64, FIdx, 2194 DAG.getSrcValue(NULL)); 2195 // Load the value as a double. 2196 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0); 2197 2198 // FCFID it and return it. 
2199 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); 2200 if (Op.getValueType() == MVT::f32) 2201 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 2202 return FP; 2203} 2204 2205static SDOperand LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) { 2206 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2207 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 2208 2209 // Expand into a bunch of logical ops. Note that these ops 2210 // depend on the PPC behavior for oversized shift amounts. 2211 SDOperand Lo = Op.getOperand(0); 2212 SDOperand Hi = Op.getOperand(1); 2213 SDOperand Amt = Op.getOperand(2); 2214 2215 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 2216 DAG.getConstant(32, MVT::i32), Amt); 2217 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt); 2218 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1); 2219 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 2220 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 2221 DAG.getConstant(-32U, MVT::i32)); 2222 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5); 2223 SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 2224 SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt); 2225 SDOperand OutOps[] = { OutLo, OutHi }; 2226 return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32), 2227 OutOps, 2); 2228} 2229 2230static SDOperand LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) { 2231 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2232 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!"); 2233 2234 // Otherwise, expand into a bunch of logical ops. Note that these ops 2235 // depend on the PPC behavior for oversized shift amounts. 
2236 SDOperand Lo = Op.getOperand(0); 2237 SDOperand Hi = Op.getOperand(1); 2238 SDOperand Amt = Op.getOperand(2); 2239 2240 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 2241 DAG.getConstant(32, MVT::i32), Amt); 2242 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 2243 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 2244 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 2245 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 2246 DAG.getConstant(-32U, MVT::i32)); 2247 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5); 2248 SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 2249 SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt); 2250 SDOperand OutOps[] = { OutLo, OutHi }; 2251 return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32), 2252 OutOps, 2); 2253} 2254 2255static SDOperand LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) { 2256 assert(Op.getNumOperands() == 3 && Op.getValueType() == MVT::i32 && 2257 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!"); 2258 2259 // Otherwise, expand into a bunch of logical ops, followed by a select_cc. 
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);

  // Tmp4 = (Lo >> Amt) | (Hi << (32 - Amt)): the low result candidate built
  // from both parts.
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  // Tmp5 = Amt - 32; Tmp6 = Hi >>s (Amt - 32): the low result candidate built
  // from the high part alone.
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
  // Low result: Tmp4 when Amt - 32 <= 0, otherwise Tmp6.
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
                                    Tmp4, Tmp6, ISD::SETLE);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(MVT::i32, MVT::i32),
                     OutOps, 2);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.   Return true if this is not an array of constants, false if it is.
//
// The first half of the operands is packed into index 0 of each output array
// and the second half into index 1; within a half, the first operand lands in
// the most significant slot.
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the second 64-bit half (index 1)?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
2300 2301 uint64_t EltBits = 0; 2302 if (OpVal.getOpcode() == ISD::UNDEF) { 2303 uint64_t EltUndefBits = ~0U >> (32-EltBitSize); 2304 UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize); 2305 continue; 2306 } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 2307 EltBits = CN->getValue() & (~0U >> (32-EltBitSize)); 2308 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 2309 assert(CN->getValueType(0) == MVT::f32 && 2310 "Only one legal FP vector type!"); 2311 EltBits = FloatToBits(CN->getValueAPF().convertToFloat()); 2312 } else { 2313 // Nonconstant element. 2314 return true; 2315 } 2316 2317 VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize); 2318 } 2319 2320 //printf("%llx %llx %llx %llx\n", 2321 // VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]); 2322 return false; 2323} 2324 2325// If this is a splat (repetition) of a value across the whole vector, return 2326// the smallest size that splats it. For example, "0x01010101010101..." is a 2327// splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 2328// SplatSize = 1 byte. 2329static bool isConstantSplat(const uint64_t Bits128[2], 2330 const uint64_t Undef128[2], 2331 unsigned &SplatBits, unsigned &SplatUndef, 2332 unsigned &SplatSize) { 2333 2334 // Don't let undefs prevent splats from matching. See if the top 64-bits are 2335 // the same as the lower 64-bits, ignoring undefs. 2336 if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0])) 2337 return false; // Can't be a splat if two pieces don't match. 2338 2339 uint64_t Bits64 = Bits128[0] | Bits128[1]; 2340 uint64_t Undef64 = Undef128[0] & Undef128[1]; 2341 2342 // Check that the top 32-bits are the same as the lower 32-bits, ignoring 2343 // undefs. 2344 if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64)) 2345 return false; // Can't be a splat if two pieces don't match. 
2346 2347 uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32); 2348 uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32); 2349 2350 // If the top 16-bits are different than the lower 16-bits, ignoring 2351 // undefs, we have an i32 splat. 2352 if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) { 2353 SplatBits = Bits32; 2354 SplatUndef = Undef32; 2355 SplatSize = 4; 2356 return true; 2357 } 2358 2359 uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16); 2360 uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16); 2361 2362 // If the top 8-bits are different than the lower 8-bits, ignoring 2363 // undefs, we have an i16 splat. 2364 if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) { 2365 SplatBits = Bits16; 2366 SplatUndef = Undef16; 2367 SplatSize = 2; 2368 return true; 2369 } 2370 2371 // Otherwise, we have an 8-bit splat. 2372 SplatBits = uint8_t(Bits16) | uint8_t(Bits16 >> 8); 2373 SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8); 2374 SplatSize = 1; 2375 return true; 2376} 2377 2378/// BuildSplatI - Build a canonical splati of Val with an element size of 2379/// SplatSize. Cast the result to VT. 2380static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT, 2381 SelectionDAG &DAG) { 2382 assert(Val >= -16 && Val <= 15 && "vsplti is out of range!"); 2383 2384 static const MVT::ValueType VTys[] = { // canonical VT to use for each size. 2385 MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32 2386 }; 2387 2388 MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1]; 2389 2390 // Force vspltis[hw] -1 to vspltisb -1 to canonicalize. 2391 if (Val == -1) 2392 SplatSize = 1; 2393 2394 MVT::ValueType CanonicalVT = VTys[SplatSize-1]; 2395 2396 // Build a canonical splat for this value. 
2397 SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT)); 2398 SmallVector<SDOperand, 8> Ops; 2399 Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt); 2400 SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, 2401 &Ops[0], Ops.size()); 2402 return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res); 2403} 2404 2405/// BuildIntrinsicOp - Return a binary operator intrinsic node with the 2406/// specified intrinsic ID. 2407static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS, 2408 SelectionDAG &DAG, 2409 MVT::ValueType DestVT = MVT::Other) { 2410 if (DestVT == MVT::Other) DestVT = LHS.getValueType(); 2411 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 2412 DAG.getConstant(IID, MVT::i32), LHS, RHS); 2413} 2414 2415/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the 2416/// specified intrinsic ID. 2417static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1, 2418 SDOperand Op2, SelectionDAG &DAG, 2419 MVT::ValueType DestVT = MVT::Other) { 2420 if (DestVT == MVT::Other) DestVT = Op0.getValueType(); 2421 return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT, 2422 DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2); 2423} 2424 2425 2426/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified 2427/// amount. The result has the specified value type. 2428static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt, 2429 MVT::ValueType VT, SelectionDAG &DAG) { 2430 // Force LHS/RHS to be the right type. 
2431 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS); 2432 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS); 2433 2434 SDOperand Ops[16]; 2435 for (unsigned i = 0; i != 16; ++i) 2436 Ops[i] = DAG.getConstant(i+Amt, MVT::i32); 2437 SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS, 2438 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16)); 2439 return DAG.getNode(ISD::BIT_CONVERT, VT, T); 2440} 2441 2442// If this is a case we can't handle, return null and let the default 2443// expansion code take care of it. If we CAN select this case, and if it 2444// selects to a single instruction, return Op. Otherwise, if we can codegen 2445// this case more efficiently than a constant pool load, lower it to the 2446// sequence of ops that should be used. 2447static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2448 // If this is a vector of constants or undefs, get the bits. A bit in 2449 // UndefBits is set if the corresponding element of the vector is an 2450 // ISD::UNDEF value. For undefs, the corresponding VectorBits values are 2451 // zero. 2452 uint64_t VectorBits[2]; 2453 uint64_t UndefBits[2]; 2454 if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits)) 2455 return SDOperand(); // Not a constant vector. 2456 2457 // If this is a splat (repetition) of a value across the whole vector, return 2458 // the smallest size that splats it. For example, "0x01010101010101..." is a 2459 // splat of 0x01, 0x0101, and 0x01010101. We return SplatBits = 0x01 and 2460 // SplatSize = 1 byte. 2461 unsigned SplatBits, SplatUndef, SplatSize; 2462 if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){ 2463 bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0; 2464 2465 // First, handle single instruction cases. 2466 2467 // All zeros? 2468 if (SplatBits == 0) { 2469 // Canonicalize all zero vectors to be v4i32. 
2470 if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) { 2471 SDOperand Z = DAG.getConstant(0, MVT::i32); 2472 Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z); 2473 Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z); 2474 } 2475 return Op; 2476 } 2477 2478 // If the sign extended value is in the range [-16,15], use VSPLTI[bhw]. 2479 int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize); 2480 if (SextVal >= -16 && SextVal <= 15) 2481 return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG); 2482 2483 2484 // Two instruction sequences. 2485 2486 // If this value is in the range [-32,30] and is even, use: 2487 // tmp = VSPLTI[bhw], result = add tmp, tmp 2488 if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) { 2489 Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG); 2490 return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op); 2491 } 2492 2493 // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is 2494 // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important 2495 // for fneg/fabs. 2496 if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) { 2497 // Make -1 and vspltisw -1: 2498 SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG); 2499 2500 // Make the VSLW intrinsic, computing 0x8000_0000. 2501 SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 2502 OnesV, DAG); 2503 2504 // xor by OnesV to invert it. 2505 Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV); 2506 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 2507 } 2508 2509 // Check to see if this is a wide variety of vsplti*, binop self cases. 
2510 unsigned SplatBitSize = SplatSize*8; 2511 static const signed char SplatCsts[] = { 2512 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 2513 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 2514 }; 2515 2516 for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) { 2517 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 2518 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 2519 int i = SplatCsts[idx]; 2520 2521 // Figure out what shift amount will be used by altivec if shifted by i in 2522 // this splat size. 2523 unsigned TypeShiftAmt = i & (SplatBitSize-1); 2524 2525 // vsplti + shl self. 2526 if (SextVal == (i << (int)TypeShiftAmt)) { 2527 SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 2528 static const unsigned IIDs[] = { // Intrinsic to use for each size. 2529 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 2530 Intrinsic::ppc_altivec_vslw 2531 }; 2532 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 2533 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 2534 } 2535 2536 // vsplti + srl self. 2537 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 2538 SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 2539 static const unsigned IIDs[] = { // Intrinsic to use for each size. 2540 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 2541 Intrinsic::ppc_altivec_vsrw 2542 }; 2543 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 2544 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 2545 } 2546 2547 // vsplti + sra self. 2548 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 2549 SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 2550 static const unsigned IIDs[] = { // Intrinsic to use for each size. 
2551 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 2552 Intrinsic::ppc_altivec_vsraw 2553 }; 2554 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 2555 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 2556 } 2557 2558 // vsplti + rol self. 2559 if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | 2560 ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { 2561 SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG); 2562 static const unsigned IIDs[] = { // Intrinsic to use for each size. 2563 Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0, 2564 Intrinsic::ppc_altivec_vrlw 2565 }; 2566 Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG); 2567 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res); 2568 } 2569 2570 // t = vsplti c, result = vsldoi t, t, 1 2571 if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) { 2572 SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 2573 return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG); 2574 } 2575 // t = vsplti c, result = vsldoi t, t, 2 2576 if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) { 2577 SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 2578 return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG); 2579 } 2580 // t = vsplti c, result = vsldoi t, t, 3 2581 if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) { 2582 SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG); 2583 return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG); 2584 } 2585 } 2586 2587 // Three instruction sequences. 2588 2589 // Odd, in range [17,31]: (vsplti C)-(vsplti -16). 2590 if (SextVal >= 0 && SextVal <= 31) { 2591 SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG); 2592 SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); 2593 LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS); 2594 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); 2595 } 2596 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 
2597 if (SextVal >= -31 && SextVal <= 0) { 2598 SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG); 2599 SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG); 2600 LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS); 2601 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS); 2602 } 2603 } 2604 2605 return SDOperand(); 2606} 2607 2608/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 2609/// the specified operations to build the shuffle. 2610static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS, 2611 SDOperand RHS, SelectionDAG &DAG) { 2612 unsigned OpNum = (PFEntry >> 26) & 0x0F; 2613 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 2614 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 2615 2616 enum { 2617 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 2618 OP_VMRGHW, 2619 OP_VMRGLW, 2620 OP_VSPLTISW0, 2621 OP_VSPLTISW1, 2622 OP_VSPLTISW2, 2623 OP_VSPLTISW3, 2624 OP_VSLDOI4, 2625 OP_VSLDOI8, 2626 OP_VSLDOI12 2627 }; 2628 2629 if (OpNum == OP_COPY) { 2630 if (LHSID == (1*9+2)*9+3) return LHS; 2631 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 2632 return RHS; 2633 } 2634 2635 SDOperand OpLHS, OpRHS; 2636 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); 2637 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); 2638 2639 unsigned ShufIdxs[16]; 2640 switch (OpNum) { 2641 default: assert(0 && "Unknown i32 permute!"); 2642 case OP_VMRGHW: 2643 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 2644 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 2645 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 2646 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 2647 break; 2648 case OP_VMRGLW: 2649 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 2650 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 
25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 2651 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 2652 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 2653 break; 2654 case OP_VSPLTISW0: 2655 for (unsigned i = 0; i != 16; ++i) 2656 ShufIdxs[i] = (i&3)+0; 2657 break; 2658 case OP_VSPLTISW1: 2659 for (unsigned i = 0; i != 16; ++i) 2660 ShufIdxs[i] = (i&3)+4; 2661 break; 2662 case OP_VSPLTISW2: 2663 for (unsigned i = 0; i != 16; ++i) 2664 ShufIdxs[i] = (i&3)+8; 2665 break; 2666 case OP_VSPLTISW3: 2667 for (unsigned i = 0; i != 16; ++i) 2668 ShufIdxs[i] = (i&3)+12; 2669 break; 2670 case OP_VSLDOI4: 2671 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 2672 case OP_VSLDOI8: 2673 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 2674 case OP_VSLDOI12: 2675 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 2676 } 2677 SDOperand Ops[16]; 2678 for (unsigned i = 0; i != 16; ++i) 2679 Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32); 2680 2681 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 2682 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 2683} 2684 2685/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 2686/// is a shuffle we can handle in a single instruction, return it. Otherwise, 2687/// return the code it can be lowered into. Worst case, it can always be 2688/// lowered into a vperm. 2689static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 2690 SDOperand V1 = Op.getOperand(0); 2691 SDOperand V2 = Op.getOperand(1); 2692 SDOperand PermMask = Op.getOperand(2); 2693 2694 // Cases that are handled by instructions that take permute immediates 2695 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 2696 // selected by the instruction selector. 
2697 if (V2.getOpcode() == ISD::UNDEF) { 2698 if (PPC::isSplatShuffleMask(PermMask.Val, 1) || 2699 PPC::isSplatShuffleMask(PermMask.Val, 2) || 2700 PPC::isSplatShuffleMask(PermMask.Val, 4) || 2701 PPC::isVPKUWUMShuffleMask(PermMask.Val, true) || 2702 PPC::isVPKUHUMShuffleMask(PermMask.Val, true) || 2703 PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 || 2704 PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) || 2705 PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) || 2706 PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) || 2707 PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) || 2708 PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) || 2709 PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) { 2710 return Op; 2711 } 2712 } 2713 2714 // Altivec has a variety of "shuffle immediates" that take two vector inputs 2715 // and produce a fixed permutation. If any of these match, do not lower to 2716 // VPERM. 2717 if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) || 2718 PPC::isVPKUHUMShuffleMask(PermMask.Val, false) || 2719 PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 || 2720 PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) || 2721 PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) || 2722 PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) || 2723 PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) || 2724 PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) || 2725 PPC::isVMRGHShuffleMask(PermMask.Val, 4, false)) 2726 return Op; 2727 2728 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 2729 // perfect shuffle table to emit an optimal matching sequence. 2730 unsigned PFIndexes[4]; 2731 bool isFourElementShuffle = true; 2732 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 2733 unsigned EltNo = 8; // Start out undef. 2734 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 2735 if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) 2736 continue; // Undef, ignore it. 
2737 2738 unsigned ByteSource = 2739 cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue(); 2740 if ((ByteSource & 3) != j) { 2741 isFourElementShuffle = false; 2742 break; 2743 } 2744 2745 if (EltNo == 8) { 2746 EltNo = ByteSource/4; 2747 } else if (EltNo != ByteSource/4) { 2748 isFourElementShuffle = false; 2749 break; 2750 } 2751 } 2752 PFIndexes[i] = EltNo; 2753 } 2754 2755 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 2756 // perfect shuffle vector to determine if it is cost effective to do this as 2757 // discrete instructions, or whether we should use a vperm. 2758 if (isFourElementShuffle) { 2759 // Compute the index in the perfect shuffle table. 2760 unsigned PFTableIndex = 2761 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 2762 2763 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 2764 unsigned Cost = (PFEntry >> 30); 2765 2766 // Determining when to avoid vperm is tricky. Many things affect the cost 2767 // of vperm, particularly how many times the perm mask needs to be computed. 2768 // For example, if the perm mask can be hoisted out of a loop or is already 2769 // used (perhaps because there are multiple permutes with the same shuffle 2770 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 2771 // the loop requires an extra register. 2772 // 2773 // As a compromise, we only emit discrete instructions if the shuffle can be 2774 // generated in 3 or fewer operations. When we have loop information 2775 // available, if this block is within a loop, we should avoid using vperm 2776 // for 3-operation perms and use a constant pool load instead. 2777 if (Cost < 3) 2778 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG); 2779 } 2780 2781 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 2782 // vector that will get spilled to the constant pool. 
2783 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 2784 2785 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 2786 // that it is in input element units, not in bytes. Convert now. 2787 MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType()); 2788 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; 2789 2790 SmallVector<SDOperand, 16> ResultMask; 2791 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 2792 unsigned SrcElt; 2793 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) 2794 SrcElt = 0; 2795 else 2796 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 2797 2798 for (unsigned j = 0; j != BytesPerElement; ++j) 2799 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 2800 MVT::i8)); 2801 } 2802 2803 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, 2804 &ResultMask[0], ResultMask.size()); 2805 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 2806} 2807 2808/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an 2809/// altivec comparison. If it is, return true and fill in Opc/isDot with 2810/// information about the intrinsic. 2811static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc, 2812 bool &isDot) { 2813 unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue(); 2814 CompareOpc = -1; 2815 isDot = false; 2816 switch (IntrinsicID) { 2817 default: return false; 2818 // Comparison predicates. 
2819 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 2820 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 2821 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 2822 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 2823 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 2824 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 2825 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 2826 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 2827 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 2828 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 2829 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 2830 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 2831 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 2832 2833 // Normal Comparisons. 
2834 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 2835 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 2836 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 2837 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 2838 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 2839 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 2840 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 2841 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 2842 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 2843 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 2844 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 2845 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 2846 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 2847 } 2848 return true; 2849} 2850 2851/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 2852/// lower, do it, otherwise return null. 2853static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 2854 // If this is a lowered altivec predicate compare, CompareOpc is set to the 2855 // opcode number of the comparison. 2856 int CompareOpc; 2857 bool isDot; 2858 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 2859 return SDOperand(); // Don't custom lower most intrinsics. 2860 2861 // If this is a non-dot comparison, make the VCMP node and we are done. 2862 if (!isDot) { 2863 SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 2864 Op.getOperand(1), Op.getOperand(2), 2865 DAG.getConstant(CompareOpc, MVT::i32)); 2866 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 2867 } 2868 2869 // Create the PPCISD altivec 'dot' comparison node. 
2870 SDOperand Ops[] = { 2871 Op.getOperand(2), // LHS 2872 Op.getOperand(3), // RHS 2873 DAG.getConstant(CompareOpc, MVT::i32) 2874 }; 2875 std::vector<MVT::ValueType> VTs; 2876 VTs.push_back(Op.getOperand(2).getValueType()); 2877 VTs.push_back(MVT::Flag); 2878 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); 2879 2880 // Now that we have the comparison, emit a copy from the CR to a GPR. 2881 // This is flagged to the above dot comparison. 2882 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 2883 DAG.getRegister(PPC::CR6, MVT::i32), 2884 CompNode.getValue(1)); 2885 2886 // Unpack the result based on how the target uses it. 2887 unsigned BitNo; // Bit # of CR6. 2888 bool InvertBit; // Invert result? 2889 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 2890 default: // Can't happen, don't crash on invalid number though. 2891 case 0: // Return the value of the EQ bit of CR6. 2892 BitNo = 0; InvertBit = false; 2893 break; 2894 case 1: // Return the inverted value of the EQ bit of CR6. 2895 BitNo = 0; InvertBit = true; 2896 break; 2897 case 2: // Return the value of the LT bit of CR6. 2898 BitNo = 2; InvertBit = false; 2899 break; 2900 case 3: // Return the inverted value of the LT bit of CR6. 2901 BitNo = 2; InvertBit = true; 2902 break; 2903 } 2904 2905 // Shift the bit into the low position. 2906 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 2907 DAG.getConstant(8-(3-BitNo), MVT::i32)); 2908 // Isolate the bit. 2909 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 2910 DAG.getConstant(1, MVT::i32)); 2911 2912 // If we are supposed to, toggle the bit. 2913 if (InvertBit) 2914 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 2915 DAG.getConstant(1, MVT::i32)); 2916 return Flags; 2917} 2918 2919static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 2920 // Create a stack slot that is 16-byte aligned. 
2921 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 2922 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 2923 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2924 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 2925 2926 // Store the input value into Value#0 of the stack slot. 2927 SDOperand Store = DAG.getStore(DAG.getEntryNode(), 2928 Op.getOperand(0), FIdx, NULL, 0); 2929 // Load it out. 2930 return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); 2931} 2932 2933static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { 2934 if (Op.getValueType() == MVT::v4i32) { 2935 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 2936 2937 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 2938 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 2939 2940 SDOperand RHSSwap = // = vrlw RHS, 16 2941 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 2942 2943 // Shrinkify inputs to v8i16. 2944 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 2945 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 2946 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 2947 2948 // Low parts multiplied together, generating 32-bit results (we ignore the 2949 // top parts). 2950 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 2951 LHS, RHS, DAG, MVT::v4i32); 2952 2953 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 2954 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 2955 // Shift the high parts up 16 bits. 
2956 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 2957 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 2958 } else if (Op.getValueType() == MVT::v8i16) { 2959 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 2960 2961 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 2962 2963 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 2964 LHS, RHS, Zero, DAG); 2965 } else if (Op.getValueType() == MVT::v16i8) { 2966 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 2967 2968 // Multiply the even 8-bit parts, producing 16-bit sums. 2969 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 2970 LHS, RHS, DAG, MVT::v8i16); 2971 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 2972 2973 // Multiply the odd 8-bit parts, producing 16-bit sums. 2974 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 2975 LHS, RHS, DAG, MVT::v8i16); 2976 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 2977 2978 // Merge the results together. 2979 SDOperand Ops[16]; 2980 for (unsigned i = 0; i != 8; ++i) { 2981 Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); 2982 Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); 2983 } 2984 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 2985 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 2986 } else { 2987 assert(0 && "Unknown mul to lower!"); 2988 abort(); 2989 } 2990} 2991 2992/// LowerOperation - Provide custom lowering hooks for some operations. 
///
/// Dispatches on the opcode of Op to the matching Lower* helper and returns
/// that helper's result.
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  // NOTE: when assertions are compiled out, an unexpected opcode falls through
  // into the ConstantPool case below.
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                        VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                      VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
                                 VarArgsStackOffset, VarArgsNumGPR,
                                 VarArgsNumFPR, PPCSubTarget);

  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget);
  case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);

  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
  // Not reached: every path through the switch above returns.
  return SDOperand();
}

/// ExpandOperationResult - Produce the replacement node for result 0 of N.
/// Only ISD::FP_TO_SINT is expected here; it is forwarded to LowerFP_TO_SINT.
SDNode *PPCTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  // NOTE: when assertions are compiled out, any other opcode falls through
  // into the FP_TO_SINT case.
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
  }
}


//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

/// InsertAtEndOfBasicBlock - Expand a SELECT_CC_* pseudo instruction at the
/// end of BB into an explicit diamond: a conditional branch, a fall-through
/// block, and a join block holding the PHI that produces the selected value.
/// MI is deleted once the expansion has been emitted.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8 ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  // It is the insertion point for the new blocks: right after BB.
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  // Operand 4 of the pseudo is the branch predicate, operand 1 the condition
  // register; the branch skips copy0MBB and goes straight to sinkMBB.
  unsigned SelectPred = MI->getOperand(4).getImm();
  BuildMI(BB, TII->get(PPC::BCC))
    .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  // Operand 0 of the pseudo is the result register; operand 3 is the value
  // arriving from copy0MBB and operand 2 the value arriving from thisMBB.
  BB = sinkMBB;
  BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
3120 return BB; 3121} 3122 3123//===----------------------------------------------------------------------===// 3124// Target Optimization Hooks 3125//===----------------------------------------------------------------------===// 3126 3127SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 3128 DAGCombinerInfo &DCI) const { 3129 TargetMachine &TM = getTargetMachine(); 3130 SelectionDAG &DAG = DCI.DAG; 3131 switch (N->getOpcode()) { 3132 default: break; 3133 case PPCISD::SHL: 3134 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 3135 if (C->getValue() == 0) // 0 << V -> 0. 3136 return N->getOperand(0); 3137 } 3138 break; 3139 case PPCISD::SRL: 3140 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 3141 if (C->getValue() == 0) // 0 >>u V -> 0. 3142 return N->getOperand(0); 3143 } 3144 break; 3145 case PPCISD::SRA: 3146 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) { 3147 if (C->getValue() == 0 || // 0 >>s V -> 0. 3148 C->isAllOnesValue()) // -1 >>s V -> -1. 3149 return N->getOperand(0); 3150 } 3151 break; 3152 3153 case ISD::SINT_TO_FP: 3154 if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) { 3155 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 3156 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 3157 // We allow the src/dst to be either f32/f64, but the intermediate 3158 // type must be i64. 
3159 if (N->getOperand(0).getValueType() == MVT::i64 && 3160 N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) { 3161 SDOperand Val = N->getOperand(0).getOperand(0); 3162 if (Val.getValueType() == MVT::f32) { 3163 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 3164 DCI.AddToWorklist(Val.Val); 3165 } 3166 3167 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); 3168 DCI.AddToWorklist(Val.Val); 3169 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); 3170 DCI.AddToWorklist(Val.Val); 3171 if (N->getValueType(0) == MVT::f32) { 3172 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val, 3173 DAG.getIntPtrConstant(0)); 3174 DCI.AddToWorklist(Val.Val); 3175 } 3176 return Val; 3177 } else if (N->getOperand(0).getValueType() == MVT::i32) { 3178 // If the intermediate type is i32, we can avoid the load/store here 3179 // too. 3180 } 3181 } 3182 } 3183 break; 3184 case ISD::STORE: 3185 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 3186 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 3187 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 3188 N->getOperand(1).getValueType() == MVT::i32 && 3189 N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) { 3190 SDOperand Val = N->getOperand(1).getOperand(0); 3191 if (Val.getValueType() == MVT::f32) { 3192 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 3193 DCI.AddToWorklist(Val.Val); 3194 } 3195 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); 3196 DCI.AddToWorklist(Val.Val); 3197 3198 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, 3199 N->getOperand(2), N->getOperand(3)); 3200 DCI.AddToWorklist(Val.Val); 3201 return Val; 3202 } 3203 3204 // Turn STORE (BSWAP) -> sthbrx/stwbrx. 
3205 if (N->getOperand(1).getOpcode() == ISD::BSWAP && 3206 N->getOperand(1).Val->hasOneUse() && 3207 (N->getOperand(1).getValueType() == MVT::i32 || 3208 N->getOperand(1).getValueType() == MVT::i16)) { 3209 SDOperand BSwapOp = N->getOperand(1).getOperand(0); 3210 // Do an any-extend to 32-bits if this is a half-word input. 3211 if (BSwapOp.getValueType() == MVT::i16) 3212 BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp); 3213 3214 return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp, 3215 N->getOperand(2), N->getOperand(3), 3216 DAG.getValueType(N->getOperand(1).getValueType())); 3217 } 3218 break; 3219 case ISD::BSWAP: 3220 // Turn BSWAP (LOAD) -> lhbrx/lwbrx. 3221 if (ISD::isNON_EXTLoad(N->getOperand(0).Val) && 3222 N->getOperand(0).hasOneUse() && 3223 (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) { 3224 SDOperand Load = N->getOperand(0); 3225 LoadSDNode *LD = cast<LoadSDNode>(Load); 3226 // Create the byte-swapping load. 3227 std::vector<MVT::ValueType> VTs; 3228 VTs.push_back(MVT::i32); 3229 VTs.push_back(MVT::Other); 3230 SDOperand SV = DAG.getSrcValue(LD->getSrcValue(), LD->getSrcValueOffset()); 3231 SDOperand Ops[] = { 3232 LD->getChain(), // Chain 3233 LD->getBasePtr(), // Ptr 3234 SV, // SrcValue 3235 DAG.getValueType(N->getValueType(0)) // VT 3236 }; 3237 SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4); 3238 3239 // If this is an i16 load, insert the truncate. 3240 SDOperand ResVal = BSLoad; 3241 if (N->getValueType(0) == MVT::i16) 3242 ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad); 3243 3244 // First, combine the bswap away. This makes the value produced by the 3245 // load dead. 3246 DCI.CombineTo(N, ResVal); 3247 3248 // Next, combine the load away, we give it a bogus result value but a real 3249 // chain result. The result value is dead because the bswap is dead. 3250 DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1)); 3251 3252 // Return N so it doesn't get rechecked! 
3253 return SDOperand(N, 0); 3254 } 3255 3256 break; 3257 case PPCISD::VCMP: { 3258 // If a VCMPo node already exists with exactly the same operands as this 3259 // node, use its result instead of this node (VCMPo computes both a CR6 and 3260 // a normal output). 3261 // 3262 if (!N->getOperand(0).hasOneUse() && 3263 !N->getOperand(1).hasOneUse() && 3264 !N->getOperand(2).hasOneUse()) { 3265 3266 // Scan all of the users of the LHS, looking for VCMPo's that match. 3267 SDNode *VCMPoNode = 0; 3268 3269 SDNode *LHSN = N->getOperand(0).Val; 3270 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 3271 UI != E; ++UI) 3272 if ((*UI)->getOpcode() == PPCISD::VCMPo && 3273 (*UI)->getOperand(1) == N->getOperand(1) && 3274 (*UI)->getOperand(2) == N->getOperand(2) && 3275 (*UI)->getOperand(0) == N->getOperand(0)) { 3276 VCMPoNode = *UI; 3277 break; 3278 } 3279 3280 // If there is no VCMPo node, or if the flag value has a single use, don't 3281 // transform this. 3282 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 3283 break; 3284 3285 // Look at the (necessarily single) use of the flag value. If it has a 3286 // chain, this transformation is more complex. Note that multiple things 3287 // could use the value result, which we should ignore. 3288 SDNode *FlagUser = 0; 3289 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 3290 FlagUser == 0; ++UI) { 3291 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 3292 SDNode *User = *UI; 3293 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 3294 if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) { 3295 FlagUser = User; 3296 break; 3297 } 3298 } 3299 } 3300 3301 // If the user is a MFCR instruction, we know this is safe. Otherwise we 3302 // give up for right now. 
3303 if (FlagUser->getOpcode() == PPCISD::MFCR) 3304 return SDOperand(VCMPoNode, 0); 3305 } 3306 break; 3307 } 3308 case ISD::BR_CC: { 3309 // If this is a branch on an altivec predicate comparison, lower this so 3310 // that we don't have to do a MFCR: instead, branch directly on CR6. This 3311 // lowering is done pre-legalize, because the legalizer lowers the predicate 3312 // compare down to code that is difficult to reassemble. 3313 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 3314 SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3); 3315 int CompareOpc; 3316 bool isDot; 3317 3318 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 3319 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 3320 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 3321 assert(isDot && "Can't compare against a vector result!"); 3322 3323 // If this is a comparison against something other than 0/1, then we know 3324 // that the condition is never/always true. 3325 unsigned Val = cast<ConstantSDNode>(RHS)->getValue(); 3326 if (Val != 0 && Val != 1) { 3327 if (CC == ISD::SETEQ) // Cond never true, remove branch. 3328 return N->getOperand(0); 3329 // Always !=, turn it into an unconditional branch. 3330 return DAG.getNode(ISD::BR, MVT::Other, 3331 N->getOperand(0), N->getOperand(4)); 3332 } 3333 3334 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 3335 3336 // Create the PPCISD altivec 'dot' comparison node. 3337 std::vector<MVT::ValueType> VTs; 3338 SDOperand Ops[] = { 3339 LHS.getOperand(2), // LHS of compare 3340 LHS.getOperand(3), // RHS of compare 3341 DAG.getConstant(CompareOpc, MVT::i32) 3342 }; 3343 VTs.push_back(LHS.getOperand(2).getValueType()); 3344 VTs.push_back(MVT::Flag); 3345 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3); 3346 3347 // Unpack the result based on how the target uses it. 
3348 PPC::Predicate CompOpc; 3349 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) { 3350 default: // Can't happen, don't crash on invalid number though. 3351 case 0: // Branch on the value of the EQ bit of CR6. 3352 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE; 3353 break; 3354 case 1: // Branch on the inverted value of the EQ bit of CR6. 3355 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ; 3356 break; 3357 case 2: // Branch on the value of the LT bit of CR6. 3358 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE; 3359 break; 3360 case 3: // Branch on the inverted value of the LT bit of CR6. 3361 CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT; 3362 break; 3363 } 3364 3365 return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0), 3366 DAG.getConstant(CompOpc, MVT::i32), 3367 DAG.getRegister(PPC::CR6, MVT::i32), 3368 N->getOperand(4), CompNode.getValue(1)); 3369 } 3370 break; 3371 } 3372 } 3373 3374 return SDOperand(); 3375} 3376 3377//===----------------------------------------------------------------------===// 3378// Inline Assembly Support 3379//===----------------------------------------------------------------------===// 3380 3381void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 3382 uint64_t Mask, 3383 uint64_t &KnownZero, 3384 uint64_t &KnownOne, 3385 const SelectionDAG &DAG, 3386 unsigned Depth) const { 3387 KnownZero = 0; 3388 KnownOne = 0; 3389 switch (Op.getOpcode()) { 3390 default: break; 3391 case PPCISD::LBRX: { 3392 // lhbrx is known to have the top bits cleared out. 
3393 if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16) 3394 KnownZero = 0xFFFF0000; 3395 break; 3396 } 3397 case ISD::INTRINSIC_WO_CHAIN: { 3398 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 3399 default: break; 3400 case Intrinsic::ppc_altivec_vcmpbfp_p: 3401 case Intrinsic::ppc_altivec_vcmpeqfp_p: 3402 case Intrinsic::ppc_altivec_vcmpequb_p: 3403 case Intrinsic::ppc_altivec_vcmpequh_p: 3404 case Intrinsic::ppc_altivec_vcmpequw_p: 3405 case Intrinsic::ppc_altivec_vcmpgefp_p: 3406 case Intrinsic::ppc_altivec_vcmpgtfp_p: 3407 case Intrinsic::ppc_altivec_vcmpgtsb_p: 3408 case Intrinsic::ppc_altivec_vcmpgtsh_p: 3409 case Intrinsic::ppc_altivec_vcmpgtsw_p: 3410 case Intrinsic::ppc_altivec_vcmpgtub_p: 3411 case Intrinsic::ppc_altivec_vcmpgtuh_p: 3412 case Intrinsic::ppc_altivec_vcmpgtuw_p: 3413 KnownZero = ~1U; // All bits but the low one are known to be zero. 3414 break; 3415 } 3416 } 3417 } 3418} 3419 3420 3421/// getConstraintType - Given a constraint, return the type of 3422/// constraint it is for this target. 
3423PPCTargetLowering::ConstraintType 3424PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 3425 if (Constraint.size() == 1) { 3426 switch (Constraint[0]) { 3427 default: break; 3428 case 'b': 3429 case 'r': 3430 case 'f': 3431 case 'v': 3432 case 'y': 3433 return C_RegisterClass; 3434 } 3435 } 3436 return TargetLowering::getConstraintType(Constraint); 3437} 3438 3439std::pair<unsigned, const TargetRegisterClass*> 3440PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3441 MVT::ValueType VT) const { 3442 if (Constraint.size() == 1) { 3443 // GCC RS6000 Constraint Letters 3444 switch (Constraint[0]) { 3445 case 'b': // R1-R31 3446 case 'r': // R0-R31 3447 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 3448 return std::make_pair(0U, PPC::G8RCRegisterClass); 3449 return std::make_pair(0U, PPC::GPRCRegisterClass); 3450 case 'f': 3451 if (VT == MVT::f32) 3452 return std::make_pair(0U, PPC::F4RCRegisterClass); 3453 else if (VT == MVT::f64) 3454 return std::make_pair(0U, PPC::F8RCRegisterClass); 3455 break; 3456 case 'v': 3457 return std::make_pair(0U, PPC::VRRCRegisterClass); 3458 case 'y': // crrc 3459 return std::make_pair(0U, PPC::CRRCRegisterClass); 3460 } 3461 } 3462 3463 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3464} 3465 3466 3467/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 3468/// vector. If it is invalid, don't add anything to Ops. 3469void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter, 3470 std::vector<SDOperand>&Ops, 3471 SelectionDAG &DAG) { 3472 SDOperand Result(0,0); 3473 switch (Letter) { 3474 default: break; 3475 case 'I': 3476 case 'J': 3477 case 'K': 3478 case 'L': 3479 case 'M': 3480 case 'N': 3481 case 'O': 3482 case 'P': { 3483 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 3484 if (!CST) return; // Must be an immediate to match. 
3485 unsigned Value = CST->getValue(); 3486 switch (Letter) { 3487 default: assert(0 && "Unknown constraint letter!"); 3488 case 'I': // "I" is a signed 16-bit constant. 3489 if ((short)Value == (int)Value) 3490 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3491 break; 3492 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 3493 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 3494 if ((short)Value == 0) 3495 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3496 break; 3497 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 3498 if ((Value >> 16) == 0) 3499 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3500 break; 3501 case 'M': // "M" is a constant that is greater than 31. 3502 if (Value > 31) 3503 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3504 break; 3505 case 'N': // "N" is a positive constant that is an exact power of two. 3506 if ((int)Value > 0 && isPowerOf2_32(Value)) 3507 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3508 break; 3509 case 'O': // "O" is the constant zero. 3510 if (Value == 0) 3511 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3512 break; 3513 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 3514 if ((short)-Value == (int)-Value) 3515 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3516 break; 3517 } 3518 break; 3519 } 3520 } 3521 3522 if (Result.Val) { 3523 Ops.push_back(Result); 3524 return; 3525 } 3526 3527 // Handle standard constraint letters. 3528 TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG); 3529} 3530 3531// isLegalAddressingMode - Return true if the addressing mode represented 3532// by AM is legal for this target, for a load/store of the specified type. 3533bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 3534 const Type *Ty) const { 3535 // FIXME: PPC does not allow r+i addressing modes for vectors! 
3536 3537 // PPC allows a sign-extended 16-bit immediate field. 3538 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 3539 return false; 3540 3541 // No global is ever allowed as a base. 3542 if (AM.BaseGV) 3543 return false; 3544 3545 // PPC only support r+r, 3546 switch (AM.Scale) { 3547 case 0: // "r+i" or just "i", depending on HasBaseReg. 3548 break; 3549 case 1: 3550 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 3551 return false; 3552 // Otherwise we have r+r or r+i. 3553 break; 3554 case 2: 3555 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 3556 return false; 3557 // Allow 2*r as r+r. 3558 break; 3559 default: 3560 // No other scales are supported. 3561 return false; 3562 } 3563 3564 return true; 3565} 3566 3567/// isLegalAddressImmediate - Return true if the integer value can be used 3568/// as the offset of the target addressing mode for load / store of the 3569/// given type. 3570bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ 3571 // PPC allows a sign-extended 16-bit immediate field. 3572 return (V > -(1 << 16) && V < (1 << 16)-1); 3573} 3574 3575bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3576 return false; 3577} 3578 3579SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 3580 // Depths > 0 not supported yet! 3581 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 3582 return SDOperand(); 3583 3584 MachineFunction &MF = DAG.getMachineFunction(); 3585 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3586 int RAIdx = FuncInfo->getReturnAddrSaveIndex(); 3587 if (RAIdx == 0) { 3588 bool isPPC64 = PPCSubTarget.isPPC64(); 3589 int Offset = 3590 PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI()); 3591 3592 // Set up a frame object for the return address. 3593 RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset); 3594 3595 // Remember it for next time. 
3596 FuncInfo->setReturnAddrSaveIndex(RAIdx); 3597 3598 // Make sure the function really does not optimize away the store of the RA 3599 // to the stack. 3600 FuncInfo->setLRStoreRequired(); 3601 } 3602 3603 // Just load the return address off the stack. 3604 SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy()); 3605 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 3606} 3607 3608SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 3609 // Depths > 0 not supported yet! 3610 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 3611 return SDOperand(); 3612 3613 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3614 bool isPPC64 = PtrVT == MVT::i64; 3615 3616 MachineFunction &MF = DAG.getMachineFunction(); 3617 MachineFrameInfo *MFI = MF.getFrameInfo(); 3618 bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) 3619 && MFI->getStackSize(); 3620 3621 if (isPPC64) 3622 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1, 3623 MVT::i64); 3624 else 3625 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1, 3626 MVT::i32); 3627} 3628