PPCISelLowering.cpp revision dc1adac582fa120861f18ae7221bfe1421fea59f
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCPredicates.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Command-line switch gating the (experimental) formation of pre-increment
// load/store nodes; consulted by getPreIndexedAddressParts below.
static cl::opt<bool> EnablePPCPreinc("enable-ppc-preinc",
cl::desc("enable preincrement load/store generation on PPC (experimental)"),
                                     cl::Hidden);

/// PPCTargetLowering - Describe to the SelectionDAG framework how every
/// generic operation must be handled on PowerPC: which types live in which
/// register classes, which ISD nodes are Legal / Expand / Promote / Custom,
/// and which target hooks and libcall names to use.  Ordering matters: a
/// later setOperationAction call for the same (op, VT) pair overrides an
/// earlier one.
PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM)
  : TargetLowering(TM), PPCSubTarget(*TM.getSubtargetImpl()) {

  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmp(true);
  setUseUnderscoreLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setLoadXAction(ISD::SEXTLOAD, MVT::i1, Promote);
  setLoadXAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // No direct f64 -> f32 truncating store.
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // PowerPC has pre-inc load and store's.
  setIndexedLoadAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedLoadAction(ISD::PRE_INC, MVT::i64, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i1, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i8, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i16, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal);
  setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal);

  // Shortening conversions involving ppcf128 get expanded (2 regs -> 1 reg).
  setConvertAction(MVT::ppcf128, MVT::f64, Expand);
  setConvertAction(MVT::ppcf128, MVT::f32, Expand);
  // This is used in the ppcf128->int sequence.  Note it has different
  // semantics from FP_ROUND: that rounds to nearest, this rounds to zero.
  setOperationAction(ISD::FP_ROUND_INREG, MVT::ppcf128, Custom);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);
  setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);
  setOperationAction(ISD::SREM, MVT::i64, Expand);
  setOperationAction(ISD::UREM, MVT::i64, Expand);

  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
  setOperationAction(ISD::UDIVREM, MVT::i64, Expand);
  setOperationAction(ISD::SDIVREM, MVT::i64, Expand);

  // We don't support sin/cos/sqrt/fmod/pow.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FPOW , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);
  setOperationAction(ISD::FPOW , MVT::f32, Expand);

  setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom);

  // Only subtargets with a hardware square-root instruction (fsqrt/fsqrts)
  // keep FSQRT legal; everything else expands it to a libcall.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);
  setOperationAction(ISD::BSWAP, MVT::i64  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i64  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i64  , Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have Select.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::i64, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  setOperationAction(ISD::BR_JT,  MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i64, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::f64, Expand);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);

  // No special lowering for exception address/selector nodes.
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i64, Expand);
  setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
  setOperationAction(ISD::EHSELECTION,   MVT::i32, Expand);


  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);
  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
  setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i64, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i64, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // VAARG is custom lowered with ELF 32 ABI.
  if (TM.getSubtarget<PPCSubtarget>().isELF32_ABI())
    setOperationAction(ISD::VAARG, MVT::Other, Custom);
  else
    setOperationAction(ISD::VAARG, MVT::Other, Expand);

  // Use the default implementation.
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Custom);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64  , Custom);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
    setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
    // NOTE: this Expand is overridden by the Promote a few lines below.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);

    // FIXME: disable this lowered code.  This generates 64-bit register
    // values, and we don't model the fact that the top part is clobbered by
    // calls.  We need to flag these together so that the value isn't live
    // across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().use64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
    // 64-bit PowerPC wants to expand i128 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i64, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
    setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FNEG, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UMUL_LOHI, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SMUL_LOHI, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIVREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIVREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::FPOW, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTPOP, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTLZ, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::CTTZ, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setShiftAmountType(MVT::i32);
  setSetCCResultContents(ZeroOrOneSetCCResult);

  // Pick the stack pointer and EH registers that match the register width.
  if (TM.getSubtarget<PPCSubtarget>().isPPC64()) {
    setStackPointerRegisterToSaveRestore(PPC::X1);
    setExceptionPointerRegister(PPC::X3);
    setExceptionSelectorRegister(PPC::X4);
  } else {
    setStackPointerRegisterToSaveRestore(PPC::R1);
    setExceptionPointerRegister(PPC::R3);
    setExceptionSelectorRegister(PPC::R4);
  }

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);
  setTargetDAGCombine(ISD::BSWAP);

  // Darwin long double math library functions have $LDBL128 appended.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128");
    setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128");
    setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128");
    setLibcallName(RTLIB::SIN_PPCF128, "sinl$LDBL128");
    setLibcallName(RTLIB::SQRT_PPCF128, "sqrtl$LDBL128");
  }

  computeRegisterProperties();
}

/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
/// function arguments in the caller parameter area.
unsigned PPCTargetLowering::getByValTypeAlignment(const Type *Ty) const {
  TargetMachine &TM = getTargetMachine();
  // Darwin passes everything on 4 byte boundary.
  if (TM.getSubtarget<PPCSubtarget>().isDarwin())
    return 4;
  // FIXME Elf TBD -- currently also 4 bytes, same as Darwin.
  return 4;
}

/// getTargetNodeName - Map a PPCISD opcode to its printable name, for DAG
/// dumps and debugging.  Returns null for opcodes without a name.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::DYNALLOC:      return "PPCISD::DYNALLOC";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL_ELF:      return "PPCISD::CALL_ELF";
  case PPCISD::CALL_Macho:    return "PPCISD::CALL_Macho";
  case PPCISD::MTCTR:         return "PPCISD::MTCTR";
  case PPCISD::BCTRL_Macho:   return "PPCISD::BCTRL_Macho";
  case PPCISD::BCTRL_ELF:     return "PPCISD::BCTRL_ELF";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  case PPCISD::LBRX:          return "PPCISD::LBRX";
  case PPCISD::STBRX:         return "PPCISD::STBRX";
  case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
  case PPCISD::MFFS:          return "PPCISD::MFFS";
  case PPCISD::MTFSB0:        return "PPCISD::MTFSB0";
  case PPCISD::MTFSB1:        return "PPCISD::MTFSB1";
  case PPCISD::FADDRTZ:       return "PPCISD::FADDRTZ";
  case PPCISD::MTFSF:         return "PPCISD::MTFSF";
  }
}


/// getSetCCResultType - SETCC on PPC always produces an i32 result,
/// regardless of the operand type.
MVT::ValueType
PPCTargetLowering::getSetCCResultType(const SDOperand &) const {
  return MVT::i32;
}


//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.Val) || ISD::isNON_EXTLoad(Op.Val)) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction (pack the odd bytes of each halfword).  In unary mode
/// both shuffle inputs are the same vector, so the second half of the mask
/// must repeat the first half's byte pattern.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    // Binary form: element i selects byte 2*i+1 of the 32-byte input pair.
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1))
        return false;
  } else {
    // Unary form: both halves select the odd bytes of the single input.
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),  i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction (pack the low halfword of each word, i.e. bytes
/// 2 and 3 of every 4-byte unit).
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3))
        return false;
  } else {
    // Unary form: the second half of the mask repeats the first half.
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1),  i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8),  i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9),  i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
/// A vmrg mask interleaves UnitSize-byte units from the two inputs:
/// unit i of the result alternates between input unit (LHSStart/UnitSize)+i
/// and (RHSStart/UnitSize)+i, checked byte by byte below.
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units.
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit.
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);   // Low halves of both inputs.
  return isVMerge(N, UnitSize, 8, 8);      // Low half of the single input.
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);   // High halves of both inputs.
  return isVMerge(N, UnitSize, 0, 0);      // High half of the single input.
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Unary: indices wrap around the single 16-byte input.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  // Byte indices >= 16 would reference the second shuffle input.
  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
565 for (unsigned i = 1; i != EltSize; ++i) { 566 if (!isa<ConstantSDNode>(N->getOperand(i)) || 567 cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase) 568 return false; 569 } 570 571 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 572 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 573 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 574 assert(isa<ConstantSDNode>(N->getOperand(i)) && 575 "Invalid VECTOR_SHUFFLE mask!"); 576 for (unsigned j = 0; j != EltSize; ++j) 577 if (N->getOperand(i+j) != N->getOperand(j)) 578 return false; 579 } 580 581 return true; 582} 583 584/// isAllNegativeZeroVector - Returns true if all elements of build_vector 585/// are -0.0. 586bool PPC::isAllNegativeZeroVector(SDNode *N) { 587 assert(N->getOpcode() == ISD::BUILD_VECTOR); 588 if (PPC::isSplatShuffleMask(N, N->getNumOperands())) 589 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N)) 590 return CFP->getValueAPF().isNegZero(); 591 return false; 592} 593 594/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 595/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 596unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 597 assert(isSplatShuffleMask(N, EltSize)); 598 return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize; 599} 600 601/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 602/// by using a vspltis[bhw] instruction of the specified element size, return 603/// the constant being splatted. The ByteSize field indicates the number of 604/// bytes of each element [124] -> [bhw]. 
605SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 606 SDOperand OpVal(0, 0); 607 608 // If ByteSize of the splat is bigger than the element size of the 609 // build_vector, then we have a case where we are checking for a splat where 610 // multiple elements of the buildvector are folded together into a single 611 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 612 unsigned EltSize = 16/N->getNumOperands(); 613 if (EltSize < ByteSize) { 614 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 615 SDOperand UniquedVals[4]; 616 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 617 618 // See if all of the elements in the buildvector agree across. 619 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 620 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 621 // If the element isn't a constant, bail fully out. 622 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand(); 623 624 625 if (UniquedVals[i&(Multiple-1)].Val == 0) 626 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 627 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 628 return SDOperand(); // no match. 629 } 630 631 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 632 // either constant or undef values that are identical for each chunk. See 633 // if these chunks can form into a larger vspltis*. 634 635 // Check to see if all of the leading entries are either 0 or -1. If 636 // neither, then this won't fit into the immediate field. 637 bool LeadingZero = true; 638 bool LeadingOnes = true; 639 for (unsigned i = 0; i != Multiple-1; ++i) { 640 if (UniquedVals[i].Val == 0) continue; // Must have been undefs. 641 642 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 643 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 644 } 645 // Finally, check the least significant entry. 
646 if (LeadingZero) { 647 if (UniquedVals[Multiple-1].Val == 0) 648 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 649 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue(); 650 if (Val < 16) 651 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 652 } 653 if (LeadingOnes) { 654 if (UniquedVals[Multiple-1].Val == 0) 655 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 656 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended(); 657 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 658 return DAG.getTargetConstant(Val, MVT::i32); 659 } 660 661 return SDOperand(); 662 } 663 664 // Check to see if this buildvec has a single non-undef value in its elements. 665 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 666 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 667 if (OpVal.Val == 0) 668 OpVal = N->getOperand(i); 669 else if (OpVal != N->getOperand(i)) 670 return SDOperand(); 671 } 672 673 if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def. 674 675 unsigned ValSizeInBytes = 0; 676 uint64_t Value = 0; 677 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 678 Value = CN->getValue(); 679 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8; 680 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 681 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 682 Value = FloatToBits(CN->getValueAPF().convertToFloat()); 683 ValSizeInBytes = 4; 684 } 685 686 // If the splat value is larger than the element value, then we can never do 687 // this splat. The only case that we could fit the replicated bits into our 688 // immediate field for would be zero, and we prefer to use vxor for it. 689 if (ValSizeInBytes < ByteSize) return SDOperand(); 690 691 // If the element value is larger than the splat value, cut it in half and 692 // check to see if the two halves are equal. 
Continue doing this until we 693 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 694 while (ValSizeInBytes > ByteSize) { 695 ValSizeInBytes >>= 1; 696 697 // If the top half equals the bottom half, we're still ok. 698 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 699 (Value & ((1 << (8*ValSizeInBytes))-1))) 700 return SDOperand(); 701 } 702 703 // Properly sign extend the value. 704 int ShAmt = (4-ByteSize)*8; 705 int MaskVal = ((int)Value << ShAmt) >> ShAmt; 706 707 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 708 if (MaskVal == 0) return SDOperand(); 709 710 // Finally, if this value fits in a 5 bit sext field, return it 711 if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) 712 return DAG.getTargetConstant(MaskVal, MVT::i32); 713 return SDOperand(); 714} 715 716//===----------------------------------------------------------------------===// 717// Addressing Mode Selection 718//===----------------------------------------------------------------------===// 719 720/// isIntS16Immediate - This method tests to see if the node is either a 32-bit 721/// or 64-bit immediate, and if the value can be accurately represented as a 722/// sign extension from a 16-bit value. If so, this returns true and the 723/// immediate. 724static bool isIntS16Immediate(SDNode *N, short &Imm) { 725 if (N->getOpcode() != ISD::Constant) 726 return false; 727 728 Imm = (short)cast<ConstantSDNode>(N)->getValue(); 729 if (N->getValueType(0) == MVT::i32) 730 return Imm == (int32_t)cast<ConstantSDNode>(N)->getValue(); 731 else 732 return Imm == (int64_t)cast<ConstantSDNode>(N)->getValue(); 733} 734static bool isIntS16Immediate(SDOperand Op, short &Imm) { 735 return isIntS16Immediate(Op.Val, Imm); 736} 737 738 739/// SelectAddressRegReg - Given the specified addressed, check to see if it 740/// can be represented as an indexed [r+r] operation. Returns false if it 741/// can be more efficiently represented with [r+imm]. 
742bool PPCTargetLowering::SelectAddressRegReg(SDOperand N, SDOperand &Base, 743 SDOperand &Index, 744 SelectionDAG &DAG) { 745 short imm = 0; 746 if (N.getOpcode() == ISD::ADD) { 747 if (isIntS16Immediate(N.getOperand(1), imm)) 748 return false; // r+i 749 if (N.getOperand(1).getOpcode() == PPCISD::Lo) 750 return false; // r+i 751 752 Base = N.getOperand(0); 753 Index = N.getOperand(1); 754 return true; 755 } else if (N.getOpcode() == ISD::OR) { 756 if (isIntS16Immediate(N.getOperand(1), imm)) 757 return false; // r+i can fold it if we can. 758 759 // If this is an or of disjoint bitfields, we can codegen this as an add 760 // (for better address arithmetic) if the LHS and RHS of the OR are provably 761 // disjoint. 762 APInt LHSKnownZero, LHSKnownOne; 763 APInt RHSKnownZero, RHSKnownOne; 764 DAG.ComputeMaskedBits(N.getOperand(0), 765 APInt::getAllOnesValue(N.getOperand(0) 766 .getValueSizeInBits()), 767 LHSKnownZero, LHSKnownOne); 768 769 if (LHSKnownZero.getBoolValue()) { 770 DAG.ComputeMaskedBits(N.getOperand(1), 771 APInt::getAllOnesValue(N.getOperand(1) 772 .getValueSizeInBits()), 773 RHSKnownZero, RHSKnownOne); 774 // If all of the bits are known zero on the LHS or RHS, the add won't 775 // carry. 776 if (~(LHSKnownZero | RHSKnownZero) == 0) { 777 Base = N.getOperand(0); 778 Index = N.getOperand(1); 779 return true; 780 } 781 } 782 } 783 784 return false; 785} 786 787/// Returns true if the address N can be represented by a base register plus 788/// a signed 16-bit displacement [r+imm], and if it is not better 789/// represented as reg+reg. 790bool PPCTargetLowering::SelectAddressRegImm(SDOperand N, SDOperand &Disp, 791 SDOperand &Base, SelectionDAG &DAG){ 792 // If this can be more profitably realized as r+r, fail. 
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
      // Prefer a target frame index as the base when the LHS is one, so the
      // final offset can be folded during frame lowering.
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm)) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);

      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant((int)imm & 0xFFFF, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0" (R0 reads as zero in the base position).
    short Imm;
    if (isIntS16Immediate(CN, Imm)) {
      Disp = DAG.getTargetConstant(Imm, CN->getValueType(0));
      Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if (CN->getValueType(0) == MVT::i32 ||
        (int64_t)CN->getValue() == (int)CN->getValue()) {
      int Addr = (int)CN->getValue();

      // Otherwise, break this down into an LIS + disp.  The displacement is
      // the sign-extended low 16 bits; LIS materializes the adjusted high
      // half so that (high << 16) + sext(low) == Addr.
      Disp = DAG.getTargetConstant((short)Addr, MVT::i32);

      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: any address is representable as [r+0].
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}

/// SelectAddressRegRegOnly - Given the specified address, force it to be
/// represented as an indexed [r+r] operation.  Never fails; falls back to
/// using R0 (which reads as zero in the base position) plus N itself.
bool PPCTargetLowering::SelectAddressRegRegOnly(SDOperand N, SDOperand &Base,
                                                SDOperand &Index,
                                                SelectionDAG &DAG) {
  // Check to see if we can easily represent this as an [r+r] address.  This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the operand is an addition, always emit this as [r+r], since this is
  // better (for code size, and execution, as the memop does the add for free)
  // than emitting an explicit add.
  if (N.getOpcode() == ISD::ADD) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  Base = DAG.getRegister(PPC::R0, N.getValueType());
  Index = N;
  return true;
}

/// SelectAddressRegImmShift - Returns true if the address N can be
/// represented by a base register plus a signed 14-bit displacement
/// [r+imm*4].  Suitable for use by STD and friends (DS-form instructions,
/// whose encoded displacement is the byte offset shifted right by 2; the
/// low two bits of the offset must therefore be zero).
bool PPCTargetLowering::SelectAddressRegImmShift(SDOperand N, SDOperand &Disp,
                                                 SDOperand &Base,
                                                 SelectionDAG &DAG) {
  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // Pre-shift the offset; the instruction encodes imm/4.
      Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0);  // The global address.
      assert(Disp.getOpcode() == ISD::TargetGlobalAddress ||
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true;  // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    short imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) && (imm & 3) == 0) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      APInt LHSKnownZero, LHSKnownOne;
      DAG.ComputeMaskedBits(N.getOperand(0),
                            APInt::getAllOnesValue(N.getOperand(0)
                                                   .getValueSizeInBits()),
                            LHSKnownZero, LHSKnownOne);
      if ((LHSKnownZero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        Base = N.getOperand(0);
        Disp = DAG.getTargetConstant(((int)imm & 0xFFFF) >> 2, MVT::i32);
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.  Verify low two bits are clear.
    if ((CN->getValue() & 3) == 0) {
      // If this address fits entirely in a 14-bit sext immediate field, codegen
      // this as "d, 0"
      short Imm;
      if (isIntS16Immediate(CN, Imm)) {
        Disp = DAG.getTargetConstant((unsigned short)Imm >> 2, getPointerTy());
        Base = DAG.getRegister(PPC::R0, CN->getValueType(0));
        return true;
      }

      // Fold the low-part of 32-bit absolute addresses into addr mode.
      if (CN->getValueType(0) == MVT::i32 ||
          (int64_t)CN->getValue() == (int)CN->getValue()) {
        int Addr = (int)CN->getValue();

        // Otherwise, break this down into an LIS + disp.
        Disp = DAG.getTargetConstant((short)Addr >> 2, MVT::i32);

        Base = DAG.getTargetConstant((Addr-(signed short)Addr) >> 16, MVT::i32);
        unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDOperand(DAG.getTargetNode(Opc, CN->getValueType(0), Base), 0);
        return true;
      }
    }
  }

  // Fallback: any (4-byte aligned) address is representable as [r+0].
  Disp = DAG.getTargetConstant(0, getPointerTy());
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
  else
    Base = N;
  return true;      // [r+0]
}


/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
bool PPCTargetLowering::getPreIndexedAddressParts(SDNode *N, SDOperand &Base,
                                                  SDOperand &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) {
  // Disabled by default for now (gated on -enable-ppc-preinc).
  if (!EnablePPCPreinc) return false;

  SDOperand Ptr;
  MVT::ValueType VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();

  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    // NOTE(review): no-op self-assignment; looks like leftover (possibly to
    // quiet an unused-variable warning) — confirm intent before removing.
    ST = ST;
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
  } else
    return false;

  // PowerPC doesn't have preinc load/store instructions for vectors.
  if (MVT::isVector(VT))
    return false;

  // TODO: Check reg+reg first.

  // LDU/STU use reg+imm*4, others use reg+imm.
  if (VT != MVT::i64) {
    // reg + imm
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG))
      return false;
  } else {
    // reg + imm * 4.
    if (!SelectAddressRegImmShift(Ptr, Offset, Base, DAG))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux.  Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

/// LowerConstantPool - Lower a constant-pool reference to a hi/lo pair of
/// PPCISD nodes (plus the PIC base register when generating PIC for Darwin).
SDOperand PPCTargetLowering::LowerConstantPool(SDOperand Op,
                                               SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->getConstVal();
  SDOperand CPI = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, CPI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, CPI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

/// LowerJumpTable - Lower a jump-table reference; same hi/lo structure as
/// LowerConstantPool above.
SDOperand PPCTargetLowering::LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, JTI, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, JTI, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  return Lo;
}

/// LowerGlobalTLSAddress - Thread-local storage is not supported on PPC yet.
SDOperand PPCTargetLowering::LowerGlobalTLSAddress(SDOperand Op,
                                                   SelectionDAG &DAG) {
  assert(0 && "TLS not implemented for PPC.");
  return SDOperand(); // Not reached
}

/// LowerGlobalAddress - Lower a global-address reference to a hi/lo pair,
/// going through a lazy-resolver-stub load when the subtarget requires it.
SDOperand PPCTargetLowering::LowerGlobalAddress(SDOperand Op,
                                                SelectionDAG &DAG) {
  MVT::ValueType PtrVT = Op.getValueType();
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, PtrVT, GSDN->getOffset());
  // If it's a debug information descriptor, don't mess with it.
  if (DAG.isVerifiedDebugInfoDesc(Op))
    return GA;
  SDOperand Zero = DAG.getConstant(0, PtrVT);

  const TargetMachine &TM = DAG.getTarget();

  SDOperand Hi = DAG.getNode(PPCISD::Hi, PtrVT, GA, Zero);
  SDOperand Lo = DAG.getNode(PPCISD::Lo, PtrVT, GA, Zero);

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    return DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);
  }

  if (TM.getRelocationModel() == Reloc::PIC_) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, PtrVT,
                     DAG.getNode(PPCISD::GlobalBaseReg, PtrVT), Hi);
  }

  Lo = DAG.getNode(ISD::ADD, PtrVT, Hi, Lo);

  if (!TM.getSubtarget<PPCSubtarget>().hasLazyResolverStub(GV))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(PtrVT, DAG.getEntryNode(), Lo, NULL, 0);
}

/// LowerSETCC - Custom-lower SETCC to expose bit-twiddling opportunities to
/// the DAG combiner; returns an empty SDOperand to use default expansion.
SDOperand PPCTargetLowering::LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      // Widen sub-word operands so the ctlz result is meaningful.
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      // (x == 0) computed as ctlz(x) >> log2(bitwidth): the shift yields 1
      // only when all bits of x are zero.
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by xor'ing the rhs with the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.  The
  // normal approach here uses sub to do this instead of xor.  Using xor exposes
  // the result to other bit-twiddling opportunities.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::XOR, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

/// LowerVAARG - va_arg lowering for the ELF32 ABI is not implemented.
SDOperand PPCTargetLowering::LowerVAARG(SDOperand Op, SelectionDAG &DAG,
                                        int VarArgsFrameIndex,
                                        int VarArgsStackOffset,
                                        unsigned VarArgsNumGPR,
                                        unsigned VarArgsNumFPR,
                                        const PPCSubtarget &Subtarget) {

  assert(0 && "VAARG in ELF32 ABI not implemented yet!");
  return SDOperand(); // Not reached
}

/// LowerVASTART - On Macho, va_start stores the address of the vararg frame
/// slot; on ELF32 it fills in the four-field va_list structure described
/// below.
SDOperand PPCTargetLowering::LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                                          int VarArgsFrameIndex,
                                          int VarArgsStackOffset,
                                          unsigned VarArgsNumGPR,
                                          unsigned VarArgsNumFPR,
                                          const PPCSubtarget &Subtarget) {

  if (Subtarget.isMachoABI()) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
    SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
    const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    return DAG.getStore(Op.getOperand(0), FR, Op.getOperand(1), SV, 0);
  }

  // For ELF 32 ABI we follow the layout of the va_list struct.
  // We suppose the given va_list is already allocated.
  //
  // typedef struct {
  //  char gpr;     /* index into the array of 8 GPRs
  //                 * stored in the register save area
  //                 * gpr=0 corresponds to r3,
  //                 * gpr=1 to r4, etc.
  //                 */
  //  char fpr;     /* index into the array of 8 FPRs
  //                 * stored in the register save area
  //                 * fpr=0 corresponds to f1,
  //                 * fpr=1 to f2, etc.
  //                 */
  //  char *overflow_arg_area;
  //                /* location on stack that holds
  //                 * the next overflow argument
  //                 */
  //  char *reg_save_area;
  //                /* where r3:r10 and f1:f8 (if saved)
  //                 * are stored
  //                 */
  // } va_list[1];


  SDOperand ArgGPR = DAG.getConstant(VarArgsNumGPR, MVT::i8);
  SDOperand ArgFPR = DAG.getConstant(VarArgsNumFPR, MVT::i8);


  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  SDOperand StackOffsetFI = DAG.getFrameIndex(VarArgsStackOffset, PtrVT);
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);

  // Byte distances between consecutive va_list fields (see struct above).
  uint64_t FrameOffset = MVT::getSizeInBits(PtrVT)/8;
  SDOperand ConstFrameOffset = DAG.getConstant(FrameOffset, PtrVT);

  uint64_t StackOffset = MVT::getSizeInBits(PtrVT)/8 - 1;
  SDOperand ConstStackOffset = DAG.getConstant(StackOffset, PtrVT);

  uint64_t FPROffset = 1;
  SDOperand ConstFPROffset = DAG.getConstant(FPROffset, PtrVT);

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();

  // Store first byte : number of int regs
  SDOperand firstStore = DAG.getStore(Op.getOperand(0), ArgGPR,
                                      Op.getOperand(1), SV, 0);
  uint64_t nextOffset = FPROffset;
  SDOperand nextPtr = DAG.getNode(ISD::ADD, PtrVT, Op.getOperand(1),
                                  ConstFPROffset);

  // Store second byte : number of float regs
  SDOperand secondStore =
    DAG.getStore(firstStore, ArgFPR, nextPtr, SV, nextOffset);
  nextOffset += StackOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstStackOffset);

  // Store second word : arguments given on stack
  SDOperand thirdStore =
    DAG.getStore(secondStore, StackOffsetFI, nextPtr, SV, nextOffset);
  nextOffset += FrameOffset;
  nextPtr = DAG.getNode(ISD::ADD, PtrVT, nextPtr, ConstFrameOffset);

  // Store third word : arguments given in registers
  return DAG.getStore(thirdStore, FR, nextPtr, SV, nextOffset);

}

#include "PPCGenCallingConv.inc"

/// GetFPR - Get the set of FP registers that should be allocated for arguments,
/// depending on which subtarget is selected.  Macho allows f1-f13; other ABIs
/// (ELF32) allow f1-f8.
static const unsigned *GetFPR(const PPCSubtarget &Subtarget) {
  if (Subtarget.isMachoABI()) {
    static const unsigned FPR[] = {
      PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
      PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
    };
    return FPR;
  }


  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8
  };
  return FPR;
}

/// LowerFORMAL_ARGUMENTS - Lower incoming arguments for the current function:
/// copy register-passed values out of their ABI registers and load (or
/// address) stack-passed values.  The VarArgs* out-parameters record state
/// needed later by va_start lowering.
SDOperand
PPCTargetLowering::LowerFORMAL_ARGUMENTS(SDOperand Op,
                                         SelectionDAG &DAG,
                                         int &VarArgsFrameIndex,
                                         int &VarArgsStackOffset,
                                         unsigned &VarArgsNumGPR,
                                         unsigned &VarArgsNumFPR,
                                         const PPCSubtarget &Subtarget) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();
  SmallVector<SDOperand, 8> ArgValues;
  SDOperand Root = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // Arguments start after the ABI-defined linkage area.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
1340 PPC::X3, PPC::X4, PPC::X5, PPC::X6, 1341 PPC::X7, PPC::X8, PPC::X9, PPC::X10, 1342 }; 1343 1344 static const unsigned *FPR = GetFPR(Subtarget); 1345 1346 static const unsigned VR[] = { 1347 PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, 1348 PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13 1349 }; 1350 1351 const unsigned Num_GPR_Regs = array_lengthof(GPR_32); 1352 const unsigned Num_FPR_Regs = isMachoABI ? 13 : 8; 1353 const unsigned Num_VR_Regs = array_lengthof( VR); 1354 1355 unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; 1356 1357 const unsigned *GPR = isPPC64 ? GPR_64 : GPR_32; 1358 1359 // In 32-bit non-varargs functions, the stack space for vectors is after the 1360 // stack space for non-vectors. We do not use this space unless we have 1361 // too many vectors to fit in registers, something that only occurs in 1362 // constructed examples:), but we have to walk the arglist to figure 1363 // that out...for the pathological case, compute VecArgOffset as the 1364 // start of the vector parameter area. Computing VecArgOffset is the 1365 // entire point of the following loop. 1366 // Altivec is not mentioned in the ppc32 Elf Supplement, so I'm not trying 1367 // to handle Elf here. 1368 unsigned VecArgOffset = ArgOffset; 1369 if (!isVarArg && !isPPC64) { 1370 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; 1371 ++ArgNo) { 1372 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); 1373 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; 1374 ISD::ArgFlagsTy Flags = 1375 cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); 1376 1377 if (Flags.isByVal()) { 1378 // ObjSize is the true size, ArgSize rounded up to multiple of regs. 
1379 ObjSize = Flags.getByValSize(); 1380 unsigned ArgSize = 1381 ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1382 VecArgOffset += ArgSize; 1383 continue; 1384 } 1385 1386 switch(ObjectVT) { 1387 default: assert(0 && "Unhandled argument type!"); 1388 case MVT::i32: 1389 case MVT::f32: 1390 VecArgOffset += isPPC64 ? 8 : 4; 1391 break; 1392 case MVT::i64: // PPC64 1393 case MVT::f64: 1394 VecArgOffset += 8; 1395 break; 1396 case MVT::v4f32: 1397 case MVT::v4i32: 1398 case MVT::v8i16: 1399 case MVT::v16i8: 1400 // Nothing to do, we're only looking at Nonvector args here. 1401 break; 1402 } 1403 } 1404 } 1405 // We've found where the vector parameter area in memory is. Skip the 1406 // first 12 parameters; these don't use that memory. 1407 VecArgOffset = ((VecArgOffset+15)/16)*16; 1408 VecArgOffset += 12*16; 1409 1410 // Add DAG nodes to load the arguments or copy them out of registers. On 1411 // entry to a function on PPC, the arguments start after the linkage area, 1412 // although the first ones are often in registers. 1413 // 1414 // In the ELF 32 ABI, GPRs and stack are double word align: an argument 1415 // represented with two words (long long or double) must be copied to an 1416 // even GPR_idx value or to an even ArgOffset value. TODO: implement this. 1417 1418 SmallVector<SDOperand, 8> MemOps; 1419 1420 for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) { 1421 SDOperand ArgVal; 1422 bool needsLoad = false; 1423 MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType(); 1424 unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8; 1425 unsigned ArgSize = ObjSize; 1426 ISD::ArgFlagsTy Flags = 1427 cast<ARG_FLAGSSDNode>(Op.getOperand(ArgNo+3))->getArgFlags(); 1428 // See if next argument requires stack alignment in ELF 1429 bool Expand = false; // TODO: implement this. 1430 1431 unsigned CurArgOffset = ArgOffset; 1432 1433 // FIXME alignment for ELF may not be right 1434 // FIXME the codegen can be much improved in some cases. 
1435 // We do not have to keep everything in memory. 1436 if (Flags.isByVal()) { 1437 // ObjSize is the true size, ArgSize rounded up to multiple of registers. 1438 ObjSize = Flags.getByValSize(); 1439 ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize; 1440 // Double word align in ELF 1441 if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1442 // Objects of size 1 and 2 are right justified, everything else is 1443 // left justified. This means the memory address is adjusted forwards. 1444 if (ObjSize==1 || ObjSize==2) { 1445 CurArgOffset = CurArgOffset + (4 - ObjSize); 1446 } 1447 // The value of the object is its address. 1448 int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset); 1449 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1450 ArgValues.push_back(FIN); 1451 if (ObjSize==1 || ObjSize==2) { 1452 if (GPR_idx != Num_GPR_Regs) { 1453 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1454 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1455 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1456 SDOperand Store = DAG.getTruncStore(Val.getValue(1), Val, FIN, 1457 NULL, 0, ObjSize==1 ? MVT::i8 : MVT::i16 ); 1458 MemOps.push_back(Store); 1459 ++GPR_idx; 1460 if (isMachoABI) ArgOffset += PtrByteSize; 1461 } else { 1462 ArgOffset += PtrByteSize; 1463 } 1464 continue; 1465 } 1466 for (unsigned j = 0; j < ArgSize; j += PtrByteSize) { 1467 // Store whatever pieces of the object are in registers 1468 // to memory. ArgVal will be address of the beginning of 1469 // the object. 
1470 if (GPR_idx != Num_GPR_Regs) { 1471 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1472 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1473 int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset); 1474 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1475 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1476 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1477 MemOps.push_back(Store); 1478 ++GPR_idx; 1479 if (isMachoABI) ArgOffset += PtrByteSize; 1480 } else { 1481 ArgOffset += ArgSize - (ArgOffset-CurArgOffset); 1482 break; 1483 } 1484 } 1485 continue; 1486 } 1487 1488 switch (ObjectVT) { 1489 default: assert(0 && "Unhandled argument type!"); 1490 case MVT::i32: 1491 if (!isPPC64) { 1492 // Double word align in ELF 1493 if (Expand && isELF32_ABI) GPR_idx += (GPR_idx % 2); 1494 1495 if (GPR_idx != Num_GPR_Regs) { 1496 unsigned VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1497 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1498 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32); 1499 ++GPR_idx; 1500 } else { 1501 needsLoad = true; 1502 ArgSize = PtrByteSize; 1503 } 1504 // Stack align in ELF 1505 if (needsLoad && Expand && isELF32_ABI) 1506 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1507 // All int arguments reserve stack space in Macho ABI. 1508 if (isMachoABI || needsLoad) ArgOffset += PtrByteSize; 1509 break; 1510 } 1511 // FALLTHROUGH 1512 case MVT::i64: // PPC64 1513 if (GPR_idx != Num_GPR_Regs) { 1514 unsigned VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1515 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1516 ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i64); 1517 1518 if (ObjectVT == MVT::i32) { 1519 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote 1520 // value to MVT::i64 and then truncate to the correct register size. 
1521 if (Flags.isSExt()) 1522 ArgVal = DAG.getNode(ISD::AssertSext, MVT::i64, ArgVal, 1523 DAG.getValueType(ObjectVT)); 1524 else if (Flags.isZExt()) 1525 ArgVal = DAG.getNode(ISD::AssertZext, MVT::i64, ArgVal, 1526 DAG.getValueType(ObjectVT)); 1527 1528 ArgVal = DAG.getNode(ISD::TRUNCATE, MVT::i32, ArgVal); 1529 } 1530 1531 ++GPR_idx; 1532 } else { 1533 needsLoad = true; 1534 } 1535 // All int arguments reserve stack space in Macho ABI. 1536 if (isMachoABI || needsLoad) ArgOffset += 8; 1537 break; 1538 1539 case MVT::f32: 1540 case MVT::f64: 1541 // Every 4 bytes of argument space consumes one of the GPRs available for 1542 // argument passing. 1543 if (GPR_idx != Num_GPR_Regs && isMachoABI) { 1544 ++GPR_idx; 1545 if (ObjSize == 8 && GPR_idx != Num_GPR_Regs && !isPPC64) 1546 ++GPR_idx; 1547 } 1548 if (FPR_idx != Num_FPR_Regs) { 1549 unsigned VReg; 1550 if (ObjectVT == MVT::f32) 1551 VReg = RegInfo.createVirtualRegister(&PPC::F4RCRegClass); 1552 else 1553 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1554 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1555 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1556 ++FPR_idx; 1557 } else { 1558 needsLoad = true; 1559 } 1560 1561 // Stack align in ELF 1562 if (needsLoad && Expand && isELF32_ABI) 1563 ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize; 1564 // All FP arguments reserve stack space in Macho ABI. 1565 if (isMachoABI || needsLoad) ArgOffset += isPPC64 ? 8 : ObjSize; 1566 break; 1567 case MVT::v4f32: 1568 case MVT::v4i32: 1569 case MVT::v8i16: 1570 case MVT::v16i8: 1571 // Note that vector arguments in registers don't reserve stack space, 1572 // except in varargs functions. 
1573 if (VR_idx != Num_VR_Regs) { 1574 unsigned VReg = RegInfo.createVirtualRegister(&PPC::VRRCRegClass); 1575 RegInfo.addLiveIn(VR[VR_idx], VReg); 1576 ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT); 1577 if (isVarArg) { 1578 while ((ArgOffset % 16) != 0) { 1579 ArgOffset += PtrByteSize; 1580 if (GPR_idx != Num_GPR_Regs) 1581 GPR_idx++; 1582 } 1583 ArgOffset += 16; 1584 GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); 1585 } 1586 ++VR_idx; 1587 } else { 1588 if (!isVarArg && !isPPC64) { 1589 // Vectors go after all the nonvectors. 1590 CurArgOffset = VecArgOffset; 1591 VecArgOffset += 16; 1592 } else { 1593 // Vectors are aligned. 1594 ArgOffset = ((ArgOffset+15)/16)*16; 1595 CurArgOffset = ArgOffset; 1596 ArgOffset += 16; 1597 } 1598 needsLoad = true; 1599 } 1600 break; 1601 } 1602 1603 // We need to load the argument to a virtual register if we determined above 1604 // that we ran out of physical registers of the appropriate type. 1605 if (needsLoad) { 1606 int FI = MFI->CreateFixedObject(ObjSize, 1607 CurArgOffset + (ArgSize - ObjSize)); 1608 SDOperand FIN = DAG.getFrameIndex(FI, PtrVT); 1609 ArgVal = DAG.getLoad(ObjectVT, Root, FIN, NULL, 0); 1610 } 1611 1612 ArgValues.push_back(ArgVal); 1613 } 1614 1615 // If the function takes variable number of arguments, make a frame index for 1616 // the start of the first vararg value... for expansion of llvm.va_start. 1617 if (isVarArg) { 1618 1619 int depth; 1620 if (isELF32_ABI) { 1621 VarArgsNumGPR = GPR_idx; 1622 VarArgsNumFPR = FPR_idx; 1623 1624 // Make room for Num_GPR_Regs, Num_FPR_Regs and for a possible frame 1625 // pointer. 
1626 depth = -(Num_GPR_Regs * MVT::getSizeInBits(PtrVT)/8 + 1627 Num_FPR_Regs * MVT::getSizeInBits(MVT::f64)/8 + 1628 MVT::getSizeInBits(PtrVT)/8); 1629 1630 VarArgsStackOffset = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, 1631 ArgOffset); 1632 1633 } 1634 else 1635 depth = ArgOffset; 1636 1637 VarArgsFrameIndex = MFI->CreateFixedObject(MVT::getSizeInBits(PtrVT)/8, 1638 depth); 1639 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT); 1640 1641 // In ELF 32 ABI, the fixed integer arguments of a variadic function are 1642 // stored to the VarArgsFrameIndex on the stack. 1643 if (isELF32_ABI) { 1644 for (GPR_idx = 0; GPR_idx != VarArgsNumGPR; ++GPR_idx) { 1645 SDOperand Val = DAG.getRegister(GPR[GPR_idx], PtrVT); 1646 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1647 MemOps.push_back(Store); 1648 // Increment the address by four for the next argument to store 1649 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); 1650 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1651 } 1652 } 1653 1654 // If this function is vararg, store any remaining integer argument regs 1655 // to their spots on the stack so that they may be loaded by deferencing the 1656 // result of va_next. 
1657 for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { 1658 unsigned VReg; 1659 if (isPPC64) 1660 VReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass); 1661 else 1662 VReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass); 1663 1664 RegInfo.addLiveIn(GPR[GPR_idx], VReg); 1665 SDOperand Val = DAG.getCopyFromReg(Root, VReg, PtrVT); 1666 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1667 MemOps.push_back(Store); 1668 // Increment the address by four for the next argument to store 1669 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(PtrVT)/8, PtrVT); 1670 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1671 } 1672 1673 // In ELF 32 ABI, the double arguments are stored to the VarArgsFrameIndex 1674 // on the stack. 1675 if (isELF32_ABI) { 1676 for (FPR_idx = 0; FPR_idx != VarArgsNumFPR; ++FPR_idx) { 1677 SDOperand Val = DAG.getRegister(FPR[FPR_idx], MVT::f64); 1678 SDOperand Store = DAG.getStore(Root, Val, FIN, NULL, 0); 1679 MemOps.push_back(Store); 1680 // Increment the address by eight for the next argument to store 1681 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8, 1682 PtrVT); 1683 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1684 } 1685 1686 for (; FPR_idx != Num_FPR_Regs; ++FPR_idx) { 1687 unsigned VReg; 1688 VReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass); 1689 1690 RegInfo.addLiveIn(FPR[FPR_idx], VReg); 1691 SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::f64); 1692 SDOperand Store = DAG.getStore(Val.getValue(1), Val, FIN, NULL, 0); 1693 MemOps.push_back(Store); 1694 // Increment the address by eight for the next argument to store 1695 SDOperand PtrOff = DAG.getConstant(MVT::getSizeInBits(MVT::f64)/8, 1696 PtrVT); 1697 FIN = DAG.getNode(ISD::ADD, PtrOff.getValueType(), FIN, PtrOff); 1698 } 1699 } 1700 } 1701 1702 if (!MemOps.empty()) 1703 Root = DAG.getNode(ISD::TokenFactor, MVT::Other,&MemOps[0],MemOps.size()); 1704 1705 ArgValues.push_back(Root); 1706 1707 // 
  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, &ArgValues[0], ArgValues.size());
}

/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
static SDNode *isBLACompatibleAddress(SDOperand Op, SelectionDAG &DAG) {
  // Only a compile-time-constant address can become an absolute branch target.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return 0;

  int Addr = C->getValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      (Addr << 6 >> 6) != Addr)
    return 0;  // Top 6 bits have to be sext of immediate.

  // Encode the address divided by 4; the low two bits are implicit in the
  // instruction encoding (checked zero above).
  return DAG.getConstant((int)C->getValue() >> 2,
                         DAG.getTargetLoweringInfo().getPointerTy()).Val;
}

/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size".  Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
static SDOperand
CreateCopyOfByValArgument(SDOperand Src, SDOperand Dst, SDOperand Chain,
                          ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
                          unsigned Size) {
  // Emit a generic memcpy node; AlwaysInline = 0 lets the target-independent
  // lowering decide whether to expand it inline or call memcpy.
  SDOperand AlignNode = DAG.getConstant(Flags.getByValAlign(), MVT::i32);
  SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
  SDOperand AlwaysInline = DAG.getConstant(0, MVT::i32);
  return DAG.getMemcpy(Chain, Dst, Src, SizeNode, AlignNode, AlwaysInline);
}

/// LowerCALL - Lower an outgoing ISD::CALL for the Macho (Darwin) and ELF32
/// PPC ABIs.  Operand 0 is the chain, operand 2 the vararg flag, operand 4
/// the callee; operands 5.. are (argument, argument-flags) pairs.
SDOperand PPCTargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG,
                                       const PPCSubtarget &Subtarget,
                                       TargetMachine &TM) {
  SDOperand Chain = Op.getOperand(0);
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  SDOperand Callee = Op.getOperand(4);
  // Each argument occupies two operands: the value and its flags.
  unsigned NumOps = (Op.getNumOperands() - 5) / 2;

  bool isMachoABI = Subtarget.isMachoABI();
  bool isELF32_ABI = Subtarget.isELF32_ABI();

  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  bool isPPC64 = PtrVT == MVT::i64;
  unsigned PtrByteSize = isPPC64 ? 8 : 4;

  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.  We start with 24/48 bytes, which is
  // prereserved space for [SP][CR][LR][3 x unused].
  unsigned NumBytes = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);

  // Add up all the space actually used.
  // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually
  // they all go in registers, but we must reserve stack space for them for
  // possible use by the caller.  In varargs or 64-bit calls, parameters are
  // assigned stack space in order, with padding so Altivec parameters are
  // 16-byte aligned.
  unsigned nAltivecParamsAtEnd = 0;
  // First pass over the arguments: compute the total stack space needed.
  for (unsigned i = 0; i != NumOps; ++i) {
    SDOperand Arg = Op.getOperand(5+2*i);
    MVT::ValueType ArgVT = Arg.getValueType();
    if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 ||
        ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8) {
      if (!isVarArg && !isPPC64) {
        // Non-varargs Altivec parameters go after all the non-Altivec parameters;
        // do those last so we know how much padding we need.
        nAltivecParamsAtEnd++;
        continue;
      } else {
        // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary.
        NumBytes = ((NumBytes+15)/16)*16;
      }
    }
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
    unsigned ArgSize =MVT::getSizeInBits(Op.getOperand(5+2*i).getValueType())/8;
    if (Flags.isByVal())
      ArgSize = Flags.getByValSize();
    // Each argument's stack slot is rounded up to a pointer-size multiple.
    ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
    NumBytes += ArgSize;
  }
  // Allow for Altivec parameters at the end, if needed.
  if (nAltivecParamsAtEnd) {
    NumBytes = ((NumBytes+15)/16)*16;
    NumBytes += 16*nAltivecParamsAtEnd;
  }

  // The prolog code of the callee may store up to 8 GPR argument registers to
  // the stack, allowing va_start to index over them in memory if its varargs.
  // Because we cannot tell if this is needed on the caller side, we have to
  // conservatively assume that it is needed.  As such, make sure we have at
  // least enough stack space for the caller to store the 8 GPRs.
  NumBytes = std::max(NumBytes,
                      PPCFrameInfo::getMinCallFrameSize(isPPC64, isMachoABI));

  // Adjust the stack pointer for the new arguments...
  // These operations are automatically eliminated by the prolog/epilog pass
  Chain = DAG.getCALLSEQ_START(Chain,
                               DAG.getConstant(NumBytes, PtrVT));
  SDOperand CallSeqStart = Chain;

  // Set up a copy of the stack pointer for use loading and storing any
  // arguments that may not fit in the registers available for argument
  // passing.
  SDOperand StackPtr;
  if (isPPC64)
    StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
  else
    StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

  // Figure out which arguments are going to go in registers, and which in
  // memory.  Also, if this is a vararg function, floating point operations
  // must be stored to our stack, and loaded into integer regs as well, if
  // any integer regs are available for argument passing.
  unsigned ArgOffset = PPCFrameInfo::getLinkageSize(isPPC64, isMachoABI);
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;

  static const unsigned GPR_32[] = {           // 32-bit registers.
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned GPR_64[] = {           // 64-bit registers.
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const unsigned *FPR = GetFPR(Subtarget);

  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };
  const unsigned NumGPRs = array_lengthof(GPR_32);
  // Macho uses F1-F13 for FP args; ELF32 uses F1-F8.
  const unsigned NumFPRs = isMachoABI ? 13 : 8;
  const unsigned NumVRs = array_lengthof( VR);

  const unsigned *GPR = isPPC64 ?
GPR_64 : GPR_32;

  std::vector<std::pair<unsigned, SDOperand> > RegsToPass;
  SmallVector<SDOperand, 8> MemOpChains;
  // Second pass: assign each argument to registers and/or stack slots.
  for (unsigned i = 0; i != NumOps; ++i) {
    bool inMem = false;
    SDOperand Arg = Op.getOperand(5+2*i);
    ISD::ArgFlagsTy Flags =
      cast<ARG_FLAGSSDNode>(Op.getOperand(5+2*i+1))->getArgFlags();
    // See if next argument requires stack alignment in ELF
    // NOTE(review): Expand is always false, so the ELF stack-alignment
    // branches below are currently dead code.
    bool Expand = false; // TODO: implement this.

    // PtrOff will be used to store the current argument to the stack if a
    // register cannot be found for it.
    SDOperand PtrOff;

    // Stack align in ELF 32
    if (isELF32_ABI && Expand)
      PtrOff = DAG.getConstant(ArgOffset + ((ArgOffset/4) % 2) * PtrByteSize,
                               StackPtr.getValueType());
    else
      PtrOff = DAG.getConstant(ArgOffset, StackPtr.getValueType());

    PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr, PtrOff);

    // On PPC64, promote integers to 64-bit values.
    if (isPPC64 && Arg.getValueType() == MVT::i32) {
      // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
      unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      Arg = DAG.getNode(ExtOp, MVT::i64, Arg);
    }

    // FIXME Elf untested, what are alignment rules?
    // FIXME memcpy is used way more than necessary.  Correctness first.
    if (Flags.isByVal()) {
      unsigned Size = Flags.getByValSize();
      if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
      if (Size==1 || Size==2) {
        // Very small objects are passed right-justified.
        // Everything else is passed left-justified.
        MVT::ValueType VT = (Size==1) ?
MVT::i8 : MVT::i16;
        if (GPR_idx != NumGPRs) {
          // Load the small byval object and pass it in a register.
          SDOperand Load = DAG.getExtLoad(ISD::EXTLOAD, PtrVT, Chain, Arg,
                                          NULL, 0, VT);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          // Out of registers: memcpy it into the right-justified position of
          // its stack slot (offset 4 - Size from the slot start).
          SDOperand Const = DAG.getConstant(4 - Size, PtrOff.getValueType());
          SDOperand AddPtr = DAG.getNode(ISD::ADD, PtrVT, PtrOff, Const);
          SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, AddPtr,
                                CallSeqStart.Val->getOperand(0),
                                Flags, DAG, Size);
          // This must go outside the CALLSEQ_START..END.
          SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                               CallSeqStart.Val->getOperand(1));
          DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
          Chain = CallSeqStart = NewCallSeqStart;
          ArgOffset += PtrByteSize;
        }
        continue;
      }
      // Copy entire object into memory.  There are cases where gcc-generated
      // code assumes it is there, even if it could be put entirely into
      // registers.  (This is not what the doc says.)
      SDOperand MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
                            CallSeqStart.Val->getOperand(0),
                            Flags, DAG, Size);
      // This must go outside the CALLSEQ_START..END.
      SDOperand NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall,
                           CallSeqStart.Val->getOperand(1));
      DAG.ReplaceAllUsesWith(CallSeqStart.Val, NewCallSeqStart.Val);
      Chain = CallSeqStart = NewCallSeqStart;
      // And copy the pieces of it that fit into registers.
      for (unsigned j=0; j<Size; j+=PtrByteSize) {
        SDOperand Const = DAG.getConstant(j, PtrOff.getValueType());
        SDOperand AddArg = DAG.getNode(ISD::ADD, PtrVT, Arg, Const);
        if (GPR_idx != NumGPRs) {
          SDOperand Load = DAG.getLoad(PtrVT, Chain, AddArg, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
          if (isMachoABI)
            ArgOffset += PtrByteSize;
        } else {
          // No registers left; account for the remainder of the object on
          // the stack (rounded up to pointer-size units) and stop.
          ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
          break;
        }
      }
      continue;
    }

    switch (Arg.getValueType()) {
    default: assert(0 && "Unexpected ValueType for argument!");
    case MVT::i32:
    case MVT::i64:
      // Double word align in ELF
      if (isELF32_ABI && Expand) GPR_idx += (GPR_idx % 2);
      if (GPR_idx != NumGPRs) {
        RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Expand)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;

        ArgOffset += PtrByteSize;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      if (FPR_idx != NumFPRs) {
        RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));

        if (isVarArg) {
          // Varargs FP values must also be in memory (and, on Macho, in the
          // shadowing GPRs) so the callee can fetch them via va_arg.
          SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);

          // Float varargs are always shadowed in available integer registers
          if (GPR_idx != NumGPRs) {
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
          // An f64 in 32-bit mode shadows a second GPR, loaded from the
          // second word of the stored double.
          if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 && !isPPC64){
            SDOperand ConstFour = DAG.getConstant(4, PtrOff.getValueType());
            PtrOff = DAG.getNode(ISD::ADD, PtrVT, PtrOff,
ConstFour);
            SDOperand Load = DAG.getLoad(PtrVT, Store, PtrOff, NULL, 0);
            MemOpChains.push_back(Load.getValue(1));
            if (isMachoABI) RegsToPass.push_back(std::make_pair(GPR[GPR_idx++],
                                                                Load));
          }
        } else {
          // If we have any FPRs remaining, we may also have GPRs remaining.
          // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
          // GPRs.
          if (isMachoABI) {
            if (GPR_idx != NumGPRs)
              ++GPR_idx;
            if (GPR_idx != NumGPRs && Arg.getValueType() == MVT::f64 &&
                !isPPC64)  // PPC64 has 64-bit GPR's obviously :)
              ++GPR_idx;
          }
        }
      } else {
        MemOpChains.push_back(DAG.getStore(Chain, Arg, PtrOff, NULL, 0));
        inMem = true;
      }
      if (inMem || isMachoABI) {
        // Stack align in ELF
        if (isELF32_ABI && Expand)
          ArgOffset += ((ArgOffset/4) % 2) * PtrByteSize;
        if (isPPC64)
          ArgOffset += 8;
        else
          ArgOffset += Arg.getValueType() == MVT::f32 ? 4 : 8;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      if (isVarArg) {
        // These go aligned on the stack, or in the corresponding R registers
        // when within range.  The Darwin PPC ABI doc claims they also go in
        // V registers; in fact gcc does this only for arguments that are
        // prototyped, not for those that match the ...  We do it for all
        // arguments, seems to work.
        while (ArgOffset % 16 !=0) {
          ArgOffset += PtrByteSize;
          if (GPR_idx != NumGPRs)
            GPR_idx++;
        }
        // We could elide this store in the case where the object fits
        // entirely in R registers.  Maybe later.
        PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                            DAG.getConstant(ArgOffset, PtrVT));
        SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
        MemOpChains.push_back(Store);
        if (VR_idx != NumVRs) {
          // Reload the just-stored value into a V register (the load type is
          // immaterial for a 16-byte vector register copy).
          SDOperand Load = DAG.getLoad(MVT::v4f32, Store, PtrOff, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
        }
        ArgOffset += 16;
        // Also shadow the vector in as many GPRs as remain.
        for (unsigned i=0; i<16; i+=PtrByteSize) {
          if (GPR_idx == NumGPRs)
            break;
          SDOperand Ix = DAG.getNode(ISD::ADD, PtrVT, PtrOff,
                                  DAG.getConstant(i, PtrVT));
          SDOperand Load = DAG.getLoad(PtrVT, Store, Ix, NULL, 0);
          MemOpChains.push_back(Load.getValue(1));
          RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
        }
        break;
      }
      // Non-varargs Altivec params generally go in registers, but have
      // stack space allocated at the end.
      if (VR_idx != NumVRs) {
        // Doesn't have GPR space allocated.
        RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
      } else if (nAltivecParamsAtEnd==0) {
        // We are emitting Altivec params in order.
        PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                            DAG.getConstant(ArgOffset, PtrVT));
        SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
        MemOpChains.push_back(Store);
        ArgOffset += 16;
      }
      break;
    }
  }
  // If all Altivec parameters fit in registers, as they usually do,
  // they get stack space following the non-Altivec parameters.  We
  // don't track this here because nobody below needs it.
  // If there are more Altivec parameters than fit in registers emit
  // the stores here.
  if (!isVarArg && nAltivecParamsAtEnd > NumVRs) {
    unsigned j = 0;
    // Offset is aligned; skip 1st 12 params which go in V registers.
    ArgOffset = ((ArgOffset+15)/16)*16;
    ArgOffset += 12*16;
    for (unsigned i = 0; i != NumOps; ++i) {
      SDOperand Arg = Op.getOperand(5+2*i);
      MVT::ValueType ArgType = Arg.getValueType();
      if (ArgType==MVT::v4f32 || ArgType==MVT::v4i32 ||
          ArgType==MVT::v8i16 || ArgType==MVT::v16i8) {
        if (++j > NumVRs) {
          SDOperand PtrOff = DAG.getNode(ISD::ADD, PtrVT, StackPtr,
                                         DAG.getConstant(ArgOffset, PtrVT));
          SDOperand Store = DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
          MemOpChains.push_back(Store);
          ArgOffset += 16;
        }
      }
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, MVT::Other,
                        &MemOpChains[0], MemOpChains.size());

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into the appropriate regs.
  SDOperand InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, RegsToPass[i].first, RegsToPass[i].second,
                             InFlag);
    InFlag = Chain.getValue(1);
  }

  // With the ELF 32 ABI, set CR6 to true if this is a vararg call.
  // (PPC::CR1EQ names bit 6 of the condition register.)
  if (isVarArg && isELF32_ABI) {
    SDOperand SetCR(DAG.getTargetNode(PPC::CRSET, MVT::i32), 0);
    Chain = DAG.getCopyToReg(Chain, PPC::CR1EQ, SetCR, InFlag);
    InFlag = Chain.getValue(1);
  }

  std::vector<MVT::ValueType> NodeTys;
  NodeTys.push_back(MVT::Other);   // Returns a chain
  NodeTys.push_back(MVT::Flag);    // Returns a flag for retval copy to use.

  SmallVector<SDOperand, 8> Ops;
  unsigned CallOpc = isMachoABI? PPCISD::CALL_Macho : PPCISD::CALL_ELF;

  // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
  // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
  // node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), Callee.getValueType());
  else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
    Callee = DAG.getTargetExternalSymbol(S->getSymbol(), Callee.getValueType());
  else if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
    // If this is an absolute destination address, use the munged value.
    Callee = SDOperand(Dest, 0);
  else {
    // Otherwise, this is an indirect call.  We have to use a MTCTR/BCTRL pair
    // to do the call, we can't use PPCISD::CALL.
    SDOperand MTCTROps[] = {Chain, Callee, InFlag};
    Chain = DAG.getNode(PPCISD::MTCTR, NodeTys, MTCTROps, 2+(InFlag.Val!=0));
    InFlag = Chain.getValue(1);

    // Copy the callee address into R12/X12 on darwin.
    if (isMachoABI) {
      unsigned Reg = Callee.getValueType() == MVT::i32 ? PPC::R12 : PPC::X12;
      Chain = DAG.getCopyToReg(Chain, Reg, Callee, InFlag);
      InFlag = Chain.getValue(1);
    }

    NodeTys.clear();
    NodeTys.push_back(MVT::Other);
    NodeTys.push_back(MVT::Flag);
    Ops.push_back(Chain);
    CallOpc = isMachoABI ? PPCISD::BCTRL_Macho : PPCISD::BCTRL_ELF;
    // Null out Callee so the direct-call path below is skipped.
    Callee.Val = 0;
  }

  // If this is a direct call, pass the chain and the callee.
  if (Callee.Val) {
    Ops.push_back(Chain);
    Ops.push_back(Callee);
  }

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  if (InFlag.Val)
    Ops.push_back(InFlag);
  Chain = DAG.getNode(CallOpc, NodeTys, &Ops[0], Ops.size());
  InFlag = Chain.getValue(1);

  Chain = DAG.getCALLSEQ_END(Chain,
                             DAG.getConstant(NumBytes, PtrVT),
                             DAG.getConstant(0, PtrVT),
                             InFlag);
  if (Op.Val->getValueType(0) != MVT::Other)
    InFlag = Chain.getValue(1);

  // Figure out where the call results live, per the PPC calling convention.
  SmallVector<SDOperand, 16> ResultVals;
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeCallResult(Op.Val, RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    MVT::ValueType VT = VA.getValVT();
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyFromReg(Chain, VA.getLocReg(), VT, InFlag).getValue(1);
    ResultVals.push_back(Chain.getValue(0));
    InFlag = Chain.getValue(2);
  }

  // If the function returns void, just return the chain.
  if (RVLocs.empty())
    return Chain;

  // Otherwise, merge everything together with a MERGE_VALUES node.
  ResultVals.push_back(Chain);
  SDOperand Res = DAG.getNode(ISD::MERGE_VALUES, Op.Val->getVTList(),
                              &ResultVals[0], ResultVals.size());
  return Res.getValue(Op.ResNo);
}

/// LowerRET - Lower an ISD::RET node: copy the return values into the
/// registers assigned by RetCC_PPC and emit a PPCISD::RET_FLAG.
SDOperand PPCTargetLowering::LowerRET(SDOperand Op, SelectionDAG &DAG,
                                      TargetMachine &TM) {
  SmallVector<CCValAssign, 16> RVLocs;
  unsigned CC = DAG.getMachineFunction().getFunction()->getCallingConv();
  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CCState CCInfo(CC, isVarArg, TM, RVLocs);
  CCInfo.AnalyzeReturn(Op.Val, RetCC_PPC);

  // If this is the first return lowered for this function, add the regs to the
  // liveout set for the function.
  if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
    for (unsigned i = 0; i != RVLocs.size(); ++i)
      DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
  }

  SDOperand Chain = Op.getOperand(0);
  SDOperand Flag;

  // Copy the result values into the output registers.
  for (unsigned i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");
    Chain = DAG.getCopyToReg(Chain, VA.getLocReg(), Op.getOperand(i*2+1), Flag);
    Flag = Chain.getValue(1);
  }

  if (Flag.Val)
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain, Flag);
  else
    return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Chain);
}

SDOperand PPCTargetLowering::LowerSTACKRESTORE(SDOperand Op, SelectionDAG &DAG,
                                      const PPCSubtarget &Subtarget) {
  // When we pop the dynamic allocation we need to restore the SP link.

  // Get the correct type for pointers.
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();

  // Construct the stack pointer operand.
  bool IsPPC64 = Subtarget.isPPC64();
  unsigned SP = IsPPC64 ?
PPC::X1 : PPC::R1;
  SDOperand StackPtr = DAG.getRegister(SP, PtrVT);

  // Get the operands for the STACKRESTORE.
  SDOperand Chain = Op.getOperand(0);
  SDOperand SaveSP = Op.getOperand(1);

  // Load the old link SP.
  SDOperand LoadLinkSP = DAG.getLoad(PtrVT, Chain, StackPtr, NULL, 0);

  // Restore the stack pointer.
  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), SP, SaveSP);

  // Store the old link SP.
  return DAG.getStore(Chain, LoadLinkSP, StackPtr, NULL, 0);
}

SDOperand PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDOperand Op,
                                         SelectionDAG &DAG,
                                         const PPCSubtarget &Subtarget) {
  MachineFunction &MF = DAG.getMachineFunction();
  bool IsPPC64 = Subtarget.isPPC64();
  bool isMachoABI = Subtarget.isMachoABI();

  // Get current frame pointer save index.  The users of this index will be
  // primarily DYNALLOC instructions.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  int FPSI = FI->getFramePointerSaveIndex();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64, isMachoABI);

    // Allocate the frame index for frame pointer save area.
    FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Get the inputs.
  SDOperand Chain = Op.getOperand(0);
  SDOperand Size = Op.getOperand(1);

  // Get the correct type for pointers.
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  // Negate the size.
  SDOperand NegSize = DAG.getNode(ISD::SUB, PtrVT,
                                  DAG.getConstant(0, PtrVT), Size);
  // Construct a node for the frame pointer save index.
  SDOperand FPSIdx = DAG.getFrameIndex(FPSI, PtrVT);
  // Build a DYNALLOC node.
  SDOperand Ops[3] = { Chain, NegSize, FPSIdx };
  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
  return DAG.getNode(PPCISD::DYNALLOC, VTs, Ops, 3);
}


/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
/// possible.
SDOperand PPCTargetLowering::LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETOLT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALLTHROUGH
    case ISD::SETUGE:
    case ISD::SETOGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETOGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setle
      // FALLTHROUGH
    case ISD::SETULE:
    case ISD::SETOLE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      // setle(x, 0) == setge(-x, 0), so select on the negated LHS.
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  // General case: compare via a subtraction, then fsel on its sign.
  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETOLT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETOGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETOGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETOLE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if
       (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

// FIXME: Split this code up when LegalizeDAGTypes lands.
SDOperand PPCTargetLowering::LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  // Convert to an integer in an FP register with the round-toward-zero
  // fcti[wd]z instruction.
  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand FIPtr = DAG.CreateStackTemporary(MVT::f64);

  // Emit a store to the stack slot.
  SDOperand Chain = DAG.getStore(DAG.getEntryNode(), Tmp, FIPtr, NULL, 0);

  // Result is a load from the stack slot.  If loading 4 bytes, make sure to
  // add in a bias.
  // (PPC is big-endian: the i32 result lives in the high-addressed word of
  // the 8-byte slot, hence the +4.)
  if (Op.getValueType() == MVT::i32)
    FIPtr = DAG.getNode(ISD::ADD, FIPtr.getValueType(), FIPtr,
                        DAG.getConstant(4, FIPtr.getValueType()));
  return DAG.getLoad(Op.getValueType(), Chain, FIPtr, NULL, 0);
}

SDOperand PPCTargetLowering::LowerFP_ROUND_INREG(SDOperand Op,
                                                 SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::ppcf128);
  SDNode *Node = Op.Val;
  assert(Node->getOperand(0).getValueType() == MVT::ppcf128);
  assert(Node->getOperand(0).Val->getOpcode() == ISD::BUILD_PAIR);
  SDOperand Lo = Node->getOperand(0).Val->getOperand(0);
  SDOperand Hi = Node->getOperand(0).Val->getOperand(1);

  // This sequence changes FPSCR to do round-to-zero, adds the two halves
  // of the long double, and puts FPSCR back the way it was.  We do not
  // actually model FPSCR.
  std::vector<MVT::ValueType> NodeTys;
  SDOperand Ops[4], Result, MFFSreg, InFlag, FPreg;

  // Save the current FPSCR so it can be restored afterwards.
  NodeTys.push_back(MVT::f64);   // Return register
  NodeTys.push_back(MVT::Flag);    // Returns a flag for later insns
  Result = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);
  MFFSreg = Result.getValue(0);
  InFlag = Result.getValue(1);

  // Set FPSCR bit 31, then clear bit 30: rounding mode RN = 01 (toward zero).
  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = DAG.getConstant(31, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB1, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  NodeTys.clear();
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = DAG.getConstant(30, MVT::i32);
  Ops[1] = InFlag;
  Result = DAG.getNode(PPCISD::MTFSB0, NodeTys, Ops, 2);
  InFlag = Result.getValue(0);

  // Add the two halves with the modified (round-to-zero) rounding mode.
  NodeTys.clear();
  NodeTys.push_back(MVT::f64);    // result of add
  NodeTys.push_back(MVT::Flag);   // Returns a flag
  Ops[0] = Lo;
  Ops[1] = Hi;
  Ops[2] = InFlag;
  Result = DAG.getNode(PPCISD::FADDRTZ, NodeTys, Ops, 3);
  FPreg = Result.getValue(0);
  InFlag = Result.getValue(1);

2461 NodeTys.clear(); 2462 NodeTys.push_back(MVT::f64); 2463 Ops[0] = DAG.getConstant(1, MVT::i32); 2464 Ops[1] = MFFSreg; 2465 Ops[2] = FPreg; 2466 Ops[3] = InFlag; 2467 Result = DAG.getNode(PPCISD::MTFSF, NodeTys, Ops, 4); 2468 FPreg = Result.getValue(0); 2469 2470 // We know the low half is about to be thrown away, so just use something 2471 // convenient. 2472 return DAG.getNode(ISD::BUILD_PAIR, Lo.getValueType(), FPreg, FPreg); 2473} 2474 2475SDOperand PPCTargetLowering::LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 2476 // Don't handle ppc_fp128 here; let it be lowered to a libcall. 2477 if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) 2478 return SDOperand(); 2479 2480 if (Op.getOperand(0).getValueType() == MVT::i64) { 2481 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 2482 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 2483 if (Op.getValueType() == MVT::f32) 2484 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0)); 2485 return FP; 2486 } 2487 2488 assert(Op.getOperand(0).getValueType() == MVT::i32 && 2489 "Unhandled SINT_TO_FP type in custom expander!"); 2490 // Since we only generate this in 64-bit mode, we can take advantage of 2491 // 64-bit registers. In particular, sign extend the input value into the 2492 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 2493 // then lfd it and fcfid it. 2494 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 2495 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 2496 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 2497 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 2498 2499 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 2500 Op.getOperand(0)); 2501 2502 // STD the extended value into the stack slot. 
  MemOperand MO(PseudoSourceValue::getFixedStack(),
                MemOperand::MOStore, FrameIdx, 8, 8);
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getMemOperand(MO));
  // Load the value as a double.
  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, NULL, 0);

  // FCFID it and return it.
  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP, DAG.getIntPtrConstant(0));
  return FP;
}

/// LowerFLT_ROUNDS_ - Lower FLT_ROUNDS_ by reading the FPSCR rounding-mode
/// field and translating its encoding to the C99 FLT_ROUNDS one.
SDOperand PPCTargetLowering::LowerFLT_ROUNDS_(SDOperand Op, SelectionDAG &DAG) {
  /*
   The rounding mode is in bits 30:31 of FPSR, and has the following
   settings:
     00 Round to nearest
     01 Round to 0
     10 Round to +inf
     11 Round to -inf

  FLT_ROUNDS, on the other hand, expects the following:
    -1 Undefined
     0 Round to 0
     1 Round to nearest
     2 Round to +inf
     3 Round to -inf

  To perform the conversion, we do:
    ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
  */

  MachineFunction &MF = DAG.getMachineFunction();
  MVT::ValueType VT = Op.getValueType();
  MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
  std::vector<MVT::ValueType> NodeTys;
  SDOperand MFFSreg, InFlag;

  // Save FP Control Word to register.  Note the MFFS result value is also
  // used as the chain for the store below.
  NodeTys.push_back(MVT::f64);    // return register
  NodeTys.push_back(MVT::Flag);   // unused in this context
  SDOperand Chain = DAG.getNode(PPCISD::MFFS, NodeTys, &InFlag, 0);

  // Save FP register to stack slot
  int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
  SDOperand StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  SDOperand Store = DAG.getStore(DAG.getEntryNode(), Chain,
                                 StackSlot, NULL, 0);

  // Load FP Control Word from low 32 bits of stack slot.
  // (Offset +4 because this is a big-endian target: the interesting bits
  // are in the second word of the stored f64.)
  SDOperand Four = DAG.getConstant(4, PtrVT);
  SDOperand Addr = DAG.getNode(ISD::ADD, PtrVT, StackSlot, Four);
  SDOperand CWD = DAG.getLoad(MVT::i32, Store, Addr, NULL, 0);

  // Transform as necessary: implements the
  // ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1)) formula above.
  SDOperand CWD1 =
    DAG.getNode(ISD::AND, MVT::i32,
                CWD, DAG.getConstant(3, MVT::i32));
  SDOperand CWD2 =
    DAG.getNode(ISD::SRL, MVT::i32,
                DAG.getNode(ISD::AND, MVT::i32,
                            DAG.getNode(ISD::XOR, MVT::i32,
                                        CWD, DAG.getConstant(3, MVT::i32)),
                            DAG.getConstant(3, MVT::i32)),
                DAG.getConstant(1, MVT::i8));

  SDOperand RetVal =
    DAG.getNode(ISD::XOR, MVT::i32, CWD1, CWD2);

  // Narrow or widen the i32 result to the requested type.
  return DAG.getNode((MVT::getSizeInBits(VT) < 16 ?
                      ISD::TRUNCATE : ISD::ZERO_EXTEND), VT, RetVal);
}

/// LowerSHL_PARTS - Expand a double-register shift-left into logical ops on
/// the two halves.
SDOperand PPCTargetLowering::LowerSHL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned BitWidth = MVT::getSizeInBits(VT);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SHL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT::ValueType AmtVT = Amt.getValueType();

  // OutHi = (Hi << Amt) | (Lo >> (BitWidth-Amt)) | (Lo << (Amt-BitWidth)).
  // The last term covers Amt >= BitWidth; it relies on the PPCISD shift
  // nodes producing 0 for amounts in [BitWidth, 2*BitWidth) (see the
  // "oversized shift amounts" note above).
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, VT, Hi, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, VT, Lo, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, VT, Lo, Tmp5);
  SDOperand OutHi = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, VT, Lo, Amt);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
                     OutOps, 2);
}

/// LowerSRL_PARTS - Expand a double-register logical shift-right into
/// logical ops on the two halves (mirror image of LowerSHL_PARTS).
SDOperand PPCTargetLowering::LowerSRL_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned BitWidth = MVT::getSizeInBits(VT);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRL!");

  // Expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT::ValueType AmtVT = Amt.getValueType();

  // OutLo = (Lo >> Amt) | (Hi << (BitWidth-Amt)) | (Hi >> (Amt-BitWidth)),
  // with the last term again handling Amt >= BitWidth via the zero-result
  // behavior of oversized PPC shifts.
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, VT, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, VT, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, VT, Hi, Amt);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
                     OutOps, 2);
}

/// LowerSRA_PARTS - Expand a double-register arithmetic shift-right.
/// Unlike SRL, the Amt >= BitWidth case cannot be OR'd in (the sign-fill
/// bits are not zero), so a select_cc chooses the low-half result.
SDOperand PPCTargetLowering::LowerSRA_PARTS(SDOperand Op, SelectionDAG &DAG) {
  MVT::ValueType VT = Op.getValueType();
  unsigned BitWidth = MVT::getSizeInBits(VT);
  assert(Op.getNumOperands() == 3 &&
         VT == Op.getOperand(1).getValueType() &&
         "Unexpected SRA!");

  // Expand into a bunch of logical ops, followed by a select_cc.
  SDOperand Lo = Op.getOperand(0);
  SDOperand Hi = Op.getOperand(1);
  SDOperand Amt = Op.getOperand(2);
  MVT::ValueType AmtVT = Amt.getValueType();

  // Tmp4 = (Lo >> Amt) | (Hi << (BitWidth-Amt)) is correct for
  // Amt < BitWidth; Tmp6 = Hi sra (Amt-BitWidth) is correct for
  // Amt >= BitWidth.  Select between them on the sign of Amt-BitWidth.
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, AmtVT,
                               DAG.getConstant(BitWidth, AmtVT), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, VT, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, VT, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , VT, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, AmtVT, Amt,
                               DAG.getConstant(-BitWidth, AmtVT));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, VT, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, VT, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, AmtVT),
                                    Tmp4, Tmp6, ISD::SETLE);
  SDOperand OutOps[] = { OutLo, OutHi };
  return DAG.getNode(ISD::MERGE_VALUES, DAG.getVTList(VT, VT),
                     OutOps, 2);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;
  
  // NOTE(review): the masks below (~0U >> (32-EltBitSize)) assume element
  // types of at most 32 bits — true for the v16i8/v8i16/v4i32/v4f32 vector
  // types this target legalizes, but worth confirming if wider elements
  // ever reach here.
  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);
    
    unsigned PartNo = i >= e/2;     // Which uint64_t the element lands in.
    // Which subpiece of the uint64_t: elements fill from the most-
    // significant slot down (element 0 gets the highest slot).
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      // Record which bits of this element are undefined.
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValueAPF().convertToFloat());
    } else {
      // Nonconstant element.
      return true;
    }
    
    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }
  
  //printf("%llx %llx  %llx %llx\n", 
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2], 
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {
  
  // Don't let undefs prevent splats from matching.  See if the top 64-bits are
  // the same as the lower 64-bits, ignoring undefs.
  // NOTE(review): each half is masked by the *other* half's undef bits —
  // verify this cross-masking is the intended way to ignore undefs (masking
  // both sides by the union of the undef masks would be the obvious form).
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
  
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];
  
  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  // Merge the two halves; a bit that is undef in one half is 0 there, so
  // OR'ing keeps the defined value.  A bit is undef overall only if it is
  // undef in both halves (AND).
  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);
  
  // If the top 16-bits are different than the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }
  
  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);
  
  // If the top 8-bits are different than the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }
  
  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Canonical vector type to build for each splat element size.  Index is
  // SplatSize-1; there is no 3-byte splat, hence the MVT::Other hole.
  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };

  MVT::ValueType ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
  
  // Force vspltis[hw] -1 to vspltisb -1 to canonicalize.
  if (Val == -1)
    SplatSize = 1;
  
  MVT::ValueType CanonicalVT = VTys[SplatSize-1];
  
  // Build a canonical splat for this value: a BUILD_VECTOR whose elements
  // are all the same constant, bit-cast to the requested type.
  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorElementType(CanonicalVT));
  SmallVector<SDOperand, 8> Ops;
  Ops.assign(MVT::getVectorNumElements(CanonicalVT), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT,
                              &Ops[0], Ops.size());
  return DAG.getNode(ISD::BIT_CONVERT, ReqVT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.  DestVT defaults to the type of LHS.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG, 
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.  DestVT defaults to the type of Op0.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG, 
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT::ValueType VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);
  
  // A vsldoi of Amt is a byte shuffle selecting bytes Amt..Amt+15 of the
  // 32-byte LHS:RHS concatenation.
  SDOperand Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = DAG.getConstant(i+Amt, MVT::i32);
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops,16));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
SDOperand PPCTargetLowering::LowerBUILD_VECTOR(SDOperand Op, 
                                               SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();   // Not a constant vector.
  
  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and 
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;
    
    // First, handle single instruction cases.
    
    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }
    
    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);
    
    
    // Two instruction sequences.
    
    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }
    
    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is 
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);
      
      // Make the VSLW intrinsic, computing 0x8000_0000 (each lane shifts by
      // its own low 5 bits, so -1 << 31 in every lane).
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV, 
                                       OnesV, DAG);
      
      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const signed char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };
    
    for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
      int i = SplatCsts[idx];
      
      // Figure out what shift amount will be used by altivec if shifted by i in
      // this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);
      
      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }
      
      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }
      
      // vsplti + sra self.
      // NOTE(review): this condition is textually identical to the srl case
      // above, which returns first — so this branch can never fire.  An
      // arithmetic shift (i >> TypeShiftAmt on the signed i) was presumably
      // intended; confirm against upstream history before changing.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }
      
      // vsplti + rol self.
      // NOTE(review): when TypeShiftAmt == 0 the second shift amount equals
      // SplatBitSize, and in the vsldoi checks below TypeShiftAmt-8/16/24
      // can go negative; both are undefined behavior for C++ shifts on some
      // values of i — verify the reachable cases are benign.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        SDOperand Res = BuildSplatI(i, SplatSize, MVT::Other, DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG);
        return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }
    
    // Three instruction sequences.
    
    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
    // (Values in [-16,15] and even values returned above, so only odd
    // [17,31] actually reaches this test.)
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, MVT::Other, DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
      LHS = DAG.getNode(ISD::SUB, LHS.getValueType(), LHS, RHS);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, MVT::Other, DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, MVT::Other, DAG);
      LHS = DAG.getNode(ISD::ADD, LHS.getValueType(), LHS, RHS);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), LHS);
    }
  }
  
  // Not a recognized constant pattern; let the default expansion (constant
  // pool load) handle it.
  return SDOperand();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.  Recurses on the two
/// 13-bit operand IDs packed into the entry.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID  = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);
  
  enum {
    OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };
  
  if (OpNum == OP_COPY) {
    // IDs are base-9 encodings of the four word indices: 0123 selects the
    // LHS unchanged, 4567 selects the RHS.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }
  
  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);
  
  // Byte-level shuffle masks for the word-level operations above.
  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
  }
  SDOperand Ops[16];
  for (unsigned i = 0; i != 16; ++i)
    Ops[i] = DAG.getConstant(ShufIdxs[i], MVT::i32);
  
  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16));
}

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.  If this
/// is a shuffle we can handle in a single instruction, return it.  Otherwise,
/// return the code it can be lowered into.  Worst case, it can always be
/// lowered into a vperm.
SDOperand PPCTargetLowering::LowerVECTOR_SHUFFLE(SDOperand Op, 
                                                 SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);
  
  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  // With an undef second operand the unary (undef-tolerant) forms of the
  // single-instruction shuffles apply.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }
  
  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;
  
  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  // PFIndexes[i] is the source word (0-7) for result word i, or 8 for undef;
  // all four bytes of a word must agree on the same source word.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.
      
      unsigned ByteSource = 
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      // The byte must occupy the same position within its word...
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }
      
      // ...and agree with its siblings on which word is being copied.
      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }
  
  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex = 
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
    
    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);   // Top 2 bits encode the op count.
    
    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information 
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3) 
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
  }
  
  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  // vperm reads all 32 bytes of V1:V2, so replace an undef V2 with V1 to
  // avoid feeding an undef operand to the instruction.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
  
  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT::ValueType EltVT = MVT::getVectorElementType(V1.getValueType());
  unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
  
  SmallVector<SDOperand, 16> ResultMask;
  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;   // Undef elements just read from byte 0.
    else 
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
    
    // Expand each element index into its constituent byte indices.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }
  
  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8,
                                    &ResultMask[0], ResultMask.size());
  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic: CompareOpc is the instruction's extended
/// opcode field and isDot is set for the predicate (record, "dot") forms.
static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
                                  bool &isDot) {
  unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
    
    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDOperand PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDOperand Op, 
                                                     SelectionDAG &DAG) {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDOperand();    // Don't custom lower most intrinsics.
  
  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
                                Op.getOperand(1), Op.getOperand(2),
                                DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
  }
  
  // Create the PPCISD altivec 'dot' comparison node.
  SDOperand Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, MVT::i32)
  };
  std::vector<MVT::ValueType> VTs;
  VTs.push_back(Op.getOperand(2).getValueType());
  VTs.push_back(MVT::Flag);
  SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);
  
  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32,
                                DAG.getRegister(PPC::CR6, MVT::i32),
                                CompNode.getValue(1)); 
  
  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }
  
  // Shift the bit into the low position.  (CR6's four bits land in the low
  // byte of the MFCR result, so the shift amount is 5 for EQ, 7 for LT —
  // presumably; confirm against the MFCR lowering.)
  Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags,
                      DAG.getConstant(8-(3-BitNo), MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, MVT::i32, Flags,
                      DAG.getConstant(1, MVT::i32));
  
  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
                        DAG.getConstant(1, MVT::i32));
  return Flags;
}

SDOperand PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDOperand Op, 
                                                   SelectionDAG &DAG) {
  // Create a stack slot that is 16-byte aligned.
3311 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 3312 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 3313 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 3314 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); 3315 3316 // Store the input value into Value#0 of the stack slot. 3317 SDOperand Store = DAG.getStore(DAG.getEntryNode(), 3318 Op.getOperand(0), FIdx, NULL, 0); 3319 // Load it out. 3320 return DAG.getLoad(Op.getValueType(), Store, FIdx, NULL, 0); 3321} 3322 3323SDOperand PPCTargetLowering::LowerMUL(SDOperand Op, SelectionDAG &DAG) { 3324 if (Op.getValueType() == MVT::v4i32) { 3325 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3326 3327 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 3328 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 3329 3330 SDOperand RHSSwap = // = vrlw RHS, 16 3331 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 3332 3333 // Shrinkify inputs to v8i16. 3334 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 3335 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 3336 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 3337 3338 // Low parts multiplied together, generating 32-bit results (we ignore the 3339 // top parts). 3340 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 3341 LHS, RHS, DAG, MVT::v4i32); 3342 3343 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 3344 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 3345 // Shift the high parts up 16 bits. 
3346 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 3347 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 3348 } else if (Op.getValueType() == MVT::v8i16) { 3349 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3350 3351 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 3352 3353 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 3354 LHS, RHS, Zero, DAG); 3355 } else if (Op.getValueType() == MVT::v16i8) { 3356 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 3357 3358 // Multiply the even 8-bit parts, producing 16-bit sums. 3359 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 3360 LHS, RHS, DAG, MVT::v8i16); 3361 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 3362 3363 // Multiply the odd 8-bit parts, producing 16-bit sums. 3364 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 3365 LHS, RHS, DAG, MVT::v8i16); 3366 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 3367 3368 // Merge the results together. 3369 SDOperand Ops[16]; 3370 for (unsigned i = 0; i != 8; ++i) { 3371 Ops[i*2 ] = DAG.getConstant(2*i+1, MVT::i8); 3372 Ops[i*2+1] = DAG.getConstant(2*i+1+16, MVT::i8); 3373 } 3374 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 3375 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops, 16)); 3376 } else { 3377 assert(0 && "Unknown mul to lower!"); 3378 abort(); 3379 } 3380} 3381 3382/// LowerOperation - Provide custom lowering hooks for some operations. 
///
SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) {
  switch (Op.getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::ConstantPool:       return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress:      return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
  case ISD::JumpTable:          return LowerJumpTable(Op, DAG);
  case ISD::SETCC:              return LowerSETCC(Op, DAG);
  case ISD::VASTART:
    return LowerVASTART(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                        VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::VAARG:
    return LowerVAARG(Op, DAG, VarArgsFrameIndex, VarArgsStackOffset,
                      VarArgsNumGPR, VarArgsNumFPR, PPCSubTarget);

  case ISD::FORMAL_ARGUMENTS:
    return LowerFORMAL_ARGUMENTS(Op, DAG, VarArgsFrameIndex,
                                 VarArgsStackOffset, VarArgsNumGPR,
                                 VarArgsNumFPR, PPCSubTarget);

  case ISD::CALL:               return LowerCALL(Op, DAG, PPCSubTarget,
                                                 getTargetMachine());
  case ISD::RET:                return LowerRET(Op, DAG, getTargetMachine());
  case ISD::STACKRESTORE:       return LowerSTACKRESTORE(Op, DAG, PPCSubTarget);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget);

  case ISD::SELECT_CC:          return LowerSELECT_CC(Op, DAG);
  case ISD::FP_TO_SINT:         return LowerFP_TO_SINT(Op, DAG);
  case ISD::SINT_TO_FP:         return LowerSINT_TO_FP(Op, DAG);
  case ISD::FP_ROUND_INREG:     return LowerFP_ROUND_INREG(Op, DAG);
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);

  // Lower 64-bit shifts.
  case ISD::SHL_PARTS:          return LowerSHL_PARTS(Op, DAG);
  case ISD::SRL_PARTS:          return LowerSRL_PARTS(Op, DAG);
  case ISD::SRA_PARTS:          return LowerSRA_PARTS(Op, DAG);

  // Vector-related lowering.
  case ISD::BUILD_VECTOR:       return LowerBUILD_VECTOR(Op, DAG);
  case ISD::VECTOR_SHUFFLE:     return LowerVECTOR_SHUFFLE(Op, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::SCALAR_TO_VECTOR:   return LowerSCALAR_TO_VECTOR(Op, DAG);
  case ISD::MUL:                return LowerMUL(Op, DAG);

  // Frame & Return address.
  case ISD::RETURNADDR:         return LowerRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR:          return LowerFRAMEADDR(Op, DAG);
  }
  // Not reached: every case above returns.
  return SDOperand();
}

/// ExpandOperationResult - Custom-expand the result of N when its result type
/// is illegal.  Only FP_TO_SINT is custom expanded here; anything else
/// reaching this hook is a lowering bug.
SDNode *PPCTargetLowering::ExpandOperationResult(SDNode *N, SelectionDAG &DAG) {
  switch (N->getOpcode()) {
  default: assert(0 && "Wasn't expecting to be able to lower this!");
  case ISD::FP_TO_SINT: return LowerFP_TO_SINT(SDOperand(N, 0), DAG).Val;
  }
}


//===----------------------------------------------------------------------===//
//  Other Lowering Code
//===----------------------------------------------------------------------===//

/// EmitInstrWithCustomInserter - Expand a SELECT_CC_* pseudo instruction into
/// an explicit branch diamond: a conditional branch around a false-value
/// block, joined by a PHI in the sink block.
MachineBasicBlock *
PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                               MachineBasicBlock *BB) {
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  assert((MI->getOpcode() == PPC::SELECT_CC_I4 ||
          MI->getOpcode() == PPC::SELECT_CC_I8 ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8 ||
          MI->getOpcode() == PPC::SELECT_CC_VRRC) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC copy1MBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  // Operand 4 holds the PPC predicate to branch on; operand 1 is the CR
  // register the predicate tests.
  unsigned SelectPred = MI->getOperand(4).getImm();
  BuildMI(BB, TII->get(PPC::BCC))
    .addImm(SelectPred).addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, TII->get(PPC::PHI), MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

/// PerformDAGCombine - Target-specific DAG combines: fold trivial PPC shift
/// nodes, collapse fp->int->fp round trips to fctidz/fcfid, use
/// byte-reversing loads/stores for BSWAP, CSE a VCMP against a matching
/// VCMPo, and branch directly on CR6 for altivec predicate compares.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case PPCISD::SHL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 << V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRL:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0)   // 0 >>u V -> 0.
        return N->getOperand(0);
    }
    break;
  case PPCISD::SRA:
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
      if (C->getValue() == 0 ||   //  0 >>s V -> 0.
          C->isAllOnesValue())    // -1 >>s V -> -1.
        return N->getOperand(0);
    }
    break;

  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().has64BitSupport()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64 &&
            N->getOperand(0).getOperand(0).getValueType() != MVT::ppcf128) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          // Widen an f32 source so the whole sequence runs in f64.
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          // Narrow back down if the original result type was f32.
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val,
                              DAG.getIntPtrConstant(0));
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        !cast<StoreSDNode>(N)->isTruncatingStore() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32 &&
        N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }

    // Turn STORE (BSWAP) -> sthbrx/stwbrx.
    if (N->getOperand(1).getOpcode() == ISD::BSWAP &&
        N->getOperand(1).Val->hasOneUse() &&
        (N->getOperand(1).getValueType() == MVT::i32 ||
         N->getOperand(1).getValueType() == MVT::i16)) {
      SDOperand BSwapOp = N->getOperand(1).getOperand(0);
      // Do an any-extend to 32-bits if this is a half-word input.
      if (BSwapOp.getValueType() == MVT::i16)
        BSwapOp = DAG.getNode(ISD::ANY_EXTEND, MVT::i32, BSwapOp);

      // The trailing VT operand tells the byte-reversed store how wide the
      // original value was (i16 vs i32).
      return DAG.getNode(PPCISD::STBRX, MVT::Other, N->getOperand(0), BSwapOp,
                         N->getOperand(2), N->getOperand(3),
                         DAG.getValueType(N->getOperand(1).getValueType()));
    }
    break;
  case ISD::BSWAP:
    // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
    if (ISD::isNON_EXTLoad(N->getOperand(0).Val) &&
        N->getOperand(0).hasOneUse() &&
        (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16)) {
      SDOperand Load = N->getOperand(0);
      LoadSDNode *LD = cast<LoadSDNode>(Load);
      // Create the byte-swapping load.
      std::vector<MVT::ValueType> VTs;
      VTs.push_back(MVT::i32);
      VTs.push_back(MVT::Other);
      SDOperand MO = DAG.getMemOperand(LD->getMemOperand());
      SDOperand Ops[] = {
        LD->getChain(),    // Chain
        LD->getBasePtr(),  // Ptr
        MO,                // MemOperand
        DAG.getValueType(N->getValueType(0)) // VT
      };
      SDOperand BSLoad = DAG.getNode(PPCISD::LBRX, VTs, Ops, 4);

      // If this is an i16 load, insert the truncate.
      SDOperand ResVal = BSLoad;
      if (N->getValueType(0) == MVT::i16)
        ResVal = DAG.getNode(ISD::TRUNCATE, MVT::i16, BSLoad);

      // First, combine the bswap away.  This makes the value produced by the
      // load dead.
      DCI.CombineTo(N, ResVal);

      // Next, combine the load away, we give it a bogus result value but a real
      // chain result.  The result value is dead because the bswap is dead.
      DCI.CombineTo(Load.Val, ResVal, BSLoad.getValue(1));

      // Return N so it doesn't get rechecked!
      return SDOperand(N, 0);
    }

    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI).getUser()->getOpcode() == PPCISD::VCMPo &&
            (*UI).getUser()->getOperand(1) == N->getOperand(1) &&
            (*UI).getUser()->getOperand(2) == N->getOperand(2) &&
            (*UI).getUser()->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = UI->getUser();
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = UI->getUser();
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<MVT::ValueType> VTs;
      SDOperand Ops[] = {
        LHS.getOperand(2),  // LHS of compare
        LHS.getOperand(3),  // RHS of compare
        DAG.getConstant(CompareOpc, MVT::i32)
      };
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops, 3);

      // Unpack the result based on how the target uses it.  LHS operand 1 is
      // the predicate selector (0-3) of the *_p intrinsic.
      PPC::Predicate CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                         DAG.getConstant(CompOpc, MVT::i32),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// computeMaskedBitsForTargetNode - Report known-zero bits for PPC-specific
/// nodes: a byte-reversed i16 load only produces its low 16 bits, and the
/// altivec predicate intrinsics only ever produce 0 or 1.
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       const APInt &Mask,
                                                       APInt &KnownZero,
                                                       APInt &KnownOne,
                                                       const SelectionDAG &DAG,
                                                       unsigned Depth) const {
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    // (Operand 3 is the VT operand recording the memory width.)
    if (cast<VTSDNode>(Op.getOperand(3))->getVT() == MVT::i16)
      KnownZero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
3815PPCTargetLowering::ConstraintType 3816PPCTargetLowering::getConstraintType(const std::string &Constraint) const { 3817 if (Constraint.size() == 1) { 3818 switch (Constraint[0]) { 3819 default: break; 3820 case 'b': 3821 case 'r': 3822 case 'f': 3823 case 'v': 3824 case 'y': 3825 return C_RegisterClass; 3826 } 3827 } 3828 return TargetLowering::getConstraintType(Constraint); 3829} 3830 3831std::pair<unsigned, const TargetRegisterClass*> 3832PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, 3833 MVT::ValueType VT) const { 3834 if (Constraint.size() == 1) { 3835 // GCC RS6000 Constraint Letters 3836 switch (Constraint[0]) { 3837 case 'b': // R1-R31 3838 case 'r': // R0-R31 3839 if (VT == MVT::i64 && PPCSubTarget.isPPC64()) 3840 return std::make_pair(0U, PPC::G8RCRegisterClass); 3841 return std::make_pair(0U, PPC::GPRCRegisterClass); 3842 case 'f': 3843 if (VT == MVT::f32) 3844 return std::make_pair(0U, PPC::F4RCRegisterClass); 3845 else if (VT == MVT::f64) 3846 return std::make_pair(0U, PPC::F8RCRegisterClass); 3847 break; 3848 case 'v': 3849 return std::make_pair(0U, PPC::VRRCRegisterClass); 3850 case 'y': // crrc 3851 return std::make_pair(0U, PPC::CRRCRegisterClass); 3852 } 3853 } 3854 3855 return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); 3856} 3857 3858 3859/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops 3860/// vector. If it is invalid, don't add anything to Ops. 3861void PPCTargetLowering::LowerAsmOperandForConstraint(SDOperand Op, char Letter, 3862 std::vector<SDOperand>&Ops, 3863 SelectionDAG &DAG) { 3864 SDOperand Result(0,0); 3865 switch (Letter) { 3866 default: break; 3867 case 'I': 3868 case 'J': 3869 case 'K': 3870 case 'L': 3871 case 'M': 3872 case 'N': 3873 case 'O': 3874 case 'P': { 3875 ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op); 3876 if (!CST) return; // Must be an immediate to match. 
3877 unsigned Value = CST->getValue(); 3878 switch (Letter) { 3879 default: assert(0 && "Unknown constraint letter!"); 3880 case 'I': // "I" is a signed 16-bit constant. 3881 if ((short)Value == (int)Value) 3882 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3883 break; 3884 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 3885 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 3886 if ((short)Value == 0) 3887 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3888 break; 3889 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 3890 if ((Value >> 16) == 0) 3891 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3892 break; 3893 case 'M': // "M" is a constant that is greater than 31. 3894 if (Value > 31) 3895 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3896 break; 3897 case 'N': // "N" is a positive constant that is an exact power of two. 3898 if ((int)Value > 0 && isPowerOf2_32(Value)) 3899 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3900 break; 3901 case 'O': // "O" is the constant zero. 3902 if (Value == 0) 3903 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3904 break; 3905 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 3906 if ((short)-Value == (int)-Value) 3907 Result = DAG.getTargetConstant(Value, Op.getValueType()); 3908 break; 3909 } 3910 break; 3911 } 3912 } 3913 3914 if (Result.Val) { 3915 Ops.push_back(Result); 3916 return; 3917 } 3918 3919 // Handle standard constraint letters. 3920 TargetLowering::LowerAsmOperandForConstraint(Op, Letter, Ops, DAG); 3921} 3922 3923// isLegalAddressingMode - Return true if the addressing mode represented 3924// by AM is legal for this target, for a load/store of the specified type. 3925bool PPCTargetLowering::isLegalAddressingMode(const AddrMode &AM, 3926 const Type *Ty) const { 3927 // FIXME: PPC does not allow r+i addressing modes for vectors! 
3928 3929 // PPC allows a sign-extended 16-bit immediate field. 3930 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 3931 return false; 3932 3933 // No global is ever allowed as a base. 3934 if (AM.BaseGV) 3935 return false; 3936 3937 // PPC only support r+r, 3938 switch (AM.Scale) { 3939 case 0: // "r+i" or just "i", depending on HasBaseReg. 3940 break; 3941 case 1: 3942 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 3943 return false; 3944 // Otherwise we have r+r or r+i. 3945 break; 3946 case 2: 3947 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 3948 return false; 3949 // Allow 2*r as r+r. 3950 break; 3951 default: 3952 // No other scales are supported. 3953 return false; 3954 } 3955 3956 return true; 3957} 3958 3959/// isLegalAddressImmediate - Return true if the integer value can be used 3960/// as the offset of the target addressing mode for load / store of the 3961/// given type. 3962bool PPCTargetLowering::isLegalAddressImmediate(int64_t V,const Type *Ty) const{ 3963 // PPC allows a sign-extended 16-bit immediate field. 3964 return (V > -(1 << 16) && V < (1 << 16)-1); 3965} 3966 3967bool PPCTargetLowering::isLegalAddressImmediate(llvm::GlobalValue* GV) const { 3968 return false; 3969} 3970 3971SDOperand PPCTargetLowering::LowerRETURNADDR(SDOperand Op, SelectionDAG &DAG) { 3972 // Depths > 0 not supported yet! 3973 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 3974 return SDOperand(); 3975 3976 MachineFunction &MF = DAG.getMachineFunction(); 3977 PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 3978 int RAIdx = FuncInfo->getReturnAddrSaveIndex(); 3979 if (RAIdx == 0) { 3980 bool isPPC64 = PPCSubTarget.isPPC64(); 3981 int Offset = 3982 PPCFrameInfo::getReturnSaveOffset(isPPC64, PPCSubTarget.isMachoABI()); 3983 3984 // Set up a frame object for the return address. 3985 RAIdx = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, Offset); 3986 3987 // Remember it for next time. 
3988 FuncInfo->setReturnAddrSaveIndex(RAIdx); 3989 3990 // Make sure the function really does not optimize away the store of the RA 3991 // to the stack. 3992 FuncInfo->setLRStoreRequired(); 3993 } 3994 3995 // Just load the return address off the stack. 3996 SDOperand RetAddrFI = DAG.getFrameIndex(RAIdx, getPointerTy()); 3997 return DAG.getLoad(getPointerTy(), DAG.getEntryNode(), RetAddrFI, NULL, 0); 3998} 3999 4000SDOperand PPCTargetLowering::LowerFRAMEADDR(SDOperand Op, SelectionDAG &DAG) { 4001 // Depths > 0 not supported yet! 4002 if (cast<ConstantSDNode>(Op.getOperand(0))->getValue() > 0) 4003 return SDOperand(); 4004 4005 MVT::ValueType PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); 4006 bool isPPC64 = PtrVT == MVT::i64; 4007 4008 MachineFunction &MF = DAG.getMachineFunction(); 4009 MachineFrameInfo *MFI = MF.getFrameInfo(); 4010 bool is31 = (NoFramePointerElim || MFI->hasVarSizedObjects()) 4011 && MFI->getStackSize(); 4012 4013 if (isPPC64) 4014 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::X31 : PPC::X1, 4015 MVT::i64); 4016 else 4017 return DAG.getCopyFromReg(DAG.getEntryNode(), is31 ? PPC::R31 : PPC::R1, 4018 MVT::i32); 4019} 4020