PPCISelLowering.cpp revision 25b8b8cb2cd8653712b0a567edf8573a067f8ff1
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

/// PPCTargetLowering - Configure how generic DAG operations are legalized for
/// PowerPC: which operations are Legal/Custom/Expand/Promote per value type,
/// which register classes back each type, and which nodes get target-specific
/// DAG combines.  Subtarget features (fsqrt, 64-bit, AltiVec, Darwin) gate
/// several of the choices below.
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // FP immediates cannot be materialized directly; load them from memory.
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Use the hardware square root instruction when the subtarget has it;
  // otherwise FSQRT must be expanded (e.g. to a libcall).
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);

  // PowerPC does not have ROTR (only rotate-left).
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have a plain SELECT instruction.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);


  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool,  MVT::i32, Custom);
  setOperationAction(ISD::JumpTable,     MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET, MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART, MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD, (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB, (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8 (the vperm element size).
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE , (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    // Re-legalize the v4i32 forms that the per-VT loop above promoted.
    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);

  computeRegisterProperties();
}

/// getTargetNodeName - Return the debug name for a PPC-specific DAG node
/// opcode, or null for opcodes this target does not define.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL:          return "PPCISD::CALL";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
  }
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
306static bool isConstantOrUndef(SDOperand Op, unsigned Val) { 307 return Op.getOpcode() == ISD::UNDEF || 308 cast<ConstantSDNode>(Op)->getValue() == Val; 309} 310 311/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a 312/// VPKUHUM instruction. 313bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) { 314 if (!isUnary) { 315 for (unsigned i = 0; i != 16; ++i) 316 if (!isConstantOrUndef(N->getOperand(i), i*2+1)) 317 return false; 318 } else { 319 for (unsigned i = 0; i != 8; ++i) 320 if (!isConstantOrUndef(N->getOperand(i), i*2+1) || 321 !isConstantOrUndef(N->getOperand(i+8), i*2+1)) 322 return false; 323 } 324 return true; 325} 326 327/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a 328/// VPKUWUM instruction. 329bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) { 330 if (!isUnary) { 331 for (unsigned i = 0; i != 16; i += 2) 332 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 333 !isConstantOrUndef(N->getOperand(i+1), i*2+3)) 334 return false; 335 } else { 336 for (unsigned i = 0; i != 8; i += 2) 337 if (!isConstantOrUndef(N->getOperand(i ), i*2+2) || 338 !isConstantOrUndef(N->getOperand(i+1), i*2+3) || 339 !isConstantOrUndef(N->getOperand(i+8), i*2+2) || 340 !isConstantOrUndef(N->getOperand(i+9), i*2+3)) 341 return false; 342 } 343 return true; 344} 345 346/// isVMerge - Common function, used to match vmrg* shuffles. 
347/// 348static bool isVMerge(SDNode *N, unsigned UnitSize, 349 unsigned LHSStart, unsigned RHSStart) { 350 assert(N->getOpcode() == ISD::BUILD_VECTOR && 351 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 352 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 353 "Unsupported merge size!"); 354 355 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 356 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 357 if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), 358 LHSStart+j+i*UnitSize) || 359 !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), 360 RHSStart+j+i*UnitSize)) 361 return false; 362 } 363 return true; 364} 365 366/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for 367/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). 368bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 369 if (!isUnary) 370 return isVMerge(N, UnitSize, 8, 24); 371 return isVMerge(N, UnitSize, 8, 8); 372} 373 374/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 375/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 376bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) { 377 if (!isUnary) 378 return isVMerge(N, UnitSize, 0, 16); 379 return isVMerge(N, UnitSize, 0, 0); 380} 381 382 383/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 384/// amount, otherwise return -1. 385int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { 386 assert(N->getOpcode() == ISD::BUILD_VECTOR && 387 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 388 // Find the first non-undef value in the shuffle mask. 389 unsigned i; 390 for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) 391 /*search*/; 392 393 if (i == 16) return -1; // all undef. 
394 395 // Otherwise, check to see if the rest of the elements are consequtively 396 // numbered from this value. 397 unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue(); 398 if (ShiftAmt < i) return -1; 399 ShiftAmt -= i; 400 401 if (!isUnary) { 402 // Check the rest of the elements to see if they are consequtive. 403 for (++i; i != 16; ++i) 404 if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) 405 return -1; 406 } else { 407 // Check the rest of the elements to see if they are consequtive. 408 for (++i; i != 16; ++i) 409 if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) 410 return -1; 411 } 412 413 return ShiftAmt; 414} 415 416/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 417/// specifies a splat of a single element that is suitable for input to 418/// VSPLTB/VSPLTH/VSPLTW. 419bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { 420 assert(N->getOpcode() == ISD::BUILD_VECTOR && 421 N->getNumOperands() == 16 && 422 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 423 424 // This is a splat operation if each element of the permute is the same, and 425 // if the value doesn't reference the second vector. 426 unsigned ElementBase = 0; 427 SDOperand Elt = N->getOperand(0); 428 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) 429 ElementBase = EltV->getValue(); 430 else 431 return false; // FIXME: Handle UNDEF elements too! 432 433 if (cast<ConstantSDNode>(Elt)->getValue() >= 16) 434 return false; 435 436 // Check that they are consequtive. 
437 for (unsigned i = 1; i != EltSize; ++i) { 438 if (!isa<ConstantSDNode>(N->getOperand(i)) || 439 cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase) 440 return false; 441 } 442 443 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 444 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 445 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 446 assert(isa<ConstantSDNode>(N->getOperand(i)) && 447 "Invalid VECTOR_SHUFFLE mask!"); 448 for (unsigned j = 0; j != EltSize; ++j) 449 if (N->getOperand(i+j) != N->getOperand(j)) 450 return false; 451 } 452 453 return true; 454} 455 456/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 457/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 458unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 459 assert(isSplatShuffleMask(N, EltSize)); 460 return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize; 461} 462 463/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed 464/// by using a vspltis[bhw] instruction of the specified element size, return 465/// the constant being splatted. The ByteSize field indicates the number of 466/// bytes of each element [124] -> [bhw]. 467SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) { 468 SDOperand OpVal(0, 0); 469 470 // If ByteSize of the splat is bigger than the element size of the 471 // build_vector, then we have a case where we are checking for a splat where 472 // multiple elements of the buildvector are folded together into a single 473 // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8). 474 unsigned EltSize = 16/N->getNumOperands(); 475 if (EltSize < ByteSize) { 476 unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval. 477 SDOperand UniquedVals[4]; 478 assert(Multiple > 1 && Multiple <= 4 && "How can this happen?"); 479 480 // See if all of the elements in the buildvector agree across. 
481 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 482 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 483 // If the element isn't a constant, bail fully out. 484 if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand(); 485 486 487 if (UniquedVals[i&(Multiple-1)].Val == 0) 488 UniquedVals[i&(Multiple-1)] = N->getOperand(i); 489 else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i)) 490 return SDOperand(); // no match. 491 } 492 493 // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains 494 // either constant or undef values that are identical for each chunk. See 495 // if these chunks can form into a larger vspltis*. 496 497 // Check to see if all of the leading entries are either 0 or -1. If 498 // neither, then this won't fit into the immediate field. 499 bool LeadingZero = true; 500 bool LeadingOnes = true; 501 for (unsigned i = 0; i != Multiple-1; ++i) { 502 if (UniquedVals[i].Val == 0) continue; // Must have been undefs. 503 504 LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue(); 505 LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue(); 506 } 507 // Finally, check the least significant entry. 508 if (LeadingZero) { 509 if (UniquedVals[Multiple-1].Val == 0) 510 return DAG.getTargetConstant(0, MVT::i32); // 0,0,0,undef 511 int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue(); 512 if (Val < 16) 513 return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4) 514 } 515 if (LeadingOnes) { 516 if (UniquedVals[Multiple-1].Val == 0) 517 return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef 518 int Val =cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended(); 519 if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2) 520 return DAG.getTargetConstant(Val, MVT::i32); 521 } 522 523 return SDOperand(); 524 } 525 526 // Check to see if this buildvec has a single non-undef value in its elements. 
527 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 528 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 529 if (OpVal.Val == 0) 530 OpVal = N->getOperand(i); 531 else if (OpVal != N->getOperand(i)) 532 return SDOperand(); 533 } 534 535 if (OpVal.Val == 0) return SDOperand(); // All UNDEF: use implicit def. 536 537 unsigned ValSizeInBytes = 0; 538 uint64_t Value = 0; 539 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 540 Value = CN->getValue(); 541 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8; 542 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 543 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 544 Value = FloatToBits(CN->getValue()); 545 ValSizeInBytes = 4; 546 } 547 548 // If the splat value is larger than the element value, then we can never do 549 // this splat. The only case that we could fit the replicated bits into our 550 // immediate field for would be zero, and we prefer to use vxor for it. 551 if (ValSizeInBytes < ByteSize) return SDOperand(); 552 553 // If the element value is larger than the splat value, cut it in half and 554 // check to see if the two halves are equal. Continue doing this until we 555 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 556 while (ValSizeInBytes > ByteSize) { 557 ValSizeInBytes >>= 1; 558 559 // If the top half equals the bottom half, we're still ok. 560 if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) != 561 (Value & ((1 << (8*ValSizeInBytes))-1))) 562 return SDOperand(); 563 } 564 565 // Properly sign extend the value. 566 int ShAmt = (4-ByteSize)*8; 567 int MaskVal = ((int)Value << ShAmt) >> ShAmt; 568 569 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 
570 if (MaskVal == 0) return SDOperand(); 571 572 // Finally, if this value fits in a 5 bit sext field, return it 573 if (((MaskVal << (32-5)) >> (32-5)) == MaskVal) 574 return DAG.getTargetConstant(MaskVal, MVT::i32); 575 return SDOperand(); 576} 577 578//===----------------------------------------------------------------------===// 579// LowerOperation implementation 580//===----------------------------------------------------------------------===// 581 582static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) { 583 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 584 Constant *C = CP->get(); 585 SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment()); 586 SDOperand Zero = DAG.getConstant(0, MVT::i32); 587 588 const TargetMachine &TM = DAG.getTarget(); 589 590 // If this is a non-darwin platform, we don't support non-static relo models 591 // yet. 592 if (TM.getRelocationModel() == Reloc::Static || 593 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 594 // Generate non-pic code that has direct accesses to the constant pool. 595 // The address of the global is just (hi(&g)+lo(&g)). 596 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 597 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 598 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 599 } 600 601 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 602 if (TM.getRelocationModel() == Reloc::PIC) { 603 // With PIC, the first instruction is actually "GR+hi(&G)". 
604 Hi = DAG.getNode(ISD::ADD, MVT::i32, 605 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 606 } 607 608 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 609 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 610 return Lo; 611} 612 613static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) { 614 JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); 615 SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32); 616 SDOperand Zero = DAG.getConstant(0, MVT::i32); 617 618 const TargetMachine &TM = DAG.getTarget(); 619 620 // If this is a non-darwin platform, we don't support non-static relo models 621 // yet. 622 if (TM.getRelocationModel() == Reloc::Static || 623 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 624 // Generate non-pic code that has direct accesses to the constant pool. 625 // The address of the global is just (hi(&g)+lo(&g)). 626 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, JTI, Zero); 627 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, JTI, Zero); 628 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 629 } 630 631 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, JTI, Zero); 632 if (TM.getRelocationModel() == Reloc::PIC) { 633 // With PIC, the first instruction is actually "GR+hi(&G)". 634 Hi = DAG.getNode(ISD::ADD, MVT::i32, 635 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 636 } 637 638 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, JTI, Zero); 639 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 640 return Lo; 641} 642 643static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) { 644 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 645 GlobalValue *GV = GSDN->getGlobal(); 646 SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset()); 647 SDOperand Zero = DAG.getConstant(0, MVT::i32); 648 649 const TargetMachine &TM = DAG.getTarget(); 650 651 // If this is a non-darwin platform, we don't support non-static relo models 652 // yet. 
653 if (TM.getRelocationModel() == Reloc::Static || 654 !TM.getSubtarget<PPCSubtarget>().isDarwin()) { 655 // Generate non-pic code that has direct accesses to globals. 656 // The address of the global is just (hi(&g)+lo(&g)). 657 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 658 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 659 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 660 } 661 662 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 663 if (TM.getRelocationModel() == Reloc::PIC) { 664 // With PIC, the first instruction is actually "GR+hi(&G)". 665 Hi = DAG.getNode(ISD::ADD, MVT::i32, 666 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 667 } 668 669 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 670 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 671 672 if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() && 673 (!GV->isExternal() || GV->hasNotBeenReadFromBytecode())) 674 return Lo; 675 676 // If the global is weak or external, we have to go through the lazy 677 // resolution stub. 678 return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0)); 679} 680 681static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) { 682 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 683 684 // If we're comparing for equality to zero, expose the fact that this is 685 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 686 // fold the new nodes. 
687 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 688 if (C->isNullValue() && CC == ISD::SETEQ) { 689 MVT::ValueType VT = Op.getOperand(0).getValueType(); 690 SDOperand Zext = Op.getOperand(0); 691 if (VT < MVT::i32) { 692 VT = MVT::i32; 693 Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0)); 694 } 695 unsigned Log2b = Log2_32(MVT::getSizeInBits(VT)); 696 SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext); 697 SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz, 698 DAG.getConstant(Log2b, MVT::i32)); 699 return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc); 700 } 701 // Leave comparisons against 0 and -1 alone for now, since they're usually 702 // optimized. FIXME: revisit this when we can custom lower all setcc 703 // optimizations. 704 if (C->isAllOnesValue() || C->isNullValue()) 705 return SDOperand(); 706 } 707 708 // If we have an integer seteq/setne, turn it into a compare against zero 709 // by subtracting the rhs from the lhs, which is faster than setting a 710 // condition register, reading it back out, and masking the correct bit. 711 MVT::ValueType LHSVT = Op.getOperand(0).getValueType(); 712 if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 713 MVT::ValueType VT = Op.getValueType(); 714 SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0), 715 Op.getOperand(1)); 716 return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC); 717 } 718 return SDOperand(); 719} 720 721static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG, 722 unsigned VarArgsFrameIndex) { 723 // vastart just stores the address of the VarArgsFrameIndex slot into the 724 // memory location argument. 
725 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 726 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 727 Op.getOperand(1), Op.getOperand(2)); 728} 729 730static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) { 731 SDOperand Copy; 732 switch(Op.getNumOperands()) { 733 default: 734 assert(0 && "Do not know how to return this many arguments!"); 735 abort(); 736 case 1: 737 return SDOperand(); // ret void is legal 738 case 2: { 739 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 740 unsigned ArgReg; 741 if (MVT::isVector(ArgVT)) 742 ArgReg = PPC::V2; 743 else if (MVT::isInteger(ArgVT)) 744 ArgReg = PPC::R3; 745 else { 746 assert(MVT::isFloatingPoint(ArgVT)); 747 ArgReg = PPC::F1; 748 } 749 750 Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1), 751 SDOperand()); 752 753 // If we haven't noted the R3/F1 are live out, do so now. 754 if (DAG.getMachineFunction().liveout_empty()) 755 DAG.getMachineFunction().addLiveOut(ArgReg); 756 break; 757 } 758 case 3: 759 Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2), 760 SDOperand()); 761 Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1)); 762 // If we haven't noted the R3+R4 are live out, do so now. 763 if (DAG.getMachineFunction().liveout_empty()) { 764 DAG.getMachineFunction().addLiveOut(PPC::R3); 765 DAG.getMachineFunction().addLiveOut(PPC::R4); 766 } 767 break; 768 } 769 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); 770} 771 772/// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when 773/// possible. 774static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) { 775 // Not FP? Not a fsel. 776 if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) || 777 !MVT::isFloatingPoint(Op.getOperand(2).getValueType())) 778 return SDOperand(); 779 780 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 781 782 // Cannot handle SETEQ/SETNE. 
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
      // FALL THROUGH
    case ISD::SETUGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setle
      // FALL THROUGH
    case ISD::SETULE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      // x <= 0 is the same as -x >= 0, which fsel can test directly.
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  // General case: materialize LHS-RHS (or RHS-LHS) and fsel on its sign.
  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    // LHS < RHS iff LHS-RHS < 0, so the true/false values are swapped.
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

/// LowerFP_TO_SINT - Convert an FP value to a signed integer using the
/// truncating convert instructions (fctiwz/fctidz).
static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  // The conversion instructions operate on f64; widen f32 sources.
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
863 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp); 864 if (Op.getValueType() == MVT::i32) 865 Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits); 866 return Bits; 867} 868 869static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) { 870 if (Op.getOperand(0).getValueType() == MVT::i64) { 871 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 872 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 873 if (Op.getValueType() == MVT::f32) 874 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 875 return FP; 876 } 877 878 assert(Op.getOperand(0).getValueType() == MVT::i32 && 879 "Unhandled SINT_TO_FP type in custom expander!"); 880 // Since we only generate this in 64-bit mode, we can take advantage of 881 // 64-bit registers. In particular, sign extend the input value into the 882 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 883 // then lfd it and fcfid it. 884 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 885 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 886 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 887 888 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 889 Op.getOperand(0)); 890 891 // STD the extended value into the stack slot. 892 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 893 DAG.getEntryNode(), Ext64, FIdx, 894 DAG.getSrcValue(NULL)); 895 // Load the value as a double. 896 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL)); 897 898 // FCFID it and return it. 899 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); 900 if (Op.getValueType() == MVT::f32) 901 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 902 return FP; 903} 904 905static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) { 906 assert(Op.getValueType() == MVT::i64 && 907 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 908 // The generic code does a fine job expanding shift by a constant. 
909 if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand(); 910 911 // Otherwise, expand into a bunch of logical ops. Note that these ops 912 // depend on the PPC behavior for oversized shift amounts. 913 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 914 DAG.getConstant(0, MVT::i32)); 915 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 916 DAG.getConstant(1, MVT::i32)); 917 SDOperand Amt = Op.getOperand(1); 918 919 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 920 DAG.getConstant(32, MVT::i32), Amt); 921 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt); 922 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1); 923 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 924 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 925 DAG.getConstant(-32U, MVT::i32)); 926 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5); 927 SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 928 SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt); 929 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 930} 931 932static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) { 933 assert(Op.getValueType() == MVT::i64 && 934 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 935 // The generic code does a fine job expanding shift by a constant. 936 if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand(); 937 938 // Otherwise, expand into a bunch of logical ops. Note that these ops 939 // depend on the PPC behavior for oversized shift amounts. 
940 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 941 DAG.getConstant(0, MVT::i32)); 942 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 943 DAG.getConstant(1, MVT::i32)); 944 SDOperand Amt = Op.getOperand(1); 945 946 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 947 DAG.getConstant(32, MVT::i32), Amt); 948 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 949 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 950 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 951 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 952 DAG.getConstant(-32U, MVT::i32)); 953 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5); 954 SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 955 SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt); 956 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 957} 958 959static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) { 960 assert(Op.getValueType() == MVT::i64 && 961 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!"); 962 // The generic code does a fine job expanding shift by a constant. 963 if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand(); 964 965 // Otherwise, expand into a bunch of logical ops, followed by a select_cc. 
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  // Lo' is (Lo >> Amt) | (Hi << (32-Amt)) when Amt < 32, and sra(Hi, Amt-32)
  // otherwise; the select_cc on Amt-32 picks between the two.
  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
                                    Tmp4, Tmp6, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  // NOTE: the masks below (~0U >> (32-EltBitSize)) assume elements are at
  // most 32 bits wide (i8/i16/i32/f32 vector elements).
  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64-bit half?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.
  // Fold the two 32-bit halves together (OR for bits, AND for undefs: a bit
  // is only "undef" in the splat if it is undef in both halves).
  uint32_t Bits32 = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different than the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  uint16_t Bits16 = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different than the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Force vspltis[hw] -1 to vspltisb -1.
  if (Val == -1) SplatSize = 1;

  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };
  MVT::ValueType CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
  std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.  DestVT defaults to the type of LHS.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.  DestVT defaults to the type of Op0.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT::ValueType VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

  // A vsldoi by Amt bytes is a byte shuffle selecting bytes Amt..Amt+15 of
  // the 32-byte LHS:RHS concatenation.
  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();  // Not a constant vector.

  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

    // First, handle single instruction cases.

    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal= int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);


    // Two instruction sequences.

    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }

    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

      // Make the VSLW intrinsic, computing 0x8000_0000.  vslw shifts each
      // word of -1 left by (-1 & 31) == 31 bits.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                       OnesV, DAG);

      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
1210 unsigned SplatBitSize = SplatSize*8; 1211 static const char SplatCsts[] = { 1212 -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, 1213 -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16 1214 }; 1215 for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){ 1216 // Indirect through the SplatCsts array so that we favor 'vsplti -1' for 1217 // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1' 1218 int i = SplatCsts[idx]; 1219 1220 // Figure out what shift amount will be used by altivec if shifted by i in 1221 // this splat size. 1222 unsigned TypeShiftAmt = i & (SplatBitSize-1); 1223 1224 // vsplti + shl self. 1225 if (SextVal == (i << (int)TypeShiftAmt)) { 1226 Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG); 1227 static const unsigned IIDs[] = { // Intrinsic to use for each size. 1228 Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0, 1229 Intrinsic::ppc_altivec_vslw 1230 }; 1231 return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG); 1232 } 1233 1234 // vsplti + srl self. 1235 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 1236 Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG); 1237 static const unsigned IIDs[] = { // Intrinsic to use for each size. 1238 Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0, 1239 Intrinsic::ppc_altivec_vsrw 1240 }; 1241 return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG); 1242 } 1243 1244 // vsplti + sra self. 1245 if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { 1246 Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG); 1247 static const unsigned IIDs[] = { // Intrinsic to use for each size. 1248 Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, 1249 Intrinsic::ppc_altivec_vsraw 1250 }; 1251 return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG); 1252 } 1253 1254 // vsplti + rol self. 
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        // NOTE(review): when TypeShiftAmt == 0 the right shift count equals
        // SplatBitSize, which is undefined behavior in C++ for 32-bit splats
        // -- confirm this case cannot be reached or is benign here.
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      // NOTE(review): the (i >> (TypeShiftAmt-8)) style shift counts below
      // can be negative (undefined behavior) when TypeShiftAmt < 8/16/24 --
      // the intent appears to be a byte-rotate test; verify these conditions.
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }

    // Three instruction sequences.

    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
    // (Values in [0,16] and even values were all returned above, so only the
    // odd values in [17,31] actually reach this test.)
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
    }
  }

  return SDOperand();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  // Decode the table entry: a 4-bit opcode and two 13-bit operand IDs that
  // recursively index back into the perfect shuffle table.
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,   // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12,
  };

  if (OpNum == OP_COPY) {
    // The two magic IDs encode the identity shuffles <0,1,2,3> (LHS) and
    // <4,5,6,7> (RHS) in base-9 element numbering.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 1345 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 1346 break; 1347 case OP_VSPLTISW0: 1348 for (unsigned i = 0; i != 16; ++i) 1349 ShufIdxs[i] = (i&3)+0; 1350 break; 1351 case OP_VSPLTISW1: 1352 for (unsigned i = 0; i != 16; ++i) 1353 ShufIdxs[i] = (i&3)+4; 1354 break; 1355 case OP_VSPLTISW2: 1356 for (unsigned i = 0; i != 16; ++i) 1357 ShufIdxs[i] = (i&3)+8; 1358 break; 1359 case OP_VSPLTISW3: 1360 for (unsigned i = 0; i != 16; ++i) 1361 ShufIdxs[i] = (i&3)+12; 1362 break; 1363 case OP_VSLDOI4: 1364 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 1365 case OP_VSLDOI8: 1366 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 1367 case OP_VSLDOI12: 1368 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 1369 } 1370 std::vector<SDOperand> Ops; 1371 for (unsigned i = 0; i != 16; ++i) 1372 Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32)); 1373 1374 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 1375 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 1376} 1377 1378/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this 1379/// is a shuffle we can handle in a single instruction, return it. Otherwise, 1380/// return the code it can be lowered into. Worst case, it can always be 1381/// lowered into a vperm. 1382static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 1383 SDOperand V1 = Op.getOperand(0); 1384 SDOperand V2 = Op.getOperand(1); 1385 SDOperand PermMask = Op.getOperand(2); 1386 1387 // Cases that are handled by instructions that take permute immediates 1388 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 1389 // selected by the instruction selector. 
  if (V2.getOpcode() == ISD::UNDEF) {
    // Single-input shuffles: check the unary forms of each AltiVec shuffle
    // immediate (second flag = true means "treat as one-input").
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef (8 = "no source element yet").
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.

      unsigned ByteSource =
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      // All four bytes of a 4-byte element must come in order from the same
      // source word; otherwise it is not a word shuffle.
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table (base-9 digits, one per
    // result element; 0-7 = source word, 8 = undef).
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.  Many things affect the cost
    // of vperm, particularly how many times the perm mask needs to be computed.
    // For example, if the perm mask can be hoisted out of a loop or is already
    // used (perhaps because there are multiple permutes with the same shuffle
    // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
    // the loop requires an extra register.
    //
    // As a compromise, we only emit discrete instructions if the shuffle can be
    // generated in 3 or fewer operations.  When we have loop information
    // available, if this block is within a loop, we should avoid using vperm
    // for 3-operation perms and use a constant pool load instead.
    if (Cost < 3)
      return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
  }

  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
  // vector that will get spilled to the constant pool.
  if (V2.getOpcode() == ISD::UNDEF) V2 = V1;

  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
  // that it is in input element units, not in bytes.  Convert now.
  MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
  unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;

  std::vector<SDOperand> ResultMask;
  for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
    unsigned SrcElt;
    if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
      SrcElt = 0;   // Undef element: any source byte will do; use element 0.
    else
      SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();

    // Expand each element index into its constituent byte indices.
    for (unsigned j = 0; j != BytesPerElement; ++j)
      ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
                                           MVT::i8));
  }

  SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
  return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
}

/// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
/// altivec comparison.  If it is, return true and fill in Opc/isDot with
/// information about the intrinsic.  CompareOpc is the instruction's
/// extended opcode field; isDot indicates a predicate ("record") form that
/// also sets CR6.
static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
                                  bool &isDot) {
  unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
  CompareOpc = -1;
  isDot = false;
  switch (IntrinsicID) {
  default: return false;
    // Comparison predicates.
  case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
  case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;

    // Normal Comparisons.
  case Intrinsic::ppc_altivec_vcmpbfp:    CompareOpc = 966; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpeqfp:   CompareOpc = 198; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequb:   CompareOpc =   6; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequh:   CompareOpc =  70; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpequw:   CompareOpc = 134; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgefp:   CompareOpc = 454; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtfp:   CompareOpc = 710; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsb:   CompareOpc = 774; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsh:   CompareOpc = 838; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtsw:   CompareOpc = 902; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtub:   CompareOpc = 518; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuh:   CompareOpc = 582; isDot = 0; break;
  case Intrinsic::ppc_altivec_vcmpgtuw:   CompareOpc = 646; isDot = 0; break;
  }
  return true;
}

/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) {
  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getAltivecCompareInfo(Op, CompareOpc, isDot))
    return SDOperand();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(),
                                Op.getOperand(1), Op.getOperand(2),
                                DAG.getConstant(CompareOpc, MVT::i32));
    return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
1562 std::vector<SDOperand> Ops; 1563 std::vector<MVT::ValueType> VTs; 1564 Ops.push_back(Op.getOperand(2)); // LHS 1565 Ops.push_back(Op.getOperand(3)); // RHS 1566 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 1567 VTs.push_back(Op.getOperand(2).getValueType()); 1568 VTs.push_back(MVT::Flag); 1569 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 1570 1571 // Now that we have the comparison, emit a copy from the CR to a GPR. 1572 // This is flagged to the above dot comparison. 1573 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 1574 DAG.getRegister(PPC::CR6, MVT::i32), 1575 CompNode.getValue(1)); 1576 1577 // Unpack the result based on how the target uses it. 1578 unsigned BitNo; // Bit # of CR6. 1579 bool InvertBit; // Invert result? 1580 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 1581 default: // Can't happen, don't crash on invalid number though. 1582 case 0: // Return the value of the EQ bit of CR6. 1583 BitNo = 0; InvertBit = false; 1584 break; 1585 case 1: // Return the inverted value of the EQ bit of CR6. 1586 BitNo = 0; InvertBit = true; 1587 break; 1588 case 2: // Return the value of the LT bit of CR6. 1589 BitNo = 2; InvertBit = false; 1590 break; 1591 case 3: // Return the inverted value of the LT bit of CR6. 1592 BitNo = 2; InvertBit = true; 1593 break; 1594 } 1595 1596 // Shift the bit into the low position. 1597 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 1598 DAG.getConstant(8-(3-BitNo), MVT::i32)); 1599 // Isolate the bit. 1600 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 1601 DAG.getConstant(1, MVT::i32)); 1602 1603 // If we are supposed to, toggle the bit. 1604 if (InvertBit) 1605 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 1606 DAG.getConstant(1, MVT::i32)); 1607 return Flags; 1608} 1609 1610static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 1611 // Create a stack slot that is 16-byte aligned. 
1612 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 1613 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 1614 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 1615 1616 // Store the input value into Value#0 of the stack slot. 1617 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 1618 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 1619 // Load it out. 1620 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 1621} 1622 1623static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { 1624 if (Op.getValueType() == MVT::v4i32) { 1625 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1626 1627 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 1628 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 1629 1630 SDOperand RHSSwap = // = vrlw RHS, 16 1631 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 1632 1633 // Shrinkify inputs to v8i16. 1634 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 1635 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 1636 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 1637 1638 // Low parts multiplied together, generating 32-bit results (we ignore the 1639 // top parts). 1640 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 1641 LHS, RHS, DAG, MVT::v4i32); 1642 1643 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 1644 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 1645 // Shift the high parts up 16 bits. 
1646 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 1647 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 1648 } else if (Op.getValueType() == MVT::v8i16) { 1649 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1650 1651 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 1652 1653 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 1654 LHS, RHS, Zero, DAG); 1655 } else if (Op.getValueType() == MVT::v16i8) { 1656 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1657 1658 // Multiply the even 8-bit parts, producing 16-bit sums. 1659 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 1660 LHS, RHS, DAG, MVT::v8i16); 1661 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 1662 1663 // Multiply the odd 8-bit parts, producing 16-bit sums. 1664 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 1665 LHS, RHS, DAG, MVT::v8i16); 1666 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 1667 1668 // Merge the results together. 1669 std::vector<SDOperand> Ops; 1670 for (unsigned i = 0; i != 8; ++i) { 1671 Ops.push_back(DAG.getConstant(2*i+1, MVT::i8)); 1672 Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8)); 1673 } 1674 1675 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 1676 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 1677 } else { 1678 assert(0 && "Unknown mul to lower!"); 1679 abort(); 1680 } 1681} 1682 1683/// LowerOperation - Provide custom lowering hooks for some operations. 
1684/// 1685SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1686 switch (Op.getOpcode()) { 1687 default: assert(0 && "Wasn't expecting to be able to lower this!"); 1688 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 1689 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 1690 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 1691 case ISD::SETCC: return LowerSETCC(Op, DAG); 1692 case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); 1693 case ISD::RET: return LowerRET(Op, DAG); 1694 1695 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 1696 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 1697 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 1698 1699 // Lower 64-bit shifts. 1700 case ISD::SHL: return LowerSHL(Op, DAG); 1701 case ISD::SRL: return LowerSRL(Op, DAG); 1702 case ISD::SRA: return LowerSRA(Op, DAG); 1703 1704 // Vector-related lowering. 1705 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 1706 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 1707 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 1708 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 1709 case ISD::MUL: return LowerMUL(Op, DAG); 1710 } 1711 return SDOperand(); 1712} 1713 1714//===----------------------------------------------------------------------===// 1715// Other Lowering Code 1716//===----------------------------------------------------------------------===// 1717 1718std::vector<SDOperand> 1719PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 1720 // 1721 // add beautiful description of PPC stack frame format, or at least some docs 1722 // 1723 MachineFunction &MF = DAG.getMachineFunction(); 1724 MachineFrameInfo *MFI = MF.getFrameInfo(); 1725 MachineBasicBlock& BB = MF.front(); 1726 SSARegMap *RegMap = MF.getSSARegMap(); 1727 std::vector<SDOperand> ArgValues; 1728 1729 unsigned ArgOffset = 24; 1730 unsigned 
GPR_remaining = 8; 1731 unsigned FPR_remaining = 13; 1732 unsigned GPR_idx = 0, FPR_idx = 0; 1733 static const unsigned GPR[] = { 1734 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1735 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1736 }; 1737 static const unsigned FPR[] = { 1738 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1739 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1740 }; 1741 1742 // Add DAG nodes to load the arguments... On entry to a function on PPC, 1743 // the arguments start at offset 24, although they are likely to be passed 1744 // in registers. 1745 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 1746 SDOperand newroot, argt; 1747 unsigned ObjSize; 1748 bool needsLoad = false; 1749 bool ArgLive = !I->use_empty(); 1750 MVT::ValueType ObjectVT = getValueType(I->getType()); 1751 1752 switch (ObjectVT) { 1753 default: assert(0 && "Unhandled argument type!"); 1754 case MVT::i1: 1755 case MVT::i8: 1756 case MVT::i16: 1757 case MVT::i32: 1758 ObjSize = 4; 1759 if (!ArgLive) break; 1760 if (GPR_remaining > 0) { 1761 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1762 MF.addLiveIn(GPR[GPR_idx], VReg); 1763 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1764 if (ObjectVT != MVT::i32) { 1765 unsigned AssertOp = I->getType()->isSigned() ? 
ISD::AssertSext 1766 : ISD::AssertZext; 1767 argt = DAG.getNode(AssertOp, MVT::i32, argt, 1768 DAG.getValueType(ObjectVT)); 1769 argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt); 1770 } 1771 } else { 1772 needsLoad = true; 1773 } 1774 break; 1775 case MVT::i64: 1776 ObjSize = 8; 1777 if (!ArgLive) break; 1778 if (GPR_remaining > 0) { 1779 SDOperand argHi, argLo; 1780 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1781 MF.addLiveIn(GPR[GPR_idx], VReg); 1782 argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1783 // If we have two or more remaining argument registers, then both halves 1784 // of the i64 can be sourced from there. Otherwise, the lower half will 1785 // have to come off the stack. This can happen when an i64 is preceded 1786 // by 28 bytes of arguments. 1787 if (GPR_remaining > 1) { 1788 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1789 MF.addLiveIn(GPR[GPR_idx+1], VReg); 1790 argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32); 1791 } else { 1792 int FI = MFI->CreateFixedObject(4, ArgOffset+4); 1793 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 1794 argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 1795 DAG.getSrcValue(NULL)); 1796 } 1797 // Build the outgoing arg thingy 1798 argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi); 1799 newroot = argLo; 1800 } else { 1801 needsLoad = true; 1802 } 1803 break; 1804 case MVT::f32: 1805 case MVT::f64: 1806 ObjSize = (ObjectVT == MVT::f64) ? 
8 : 4; 1807 if (!ArgLive) { 1808 if (FPR_remaining > 0) { 1809 --FPR_remaining; 1810 ++FPR_idx; 1811 } 1812 break; 1813 } 1814 if (FPR_remaining > 0) { 1815 unsigned VReg; 1816 if (ObjectVT == MVT::f32) 1817 VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass); 1818 else 1819 VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass); 1820 MF.addLiveIn(FPR[FPR_idx], VReg); 1821 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT); 1822 --FPR_remaining; 1823 ++FPR_idx; 1824 } else { 1825 needsLoad = true; 1826 } 1827 break; 1828 } 1829 1830 // We need to load the argument to a virtual register if we determined above 1831 // that we ran out of physical registers of the appropriate type 1832 if (needsLoad) { 1833 unsigned SubregOffset = 0; 1834 if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3; 1835 if (ObjectVT == MVT::i16) SubregOffset = 2; 1836 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 1837 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 1838 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, 1839 DAG.getConstant(SubregOffset, MVT::i32)); 1840 argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 1841 DAG.getSrcValue(NULL)); 1842 } 1843 1844 // Every 4 bytes of argument space consumes one of the GPRs available for 1845 // argument passing. 1846 if (GPR_remaining > 0) { 1847 unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1; 1848 GPR_remaining -= delta; 1849 GPR_idx += delta; 1850 } 1851 ArgOffset += ObjSize; 1852 if (newroot.Val) 1853 DAG.setRoot(newroot.getValue(1)); 1854 1855 ArgValues.push_back(argt); 1856 } 1857 1858 // If the function takes variable number of arguments, make a frame index for 1859 // the start of the first vararg value... for expansion of llvm.va_start. 
1860 if (F.isVarArg()) { 1861 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); 1862 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 1863 // If this function is vararg, store any remaining integer argument regs 1864 // to their spots on the stack so that they may be loaded by deferencing the 1865 // result of va_next. 1866 std::vector<SDOperand> MemOps; 1867 for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) { 1868 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1869 MF.addLiveIn(GPR[GPR_idx], VReg); 1870 SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1871 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), 1872 Val, FIN, DAG.getSrcValue(NULL)); 1873 MemOps.push_back(Store); 1874 // Increment the address by four for the next argument to store 1875 SDOperand PtrOff = DAG.getConstant(4, getPointerTy()); 1876 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff); 1877 } 1878 if (!MemOps.empty()) { 1879 MemOps.push_back(DAG.getRoot()); 1880 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps)); 1881 } 1882 } 1883 1884 return ArgValues; 1885} 1886 1887std::pair<SDOperand, SDOperand> 1888PPCTargetLowering::LowerCallTo(SDOperand Chain, 1889 const Type *RetTy, bool isVarArg, 1890 unsigned CallingConv, bool isTailCall, 1891 SDOperand Callee, ArgListTy &Args, 1892 SelectionDAG &DAG) { 1893 // args_to_use will accumulate outgoing args for the PPCISD::CALL case in 1894 // SelectExpr to use to put the arguments in the appropriate registers. 1895 std::vector<SDOperand> args_to_use; 1896 1897 // Count how many bytes are to be pushed on the stack, including the linkage 1898 // area, and parameter passing area. 
1899 unsigned NumBytes = 24; 1900 1901 if (Args.empty()) { 1902 Chain = DAG.getCALLSEQ_START(Chain, 1903 DAG.getConstant(NumBytes, getPointerTy())); 1904 } else { 1905 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1906 switch (getValueType(Args[i].second)) { 1907 default: assert(0 && "Unknown value type!"); 1908 case MVT::i1: 1909 case MVT::i8: 1910 case MVT::i16: 1911 case MVT::i32: 1912 case MVT::f32: 1913 NumBytes += 4; 1914 break; 1915 case MVT::i64: 1916 case MVT::f64: 1917 NumBytes += 8; 1918 break; 1919 } 1920 } 1921 1922 // Just to be safe, we'll always reserve the full 24 bytes of linkage area 1923 // plus 32 bytes of argument space in case any called code gets funky on us. 1924 // (Required by ABI to support var arg) 1925 if (NumBytes < 56) NumBytes = 56; 1926 1927 // Adjust the stack pointer for the new arguments... 1928 // These operations are automatically eliminated by the prolog/epilog pass 1929 Chain = DAG.getCALLSEQ_START(Chain, 1930 DAG.getConstant(NumBytes, getPointerTy())); 1931 1932 // Set up a copy of the stack pointer for use loading and storing any 1933 // arguments that may not fit in the registers available for argument 1934 // passing. 1935 SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 1936 1937 // Figure out which arguments are going to go in registers, and which in 1938 // memory. Also, if this is a vararg function, floating point operations 1939 // must be stored to our stack, and loaded into integer regs as well, if 1940 // any integer regs are available for argument passing. 1941 unsigned ArgOffset = 24; 1942 unsigned GPR_remaining = 8; 1943 unsigned FPR_remaining = 13; 1944 1945 std::vector<SDOperand> MemOps; 1946 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1947 // PtrOff will be used to store the current argument to the stack if a 1948 // register cannot be found for it. 
1949 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1950 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1951 MVT::ValueType ArgVT = getValueType(Args[i].second); 1952 1953 switch (ArgVT) { 1954 default: assert(0 && "Unexpected ValueType for argument!"); 1955 case MVT::i1: 1956 case MVT::i8: 1957 case MVT::i16: 1958 // Promote the integer to 32 bits. If the input type is signed use a 1959 // sign extend, otherwise use a zero extend. 1960 if (Args[i].second->isSigned()) 1961 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 1962 else 1963 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 1964 // FALL THROUGH 1965 case MVT::i32: 1966 if (GPR_remaining > 0) { 1967 args_to_use.push_back(Args[i].first); 1968 --GPR_remaining; 1969 } else { 1970 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1971 Args[i].first, PtrOff, 1972 DAG.getSrcValue(NULL))); 1973 } 1974 ArgOffset += 4; 1975 break; 1976 case MVT::i64: 1977 // If we have one free GPR left, we can place the upper half of the i64 1978 // in it, and store the other half to the stack. If we have two or more 1979 // free GPRs, then we can pass both halves of the i64 in registers. 
1980 if (GPR_remaining > 0) { 1981 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1982 Args[i].first, DAG.getConstant(1, MVT::i32)); 1983 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1984 Args[i].first, DAG.getConstant(0, MVT::i32)); 1985 args_to_use.push_back(Hi); 1986 --GPR_remaining; 1987 if (GPR_remaining > 0) { 1988 args_to_use.push_back(Lo); 1989 --GPR_remaining; 1990 } else { 1991 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 1992 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 1993 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1994 Lo, PtrOff, DAG.getSrcValue(NULL))); 1995 } 1996 } else { 1997 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1998 Args[i].first, PtrOff, 1999 DAG.getSrcValue(NULL))); 2000 } 2001 ArgOffset += 8; 2002 break; 2003 case MVT::f32: 2004 case MVT::f64: 2005 if (FPR_remaining > 0) { 2006 args_to_use.push_back(Args[i].first); 2007 --FPR_remaining; 2008 if (isVarArg) { 2009 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain, 2010 Args[i].first, PtrOff, 2011 DAG.getSrcValue(NULL)); 2012 MemOps.push_back(Store); 2013 // Float varargs are always shadowed in available integer registers 2014 if (GPR_remaining > 0) { 2015 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 2016 DAG.getSrcValue(NULL)); 2017 MemOps.push_back(Load.getValue(1)); 2018 args_to_use.push_back(Load); 2019 --GPR_remaining; 2020 } 2021 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 2022 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 2023 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 2024 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 2025 DAG.getSrcValue(NULL)); 2026 MemOps.push_back(Load.getValue(1)); 2027 args_to_use.push_back(Load); 2028 --GPR_remaining; 2029 } 2030 } else { 2031 // If we have any FPRs remaining, we may also have GPRs remaining. 2032 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 2033 // GPRs. 
2034 if (GPR_remaining > 0) { 2035 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2036 --GPR_remaining; 2037 } 2038 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 2039 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2040 --GPR_remaining; 2041 } 2042 } 2043 } else { 2044 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 2045 Args[i].first, PtrOff, 2046 DAG.getSrcValue(NULL))); 2047 } 2048 ArgOffset += (ArgVT == MVT::f32) ? 4 : 8; 2049 break; 2050 } 2051 } 2052 if (!MemOps.empty()) 2053 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps); 2054 } 2055 2056 std::vector<MVT::ValueType> RetVals; 2057 MVT::ValueType RetTyVT = getValueType(RetTy); 2058 MVT::ValueType ActualRetTyVT = RetTyVT; 2059 if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16) 2060 ActualRetTyVT = MVT::i32; // Promote result to i32. 2061 2062 if (RetTyVT == MVT::i64) { 2063 RetVals.push_back(MVT::i32); 2064 RetVals.push_back(MVT::i32); 2065 } else if (RetTyVT != MVT::isVoid) { 2066 RetVals.push_back(ActualRetTyVT); 2067 } 2068 RetVals.push_back(MVT::Other); 2069 2070 // If the callee is a GlobalAddress node (quite common, every direct call is) 2071 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 2072 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2073 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); 2074 2075 std::vector<SDOperand> Ops; 2076 Ops.push_back(Chain); 2077 Ops.push_back(Callee); 2078 Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end()); 2079 SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops); 2080 Chain = TheCall.getValue(TheCall.Val->getNumValues()-1); 2081 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, 2082 DAG.getConstant(NumBytes, getPointerTy())); 2083 SDOperand RetVal = TheCall; 2084 2085 // If the result is a small value, add a note so that we keep track of the 2086 // information about whether it is sign or zero extended. 
2087 if (RetTyVT != ActualRetTyVT) { 2088 RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext, 2089 MVT::i32, RetVal, DAG.getValueType(RetTyVT)); 2090 RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal); 2091 } else if (RetTyVT == MVT::i64) { 2092 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1)); 2093 } 2094 2095 return std::make_pair(RetVal, Chain); 2096} 2097 2098MachineBasicBlock * 2099PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 2100 MachineBasicBlock *BB) { 2101 assert((MI->getOpcode() == PPC::SELECT_CC_Int || 2102 MI->getOpcode() == PPC::SELECT_CC_F4 || 2103 MI->getOpcode() == PPC::SELECT_CC_F8 || 2104 MI->getOpcode() == PPC::SELECT_CC_VRRC) && 2105 "Unexpected instr type to insert"); 2106 2107 // To "insert" a SELECT_CC instruction, we actually have to insert the diamond 2108 // control-flow pattern. The incoming instruction knows the destination vreg 2109 // to set, the condition code register to branch on, the true/false values to 2110 // select between, and a branch opcode to use. 2111 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2112 ilist<MachineBasicBlock>::iterator It = BB; 2113 ++It; 2114 2115 // thisMBB: 2116 // ... 2117 // TrueVal = ... 2118 // cmpTY ccX, r1, r2 2119 // bCC copy1MBB 2120 // fallthrough --> copy0MBB 2121 MachineBasicBlock *thisMBB = BB; 2122 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 2123 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 2124 BuildMI(BB, MI->getOperand(4).getImmedValue(), 2) 2125 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 2126 MachineFunction *F = BB->getParent(); 2127 F->getBasicBlockList().insert(It, copy0MBB); 2128 F->getBasicBlockList().insert(It, sinkMBB); 2129 // Update machine-CFG edges by first adding all successors of the current 2130 // block to the new block which will contain the Phi node for the select. 
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;
}

//===----------------------------------------------------------------------===//
// Target Optimization Hooks
//===----------------------------------------------------------------------===//

/// PerformDAGCombine - Target-specific DAG combines: fp<->int round trips
/// without memory traffic, STFIWX for integer stores of FP conversions,
/// VCMP/VCMPo CSE, and branching directly on CR6 for Altivec predicates.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SINT_TO_FP:
    // fctidz/fcfid are only available on 64-bit-capable implementations.
    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.  (Not yet implemented.)
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      // STFIWX stores the integer word directly from the FP register,
      // replacing the original store's chain/pointer operands.
      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
            (*UI)->getOperand(1) == N->getOperand(1) &&
            (*UI)->getOperand(2) == N->getOperand(2) &&
            (*UI)->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there is no VCMPo node, or if the flag value has a single use, don't
      // transform this.
      if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1))
        break;

      // Look at the (necessarily single) use of the flag value.  If it has a
      // chain, this transformation is more complex.  Note that multiple things
      // could use the value result, which we should ignore.
      SDNode *FlagUser = 0;
      for (SDNode::use_iterator UI = VCMPoNode->use_begin();
           FlagUser == 0; ++UI) {
        assert(UI != VCMPoNode->use_end() && "Didn't find user!");
        SDNode *User = *UI;
        for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
          if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) {
            FlagUser = User;
            break;
          }
        }
      }

      // If the user is a MFCR instruction, we know this is safe.  Otherwise we
      // give up for right now.
      if (FlagUser->getOpcode() == PPCISD::MFCR)
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  case ISD::BR_CC: {
    // If this is a branch on an altivec predicate comparison, lower this so
    // that we don't have to do a MFCR: instead, branch directly on CR6.  This
    // lowering is done pre-legalize, because the legalizer lowers the predicate
    // compare down to code that is difficult to reassemble.
    ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
    SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3);
    int CompareOpc;
    bool isDot;

    if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
        isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        getAltivecCompareInfo(LHS, CompareOpc, isDot)) {
      assert(isDot && "Can't compare against a vector result!");

      // If this is a comparison against something other than 0/1, then we know
      // that the condition is never/always true.
      unsigned Val = cast<ConstantSDNode>(RHS)->getValue();
      if (Val != 0 && Val != 1) {
        if (CC == ISD::SETEQ)      // Cond never true, remove branch.
          return N->getOperand(0);
        // Always !=, turn it into an unconditional branch.
        return DAG.getNode(ISD::BR, MVT::Other,
                           N->getOperand(0), N->getOperand(4));
      }

      // The predicate result is 0 or 1; fold the branch sense together with
      // the EQ/NE comparison against that constant.
      bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);

      // Create the PPCISD altivec 'dot' comparison node.
      std::vector<SDOperand> Ops;
      std::vector<MVT::ValueType> VTs;
      Ops.push_back(LHS.getOperand(2));  // LHS of compare
      Ops.push_back(LHS.getOperand(3));  // RHS of compare
      Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32));
      VTs.push_back(LHS.getOperand(2).getValueType());
      VTs.push_back(MVT::Flag);
      SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops);

      // Unpack the result based on how the target uses it.  The intrinsic's
      // predicate selector (operand 1) picks the CR6 bit and polarity,
      // mirroring the cases in LowerINTRINSIC_WO_CHAIN.
      unsigned CompOpc;
      switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) {
      default:  // Can't happen, don't crash on invalid number though.
      case 0:   // Branch on the value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE;
        break;
      case 1:   // Branch on the inverted value of the EQ bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ;
        break;
      case 2:   // Branch on the value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE;
        break;
      case 3:   // Branch on the inverted value of the LT bit of CR6.
        CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT;
        break;
      }

      return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0),
                         DAG.getRegister(PPC::CR6, MVT::i32),
                         DAG.getConstant(CompOpc, MVT::i32),
                         N->getOperand(4), CompNode.getValue(1));
    }
    break;
  }
  }

  return SDOperand();
}

//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//

/// computeMaskedBitsForTargetNode - The Altivec predicate intrinsics return
/// 0 or 1, so all bits but the lowest are known zero.
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
  switch (Op.getOpcode()) {
  default: break;
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
2377PPCTargetLowering::ConstraintType 2378PPCTargetLowering::getConstraintType(char ConstraintLetter) const { 2379 switch (ConstraintLetter) { 2380 default: break; 2381 case 'b': 2382 case 'r': 2383 case 'f': 2384 case 'v': 2385 case 'y': 2386 return C_RegisterClass; 2387 } 2388 return TargetLowering::getConstraintType(ConstraintLetter); 2389} 2390 2391 2392std::vector<unsigned> PPCTargetLowering:: 2393getRegClassForInlineAsmConstraint(const std::string &Constraint, 2394 MVT::ValueType VT) const { 2395 if (Constraint.size() == 1) { 2396 switch (Constraint[0]) { // GCC RS6000 Constraint Letters 2397 default: break; // Unknown constriant letter 2398 case 'b': 2399 return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 , 2400 PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 , 2401 PPC::R8 , PPC::R9 , PPC::R10, PPC::R11, 2402 PPC::R12, PPC::R13, PPC::R14, PPC::R15, 2403 PPC::R16, PPC::R17, PPC::R18, PPC::R19, 2404 PPC::R20, PPC::R21, PPC::R22, PPC::R23, 2405 PPC::R24, PPC::R25, PPC::R26, PPC::R27, 2406 PPC::R28, PPC::R29, PPC::R30, PPC::R31, 2407 0); 2408 case 'r': 2409 return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 , 2410 PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 , 2411 PPC::R8 , PPC::R9 , PPC::R10, PPC::R11, 2412 PPC::R12, PPC::R13, PPC::R14, PPC::R15, 2413 PPC::R16, PPC::R17, PPC::R18, PPC::R19, 2414 PPC::R20, PPC::R21, PPC::R22, PPC::R23, 2415 PPC::R24, PPC::R25, PPC::R26, PPC::R27, 2416 PPC::R28, PPC::R29, PPC::R30, PPC::R31, 2417 0); 2418 case 'f': 2419 return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 , 2420 PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 , 2421 PPC::F8 , PPC::F9 , PPC::F10, PPC::F11, 2422 PPC::F12, PPC::F13, PPC::F14, PPC::F15, 2423 PPC::F16, PPC::F17, PPC::F18, PPC::F19, 2424 PPC::F20, PPC::F21, PPC::F22, PPC::F23, 2425 PPC::F24, PPC::F25, PPC::F26, PPC::F27, 2426 PPC::F28, PPC::F29, PPC::F30, PPC::F31, 2427 0); 2428 case 'v': 2429 return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , 2430 PPC::V4 , 
PPC::V5 , PPC::V6 , PPC::V7 , 2431 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, 2432 PPC::V12, PPC::V13, PPC::V14, PPC::V15, 2433 PPC::V16, PPC::V17, PPC::V18, PPC::V19, 2434 PPC::V20, PPC::V21, PPC::V22, PPC::V23, 2435 PPC::V24, PPC::V25, PPC::V26, PPC::V27, 2436 PPC::V28, PPC::V29, PPC::V30, PPC::V31, 2437 0); 2438 case 'y': 2439 return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3, 2440 PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7, 2441 0); 2442 } 2443 } 2444 2445 return std::vector<unsigned>(); 2446} 2447 2448// isOperandValidForConstraint 2449bool PPCTargetLowering:: 2450isOperandValidForConstraint(SDOperand Op, char Letter) { 2451 switch (Letter) { 2452 default: break; 2453 case 'I': 2454 case 'J': 2455 case 'K': 2456 case 'L': 2457 case 'M': 2458 case 'N': 2459 case 'O': 2460 case 'P': { 2461 if (!isa<ConstantSDNode>(Op)) return false; // Must be an immediate. 2462 unsigned Value = cast<ConstantSDNode>(Op)->getValue(); 2463 switch (Letter) { 2464 default: assert(0 && "Unknown constraint letter!"); 2465 case 'I': // "I" is a signed 16-bit constant. 2466 return (short)Value == (int)Value; 2467 case 'J': // "J" is a constant with only the high-order 16 bits nonzero. 2468 case 'L': // "L" is a signed 16-bit constant shifted left 16 bits. 2469 return (short)Value == 0; 2470 case 'K': // "K" is a constant with only the low-order 16 bits nonzero. 2471 return (Value >> 16) == 0; 2472 case 'M': // "M" is a constant that is greater than 31. 2473 return Value > 31; 2474 case 'N': // "N" is a positive constant that is an exact power of two. 2475 return (int)Value > 0 && isPowerOf2_32(Value); 2476 case 'O': // "O" is the constant zero. 2477 return Value == 0; 2478 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 2479 return (short)-Value == (int)-Value; 2480 } 2481 break; 2482 } 2483 } 2484 2485 // Handle standard constraint letters. 
2486 return TargetLowering::isOperandValidForConstraint(Op, Letter); 2487} 2488 2489/// isLegalAddressImmediate - Return true if the integer value can be used 2490/// as the offset of the target addressing mode. 2491bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const { 2492 // PPC allows a sign-extended 16-bit immediate field. 2493 return (V > -(1 << 16) && V < (1 << 16)-1); 2494} 2495