PPCISelLowering.cpp revision af4ec0c56d2162a369fcdeba740ddda21be359b9
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // Use the hardware square root only if the subtarget has FSQRT;
  // otherwise expand it.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32  , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32  , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32  , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32   , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // We cannot sextinreg(i1).  Expand to shifts.
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
  setOperationAction(ISD::JumpTable, MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET               , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART           , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG             , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY            , MVT::Other, Expand);
  setOperationAction(ISD::VAEND             , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE         , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE      , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32  , Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // 64-bit implementations also have instructions for converting between
    // i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code.  This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls.  We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64 bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32 bit PowerPC wants to expand i64 shifts itself.
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand.  Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR    , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR    , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR   , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR   , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD  , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD  , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE , (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND   , MVT::v4i32, Legal);
    setOperationAction(ISD::OR    , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR   , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD  , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);
  setTargetDAGCombine(ISD::BR_CC);

  computeRegisterProperties();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL:          return "PPCISD::CALL";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  case PPCISD::COND_BRANCH:   return "PPCISD::COND_BRANCH";
  }
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
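    // (If so, the FP zero appears as a load whose pointer operand is a
    // ConstantPoolSDNode wrapping the original ConstantFP, which is what the
    // check below looks for.)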
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i),   i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)     // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {   // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
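  // (Example: the mask <3,4,5,...,17,18> selects 16 consecutive bytes of the
  // LHS:RHS concatenation starting at byte 3, i.e. a vsldoi with shift 3.
  // Leading undefs are allowed, so <u,u,5,6,...> also implies shift 3.)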
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;  // Number of BV entries per splat value.
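    // (e.g. for a v8i16 build_vector checked against ByteSize == 4 (vspltisw),
    // Multiple == 2: the even- and odd-position i16 elements must each be
    // uniform, and the resulting pair must be a value vspltisw can produce.)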
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across chunks.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
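    // (e.g. Value == 0x01010101 halves to 0x0101 and then to 0x01 with every
    // comparison matching, so a one-byte splat of 0x01 suffices.)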
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value                         & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
// LowerOperation implementation
//===----------------------------------------------------------------------===//

static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->get();
  SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  return Lo;
}

static SDOperand LowerJumpTable(SDOperand Op, SelectionDAG &DAG) {
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
  SDOperand JTI = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the jump table.
    // The address of the jump table is just (hi(&jt)+lo(&jt)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, JTI, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, JTI, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, JTI, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
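    // (PPCISD::GlobalBaseReg materializes the PIC base register "GR" set up
    // in the prologue; the full address is GR + hi16(&G) + lo16(&G), with the
    // lo16 add emitted below.)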
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, JTI, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  return Lo;
}

static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);

  if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
      (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
}

static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
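  // (e.g. (seteq x, y) becomes (seteq (sub x, y), 0); when that node is
  // re-legalized, the zero-comparison path above turns it into
  // (srl (ctlz (sub x, y)), 5) for i32, since ctlz yields 32 exactly when
  // its input is zero.)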
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

static SDOperand LowerFORMAL_ARGUMENTS(SDOperand Op, SelectionDAG &DAG,
                                       int &VarArgsFrameIndex) {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;
  SDOperand Root = Op.getOperand(0);

  unsigned ArgOffset = 24;
  const unsigned Num_GPR_Regs = 8;
  const unsigned Num_FPR_Regs = 13;
  const unsigned Num_VR_Regs  = 12;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  static const unsigned GPR[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };
  static const unsigned VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  // Add DAG nodes to load the arguments or copy them out of registers.  On
  // entry to a function on PPC, the arguments start at offset 24, although the
  // first ones are often in registers.
  for (unsigned ArgNo = 0, e = Op.Val->getNumValues()-1; ArgNo != e; ++ArgNo) {
    SDOperand ArgVal;
    bool needsLoad = false;
    MVT::ValueType ObjectVT = Op.getValue(ArgNo).getValueType();
    unsigned ObjSize = MVT::getSizeInBits(ObjectVT)/8;

    unsigned CurArgOffset = ArgOffset;

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i32:
      // All int arguments reserve stack space.
      ArgOffset += 4;

      if (GPR_idx != Num_GPR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, MVT::i32);
        ++GPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      // All FP arguments reserve stack space.
      ArgOffset += ObjSize;

      // Every 4 bytes of argument space consumes one of the GPRs available for
      // argument passing.
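      // (e.g. under the Darwin 32-bit ABI, an f64 that arrives in F1 also
      // consumes R3 and R4 as shadow registers; this ABI detail is assumed
      // here and is what keeps GPR_idx in step with ArgOffset.)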
      if (GPR_idx != Num_GPR_Regs) {
        ++GPR_idx;
        if (ObjSize == 8 && GPR_idx != Num_GPR_Regs)
          ++GPR_idx;
      }
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
      // Note that vector arguments in registers don't reserve stack space.
      if (VR_idx != Num_VR_Regs) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::VRRCRegClass);
        MF.addLiveIn(VR[VR_idx], VReg);
        ArgVal = DAG.getCopyFromReg(Root, VReg, ObjectVT);
        ++VR_idx;
      } else {
        // This should be simple, but requires getting 16-byte aligned stack
        // values.
        assert(0 && "Loading VR argument not implemented yet!");
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      // If the argument is actually used, emit a load from the right stack
      // slot.
      if (!Op.Val->hasNUsesOfValue(0, ArgNo)) {
        int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
        SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
        ArgVal = DAG.getLoad(ObjectVT, Root, FIN,
                             DAG.getSrcValue(NULL));
      } else {
        // Don't emit a dead load.
        ArgVal = DAG.getNode(ISD::UNDEF, ObjectVT);
      }
    }

    ArgValues.push_back(ArgVal);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  bool isVarArg = cast<ConstantSDNode>(Op.getOperand(2))->getValue() != 0;
  if (isVarArg) {
    VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> MemOps;
    for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(Root, VReg, MVT::i32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(4, MVT::i32);
      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
    }
    if (!MemOps.empty())
      Root = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  ArgValues.push_back(Root);

  // Return the new list of results.
  std::vector<MVT::ValueType> RetVT(Op.Val->value_begin(),
                                    Op.Val->value_end());
  return DAG.getNode(ISD::MERGE_VALUES, RetVT, ArgValues);
}

static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;
  switch(Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:
    return SDOperand(); // ret void is legal
  case 2: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned ArgReg;
    if (MVT::isVector(ArgVT))
      ArgReg = PPC::V2;
    else if (MVT::isInteger(ArgVT))
      ArgReg = PPC::R3;
    else {
      assert(MVT::isFloatingPoint(ArgVT));
      ArgReg = PPC::F1;
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                            SDOperand());

    // If we haven't noted the R3/F1 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(ArgReg);
    break;
  }
  case 3:
    Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1), Copy.getValue(1));
    // If we haven't noted the R3+R4 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
    break;
  }
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}

/// LowerSELECT_CC - Lower floating point select_cc's into fsel instructions
/// when possible.
static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not a fsel.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETUGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETULE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
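  // (fsel d,a,b,c computes d = (a >= 0.0) ? b : c, so each ordered compare
  // below is rewritten as a sign test of LHS-RHS or RHS-LHS, swapping TV/FV
  // where needed; a summary of the PPC fsel semantics this lowering assumes.)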
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
  if (Op.getValueType() == MVT::i32)
    Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
  return Bits;
}

static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
  // then lfd it and fcfid it.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);

  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getSrcValue(NULL));
  // Load the value as a double.
  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));

  // FCFID it and return it.
  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
  return FP;
}

static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
  SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
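  // (For Amt in [0,31]:  OutLo = (Lo >>u Amt) | (Hi << (32-Amt)) and
  // OutHi = Hi >>s Amt, relying on PPC shifts producing 0 for amounts in
  // [32,63].  For Amt in [32,63] the OR'd terms vanish, and the select_cc
  // on Amt-32 instead picks Hi >>s (Amt-32) for the low word.)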
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
                                    Tmp4, Tmp6, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 64 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValue());
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {

  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
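  // Fold the two halves together: OR the bits (undef lanes contributed
  // zeros) and AND the undef masks, so a bit stays undef only if it was
  // undef in both halves.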
  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];

  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different than the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  uint16_t Bits16  = uint16_t(Bits32) | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different than the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Force vspltis[hw] -1 to vspltisb -1.
  if (Val == -1) SplatSize = 1;

  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };
  MVT::ValueType CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
  std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT::ValueType VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
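  // (vsldoi takes 16 consecutive bytes of the LHS:RHS concatenation starting
  // at byte Amt, i.e. the <Amt, Amt+1, ..., Amt+15> mask built below.)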
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();  // Not a constant vector.

  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)) {
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

    // First, handle single instruction cases.

    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal = int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);


    // Two instruction sequences.

    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }

    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

      // Make the VSLW intrinsic, computing 0x8000_0000.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                       OnesV, DAG);

      // xor by OnesV to invert it.
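      // (vslw shifts each word by the low 5 bits of the corresponding shift
      // element, so -1 shifted by (-1 & 31) == 31 yields 0x8000_0000 in every
      // lane; the xor with -1 then flips it to 0x7FFF_FFFF.)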
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };
    for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx) {
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000).  'vsplti -1'
      // is listed first in the array.
      int i = SplatCsts[idx];

      // Figure out the effective shift amount Altivec will use if asked to
      // shift by i in this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);

      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + sra self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + rol self.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }

    // Three instruction sequences.

    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
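    // (e.g. 27 == 11 - (-16), so it is built as (vsplti 11)-(vsplti -16);
    // both constants fit the 5-bit signed vsplti immediate.)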
1435 if (SextVal >= 0 && SextVal <= 31) { 1436 SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG); 1437 SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG); 1438 return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS); 1439 } 1440 // Odd, in range [-31,-17]: (vsplti C)+(vsplti -16). 1441 if (SextVal >= -31 && SextVal <= 0) { 1442 SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG); 1443 SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG); 1444 return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS); 1445 } 1446 } 1447 1448 return SDOperand(); 1449} 1450 1451/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit 1452/// the specified operations to build the shuffle. 1453static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS, 1454 SDOperand RHS, SelectionDAG &DAG) { 1455 unsigned OpNum = (PFEntry >> 26) & 0x0F; 1456 unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); 1457 unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); 1458 1459 enum { 1460 OP_COPY = 0, // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3> 1461 OP_VMRGHW, 1462 OP_VMRGLW, 1463 OP_VSPLTISW0, 1464 OP_VSPLTISW1, 1465 OP_VSPLTISW2, 1466 OP_VSPLTISW3, 1467 OP_VSLDOI4, 1468 OP_VSLDOI8, 1469 OP_VSLDOI12, 1470 }; 1471 1472 if (OpNum == OP_COPY) { 1473 if (LHSID == (1*9+2)*9+3) return LHS; 1474 assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!"); 1475 return RHS; 1476 } 1477 1478 SDOperand OpLHS, OpRHS; 1479 OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG); 1480 OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG); 1481 1482 unsigned ShufIdxs[16]; 1483 switch (OpNum) { 1484 default: assert(0 && "Unknown i32 permute!"); 1485 case OP_VMRGHW: 1486 ShufIdxs[ 0] = 0; ShufIdxs[ 1] = 1; ShufIdxs[ 2] = 2; ShufIdxs[ 3] = 3; 1487 ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19; 1488 ShufIdxs[ 8] = 4; ShufIdxs[ 9] = 5; ShufIdxs[10] = 6; ShufIdxs[11] = 7; 1489 ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23; 1490 break; 1491 case OP_VMRGLW: 1492 ShufIdxs[ 0] = 8; ShufIdxs[ 1] = 9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11; 1493 ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27; 1494 ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15; 1495 ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31; 1496 break; 1497 case OP_VSPLTISW0: 1498 for (unsigned i = 0; i != 16; ++i) 1499 ShufIdxs[i] = (i&3)+0; 1500 break; 1501 case OP_VSPLTISW1: 1502 for (unsigned i = 0; i != 16; ++i) 1503 ShufIdxs[i] = (i&3)+4; 1504 break; 1505 case OP_VSPLTISW2: 1506 for (unsigned i = 0; i != 16; ++i) 1507 ShufIdxs[i] = (i&3)+8; 1508 break; 1509 case OP_VSPLTISW3: 1510 for (unsigned i = 0; i != 16; ++i) 1511 ShufIdxs[i] = (i&3)+12; 1512 break; 1513 case OP_VSLDOI4: 1514 return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG); 1515 case OP_VSLDOI8: 1516 return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG); 1517 case OP_VSLDOI12: 1518 return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG); 1519 } 1520 std::vector<SDOperand> Ops; 1521 for (unsigned i = 0; i != 16; ++i) 1522 Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32)); 1523 1524 return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS, 1525 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 1526} 1527 1528/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. 
If this 1529/// is a shuffle we can handle in a single instruction, return it. Otherwise, 1530/// return the code it can be lowered into. Worst case, it can always be 1531/// lowered into a vperm. 1532static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) { 1533 SDOperand V1 = Op.getOperand(0); 1534 SDOperand V2 = Op.getOperand(1); 1535 SDOperand PermMask = Op.getOperand(2); 1536 1537 // Cases that are handled by instructions that take permute immediates 1538 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 1539 // selected by the instruction selector. 1540 if (V2.getOpcode() == ISD::UNDEF) { 1541 if (PPC::isSplatShuffleMask(PermMask.Val, 1) || 1542 PPC::isSplatShuffleMask(PermMask.Val, 2) || 1543 PPC::isSplatShuffleMask(PermMask.Val, 4) || 1544 PPC::isVPKUWUMShuffleMask(PermMask.Val, true) || 1545 PPC::isVPKUHUMShuffleMask(PermMask.Val, true) || 1546 PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 || 1547 PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) || 1548 PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) || 1549 PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) || 1550 PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) || 1551 PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) || 1552 PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) { 1553 return Op; 1554 } 1555 } 1556 1557 // Altivec has a variety of "shuffle immediates" that take two vector inputs 1558 // and produce a fixed permutation. If any of these match, do not lower to 1559 // VPERM. 1560 if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) || 1561 PPC::isVPKUHUMShuffleMask(PermMask.Val, false) || 1562 PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 || 1563 PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) || 1564 PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) || 1565 PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) || 1566 PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) || 1567 PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) || 1568 PPC::isVMRGHShuffleMask(PermMask.Val, 4, false)) 1569 return Op; 1570 1571 // Check to see if this is a shuffle of 4-byte values. If so, we can use our 1572 // perfect shuffle table to emit an optimal matching sequence. 1573 unsigned PFIndexes[4]; 1574 bool isFourElementShuffle = true; 1575 for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number 1576 unsigned EltNo = 8; // Start out undef. 1577 for (unsigned j = 0; j != 4; ++j) { // Intra-element byte. 1578 if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF) 1579 continue; // Undef, ignore it. 1580 1581 unsigned ByteSource = 1582 cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue(); 1583 if ((ByteSource & 3) != j) { 1584 isFourElementShuffle = false; 1585 break; 1586 } 1587 1588 if (EltNo == 8) { 1589 EltNo = ByteSource/4; 1590 } else if (EltNo != ByteSource/4) { 1591 isFourElementShuffle = false; 1592 break; 1593 } 1594 } 1595 PFIndexes[i] = EltNo; 1596 } 1597 1598 // If this shuffle can be expressed as a shuffle of 4-byte elements, use the 1599 // perfect shuffle vector to determine if it is cost effective to do this as 1600 // discrete instructions, or whether we should use a vperm. 1601 if (isFourElementShuffle) { 1602 // Compute the index in the perfect shuffle table. 1603 unsigned PFTableIndex = 1604 PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; 1605 1606 unsigned PFEntry = PerfectShuffleTable[PFTableIndex]; 1607 unsigned Cost = (PFEntry >> 30); 1608 1609 // Determining when to avoid vperm is tricky. 
    // Many things affect the cost
1610     // of vperm, particularly how many times the perm mask needs to be computed.
1611     // For example, if the perm mask can be hoisted out of a loop or is already
1612     // used (perhaps because there are multiple permutes with the same shuffle
1613     // mask?) the vperm has a cost of 1.  OTOH, hoisting the permute mask out of
1614     // the loop requires an extra register.
1615     //
1616     // As a compromise, we only emit discrete instructions if the shuffle can be
1617     // generated in 3 or fewer operations.  When we have loop information
1618     // available, if this block is within a loop, we should avoid using vperm
1619     // for 3-operation perms and use a constant pool load instead.
1620     if (Cost < 3)
1621       return GeneratePerfectShuffle(PFEntry, V1, V2, DAG);
1622   }
1623 
1624   // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
1625   // vector that will get spilled to the constant pool.
1626   if (V2.getOpcode() == ISD::UNDEF) V2 = V1;
1627 
1628   // The VECTOR_SHUFFLE mask is almost exactly what we want for vperm, except
1629   // that it is in input element units, not in bytes.  Convert now.
1630   MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType());
1631   unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8;
1632 
1633   std::vector<SDOperand> ResultMask;
1634   for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) {
1635     unsigned SrcElt;
1636     if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF)
1637       SrcElt = 0;
1638     else
1639       SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue();
1640 
1641     for (unsigned j = 0; j != BytesPerElement; ++j)
1642       ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j,
1643                                            MVT::i8));
1644   }
1645 
1646   SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask);
1647   return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask);
1648 }
1649 
1650 /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an
1651 /// altivec comparison.  If it is, return true and fill in CompareOpc/isDot
1652 /// with information about the intrinsic.
1653 static bool getAltivecCompareInfo(SDOperand Intrin, int &CompareOpc,
1654                                   bool &isDot) {
1655   unsigned IntrinsicID = cast<ConstantSDNode>(Intrin.getOperand(0))->getValue();
1656   CompareOpc = -1;
1657   isDot = false;
1658   switch (IntrinsicID) {
1659   default: return false;
1660     // Comparison predicates.
1661   case Intrinsic::ppc_altivec_vcmpbfp_p:  CompareOpc = 966; isDot = 1; break;
1662   case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break;
1663   case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc =   6; isDot = 1; break;
1664   case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc =  70; isDot = 1; break;
1665   case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break;
1666   case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
1667   case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
1668   case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
1669   case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break;
1670   case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break;
1671   case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break;
1672   case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break;
1673   case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break;
1674 
1675     // Normal Comparisons.
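    // (Same opcode numbers as the predicate forms above; only the record
    // ("dot") bit differs, which is what the isDot flag captures.)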
1676 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 1677 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 1678 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 1679 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 1680 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 1681 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 1682 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 1683 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 1684 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 1685 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 1686 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 1687 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 1688 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 1689 } 1690 return true; 1691} 1692 1693/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 1694/// lower, do it, otherwise return null. 1695static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 1696 // If this is a lowered altivec predicate compare, CompareOpc is set to the 1697 // opcode number of the comparison. 1698 int CompareOpc; 1699 bool isDot; 1700 if (!getAltivecCompareInfo(Op, CompareOpc, isDot)) 1701 return SDOperand(); // Don't custom lower most intrinsics. 1702 1703 // If this is a non-dot comparison, make the VCMP node and we are done. 1704 if (!isDot) { 1705 SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 1706 Op.getOperand(1), Op.getOperand(2), 1707 DAG.getConstant(CompareOpc, MVT::i32)); 1708 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 1709 } 1710 1711 // Create the PPCISD altivec 'dot' comparison node. 1712 std::vector<SDOperand> Ops; 1713 std::vector<MVT::ValueType> VTs; 1714 Ops.push_back(Op.getOperand(2)); // LHS 1715 Ops.push_back(Op.getOperand(3)); // RHS 1716 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 1717 VTs.push_back(Op.getOperand(2).getValueType()); 1718 VTs.push_back(MVT::Flag); 1719 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 1720 1721 // Now that we have the comparison, emit a copy from the CR to a GPR. 1722 // This is flagged to the above dot comparison. 1723 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 1724 DAG.getRegister(PPC::CR6, MVT::i32), 1725 CompNode.getValue(1)); 1726 1727 // Unpack the result based on how the target uses it. 1728 unsigned BitNo; // Bit # of CR6. 1729 bool InvertBit; // Invert result? 1730 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 1731 default: // Can't happen, don't crash on invalid number though. 1732 case 0: // Return the value of the EQ bit of CR6. 1733 BitNo = 0; InvertBit = false; 1734 break; 1735 case 1: // Return the inverted value of the EQ bit of CR6. 1736 BitNo = 0; InvertBit = true; 1737 break; 1738 case 2: // Return the value of the LT bit of CR6. 1739 BitNo = 2; InvertBit = false; 1740 break; 1741 case 3: // Return the inverted value of the LT bit of CR6. 1742 BitNo = 2; InvertBit = true; 1743 break; 1744 } 1745 1746 // Shift the bit into the low position. 1747 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 1748 DAG.getConstant(8-(3-BitNo), MVT::i32)); 1749 // Isolate the bit. 
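  // (After MFCR, the CR6 field occupies bits 7..4 counting from the LSB, so
  // the srl above shifted by 8-(3-BitNo): 5 to bring the EQ bit down to bit
  // 0, or 7 for the LT bit.)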
1750 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 1751 DAG.getConstant(1, MVT::i32)); 1752 1753 // If we are supposed to, toggle the bit. 1754 if (InvertBit) 1755 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 1756 DAG.getConstant(1, MVT::i32)); 1757 return Flags; 1758} 1759 1760static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 1761 // Create a stack slot that is 16-byte aligned. 1762 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 1763 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 1764 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 1765 1766 // Store the input value into Value#0 of the stack slot. 1767 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 1768 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 1769 // Load it out. 1770 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 1771} 1772 1773static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { 1774 if (Op.getValueType() == MVT::v4i32) { 1775 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1776 1777 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 1778 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 1779 1780 SDOperand RHSSwap = // = vrlw RHS, 16 1781 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 1782 1783 // Shrinkify inputs to v8i16. 1784 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 1785 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 1786 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 1787 1788 // Low parts multiplied together, generating 32-bit results (we ignore the 1789 // top parts). 1790 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 1791 LHS, RHS, DAG, MVT::v4i32); 1792 1793 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 1794 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 1795 // Shift the high parts up 16 bits. 1796 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 1797 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 1798 } else if (Op.getValueType() == MVT::v8i16) { 1799 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1800 1801 SDOperand Zero = BuildSplatI(0, 1, MVT::v8i16, DAG); 1802 1803 return BuildIntrinsicOp(Intrinsic::ppc_altivec_vmladduhm, 1804 LHS, RHS, Zero, DAG); 1805 } else if (Op.getValueType() == MVT::v16i8) { 1806 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1807 1808 // Multiply the even 8-bit parts, producing 16-bit sums. 1809 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 1810 LHS, RHS, DAG, MVT::v8i16); 1811 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 1812 1813 // Multiply the odd 8-bit parts, producing 16-bit sums. 1814 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 1815 LHS, RHS, DAG, MVT::v8i16); 1816 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 1817 1818 // Merge the results together. 1819 std::vector<SDOperand> Ops; 1820 for (unsigned i = 0; i != 8; ++i) { 1821 Ops.push_back(DAG.getConstant(2*i+1, MVT::i8)); 1822 Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8)); 1823 } 1824 1825 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 1826 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 1827 } else { 1828 assert(0 && "Unknown mul to lower!"); 1829 abort(); 1830 } 1831} 1832 1833/// LowerOperation - Provide custom lowering hooks for some operations. 
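/// Operations arrive here because the constructor registered them with
/// setOperationAction(..., Custom); anything unexpected trips the assert in
/// the default case below.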
1834/// 1835SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1836 switch (Op.getOpcode()) { 1837 default: assert(0 && "Wasn't expecting to be able to lower this!"); 1838 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 1839 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 1840 case ISD::JumpTable: return LowerJumpTable(Op, DAG); 1841 case ISD::SETCC: return LowerSETCC(Op, DAG); 1842 case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); 1843 case ISD::FORMAL_ARGUMENTS: return LowerFORMAL_ARGUMENTS(Op, DAG, 1844 VarArgsFrameIndex); 1845 case ISD::RET: return LowerRET(Op, DAG); 1846 1847 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 1848 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 1849 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 1850 1851 // Lower 64-bit shifts. 1852 case ISD::SHL: return LowerSHL(Op, DAG); 1853 case ISD::SRL: return LowerSRL(Op, DAG); 1854 case ISD::SRA: return LowerSRA(Op, DAG); 1855 1856 // Vector-related lowering. 1857 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 1858 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 1859 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 1860 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 1861 case ISD::MUL: return LowerMUL(Op, DAG); 1862 } 1863 return SDOperand(); 1864} 1865 1866//===----------------------------------------------------------------------===// 1867// Other Lowering Code 1868//===----------------------------------------------------------------------===// 1869 1870std::pair<SDOperand, SDOperand> 1871PPCTargetLowering::LowerCallTo(SDOperand Chain, 1872 const Type *RetTy, bool isVarArg, 1873 unsigned CallingConv, bool isTailCall, 1874 SDOperand Callee, ArgListTy &Args, 1875 SelectionDAG &DAG) { 1876 // args_to_use will accumulate outgoing args for the PPCISD::CALL case in 1877 // SelectExpr to use to put the arguments in the appropriate registers. 1878 std::vector<SDOperand> args_to_use; 1879 1880 // Count how many bytes are to be pushed on the stack, including the linkage 1881 // area, and parameter passing area. 1882 unsigned NumBytes = 24; 1883 1884 if (Args.empty()) { 1885 Chain = DAG.getCALLSEQ_START(Chain, 1886 DAG.getConstant(NumBytes, getPointerTy())); 1887 } else { 1888 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1889 switch (getValueType(Args[i].second)) { 1890 default: assert(0 && "Unknown value type!"); 1891 case MVT::i1: 1892 case MVT::i8: 1893 case MVT::i16: 1894 case MVT::i32: 1895 case MVT::f32: 1896 NumBytes += 4; 1897 break; 1898 case MVT::i64: 1899 case MVT::f64: 1900 NumBytes += 8; 1901 break; 1902 } 1903 } 1904 1905 // Just to be safe, we'll always reserve the full 24 bytes of linkage area 1906 // plus 32 bytes of argument space in case any called code gets funky on us. 1907 // (Required by ABI to support var arg) 1908 if (NumBytes < 56) NumBytes = 56; 1909 1910 // Adjust the stack pointer for the new arguments... 1911 // These operations are automatically eliminated by the prolog/epilog pass 1912 Chain = DAG.getCALLSEQ_START(Chain, 1913 DAG.getConstant(NumBytes, getPointerTy())); 1914 1915 // Set up a copy of the stack pointer for use loading and storing any 1916 // arguments that may not fit in the registers available for argument 1917 // passing. 1918 SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 1919 1920 // Figure out which arguments are going to go in registers, and which in 1921 // memory. 
Also, if this is a vararg function, floating point operations 1922 // must be stored to our stack, and loaded into integer regs as well, if 1923 // any integer regs are available for argument passing. 1924 unsigned ArgOffset = 24; 1925 unsigned GPR_remaining = 8; 1926 unsigned FPR_remaining = 13; 1927 1928 std::vector<SDOperand> MemOps; 1929 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1930 // PtrOff will be used to store the current argument to the stack if a 1931 // register cannot be found for it. 1932 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1933 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1934 MVT::ValueType ArgVT = getValueType(Args[i].second); 1935 1936 switch (ArgVT) { 1937 default: assert(0 && "Unexpected ValueType for argument!"); 1938 case MVT::i1: 1939 case MVT::i8: 1940 case MVT::i16: 1941 // Promote the integer to 32 bits. If the input type is signed use a 1942 // sign extend, otherwise use a zero extend. 1943 if (Args[i].second->isSigned()) 1944 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 1945 else 1946 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 1947 // FALL THROUGH 1948 case MVT::i32: 1949 if (GPR_remaining > 0) { 1950 args_to_use.push_back(Args[i].first); 1951 --GPR_remaining; 1952 } else { 1953 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1954 Args[i].first, PtrOff, 1955 DAG.getSrcValue(NULL))); 1956 } 1957 ArgOffset += 4; 1958 break; 1959 case MVT::i64: 1960 // If we have one free GPR left, we can place the upper half of the i64 1961 // in it, and store the other half to the stack. If we have two or more 1962 // free GPRs, then we can pass both halves of the i64 in registers. 1963 if (GPR_remaining > 0) { 1964 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1965 Args[i].first, DAG.getConstant(1, MVT::i32)); 1966 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1967 Args[i].first, DAG.getConstant(0, MVT::i32)); 1968 args_to_use.push_back(Hi); 1969 --GPR_remaining; 1970 if (GPR_remaining > 0) { 1971 args_to_use.push_back(Lo); 1972 --GPR_remaining; 1973 } else { 1974 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 1975 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 1976 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1977 Lo, PtrOff, DAG.getSrcValue(NULL))); 1978 } 1979 } else { 1980 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1981 Args[i].first, PtrOff, 1982 DAG.getSrcValue(NULL))); 1983 } 1984 ArgOffset += 8; 1985 break; 1986 case MVT::f32: 1987 case MVT::f64: 1988 if (FPR_remaining > 0) { 1989 args_to_use.push_back(Args[i].first); 1990 --FPR_remaining; 1991 if (isVarArg) { 1992 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain, 1993 Args[i].first, PtrOff, 1994 DAG.getSrcValue(NULL)); 1995 MemOps.push_back(Store); 1996 // Float varargs are always shadowed in available integer registers 1997 if (GPR_remaining > 0) { 1998 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 1999 DAG.getSrcValue(NULL)); 2000 MemOps.push_back(Load.getValue(1)); 2001 args_to_use.push_back(Load); 2002 --GPR_remaining; 2003 } 2004 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 2005 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 2006 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 2007 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 2008 DAG.getSrcValue(NULL)); 2009 MemOps.push_back(Load.getValue(1)); 2010 args_to_use.push_back(Load); 2011 --GPR_remaining; 2012 } 2013 } 
else { 2014 // If we have any FPRs remaining, we may also have GPRs remaining. 2015 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 2016 // GPRs. 2017 if (GPR_remaining > 0) { 2018 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2019 --GPR_remaining; 2020 } 2021 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 2022 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2023 --GPR_remaining; 2024 } 2025 } 2026 } else { 2027 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 2028 Args[i].first, PtrOff, 2029 DAG.getSrcValue(NULL))); 2030 } 2031 ArgOffset += (ArgVT == MVT::f32) ? 4 : 8; 2032 break; 2033 } 2034 } 2035 if (!MemOps.empty()) 2036 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps); 2037 } 2038 2039 std::vector<MVT::ValueType> RetVals; 2040 MVT::ValueType RetTyVT = getValueType(RetTy); 2041 MVT::ValueType ActualRetTyVT = RetTyVT; 2042 if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16) 2043 ActualRetTyVT = MVT::i32; // Promote result to i32. 2044 2045 if (RetTyVT == MVT::i64) { 2046 RetVals.push_back(MVT::i32); 2047 RetVals.push_back(MVT::i32); 2048 } else if (RetTyVT != MVT::isVoid) { 2049 RetVals.push_back(ActualRetTyVT); 2050 } 2051 RetVals.push_back(MVT::Other); 2052 2053 // If the callee is a GlobalAddress node (quite common, every direct call is) 2054 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 2055 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2056 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); 2057 2058 std::vector<SDOperand> Ops; 2059 Ops.push_back(Chain); 2060 Ops.push_back(Callee); 2061 Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end()); 2062 SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops); 2063 Chain = TheCall.getValue(TheCall.Val->getNumValues()-1); 2064 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, 2065 DAG.getConstant(NumBytes, getPointerTy())); 2066 SDOperand RetVal = TheCall; 2067 2068 // If the result is a small value, add a note so that we keep track of the 2069 // information about whether it is sign or zero extended. 2070 if (RetTyVT != ActualRetTyVT) { 2071 RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext, 2072 MVT::i32, RetVal, DAG.getValueType(RetTyVT)); 2073 RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal); 2074 } else if (RetTyVT == MVT::i64) { 2075 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1)); 2076 } 2077 2078 return std::make_pair(RetVal, Chain); 2079} 2080 2081MachineBasicBlock * 2082PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 2083 MachineBasicBlock *BB) { 2084 assert((MI->getOpcode() == PPC::SELECT_CC_Int || 2085 MI->getOpcode() == PPC::SELECT_CC_F4 || 2086 MI->getOpcode() == PPC::SELECT_CC_F8 || 2087 MI->getOpcode() == PPC::SELECT_CC_VRRC) && 2088 "Unexpected instr type to insert"); 2089 2090 // To "insert" a SELECT_CC instruction, we actually have to insert the diamond 2091 // control-flow pattern. The incoming instruction knows the destination vreg 2092 // to set, the condition code register to branch on, the true/false values to 2093 // select between, and a branch opcode to use. 2094 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2095 ilist<MachineBasicBlock>::iterator It = BB; 2096 ++It; 2097 2098 // thisMBB: 2099 // ... 2100 // TrueVal = ... 
2101 // cmpTY ccX, r1, r2 2102 // bCC copy1MBB 2103 // fallthrough --> copy0MBB 2104 MachineBasicBlock *thisMBB = BB; 2105 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 2106 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 2107 BuildMI(BB, MI->getOperand(4).getImmedValue(), 2) 2108 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 2109 MachineFunction *F = BB->getParent(); 2110 F->getBasicBlockList().insert(It, copy0MBB); 2111 F->getBasicBlockList().insert(It, sinkMBB); 2112 // Update machine-CFG edges by first adding all successors of the current 2113 // block to the new block which will contain the Phi node for the select. 2114 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 2115 e = BB->succ_end(); i != e; ++i) 2116 sinkMBB->addSuccessor(*i); 2117 // Next, remove all successors of the current block, and add the true 2118 // and fallthrough blocks as its successors. 2119 while(!BB->succ_empty()) 2120 BB->removeSuccessor(BB->succ_begin()); 2121 BB->addSuccessor(copy0MBB); 2122 BB->addSuccessor(sinkMBB); 2123 2124 // copy0MBB: 2125 // %FalseValue = ... 2126 // # fallthrough to sinkMBB 2127 BB = copy0MBB; 2128 2129 // Update machine-CFG edges 2130 BB->addSuccessor(sinkMBB); 2131 2132 // sinkMBB: 2133 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 2134 // ... 2135 BB = sinkMBB; 2136 BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg()) 2137 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 2138 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 2139 2140 delete MI; // The pseudo instruction is gone now. 2141 return BB; 2142} 2143 2144//===----------------------------------------------------------------------===// 2145// Target Optimization Hooks 2146//===----------------------------------------------------------------------===// 2147 2148SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 2149 DAGCombinerInfo &DCI) const { 2150 TargetMachine &TM = getTargetMachine(); 2151 SelectionDAG &DAG = DCI.DAG; 2152 switch (N->getOpcode()) { 2153 default: break; 2154 case ISD::SINT_TO_FP: 2155 if (TM.getSubtarget<PPCSubtarget>().is64Bit()) { 2156 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 2157 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 2158 // We allow the src/dst to be either f32/f64, but the intermediate 2159 // type must be i64. 2160 if (N->getOperand(0).getValueType() == MVT::i64) { 2161 SDOperand Val = N->getOperand(0).getOperand(0); 2162 if (Val.getValueType() == MVT::f32) { 2163 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 2164 DCI.AddToWorklist(Val.Val); 2165 } 2166 2167 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); 2168 DCI.AddToWorklist(Val.Val); 2169 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); 2170 DCI.AddToWorklist(Val.Val); 2171 if (N->getValueType(0) == MVT::f32) { 2172 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val); 2173 DCI.AddToWorklist(Val.Val); 2174 } 2175 return Val; 2176 } else if (N->getOperand(0).getValueType() == MVT::i32) { 2177 // If the intermediate type is i32, we can avoid the load/store here 2178 // too. 2179 } 2180 } 2181 } 2182 break; 2183 case ISD::STORE: 2184 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
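    // (fctiwz performs the fp-to-int conversion in an FPR and stfiwx stores
    // the low 32 bits of that FPR directly, so the usual round trip through
    // a GPR or a stack temporary is avoided.)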
2185 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 2186 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 2187 N->getOperand(1).getValueType() == MVT::i32) { 2188 SDOperand Val = N->getOperand(1).getOperand(0); 2189 if (Val.getValueType() == MVT::f32) { 2190 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 2191 DCI.AddToWorklist(Val.Val); 2192 } 2193 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); 2194 DCI.AddToWorklist(Val.Val); 2195 2196 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, 2197 N->getOperand(2), N->getOperand(3)); 2198 DCI.AddToWorklist(Val.Val); 2199 return Val; 2200 } 2201 break; 2202 case PPCISD::VCMP: { 2203 // If a VCMPo node already exists with exactly the same operands as this 2204 // node, use its result instead of this node (VCMPo computes both a CR6 and 2205 // a normal output). 2206 // 2207 if (!N->getOperand(0).hasOneUse() && 2208 !N->getOperand(1).hasOneUse() && 2209 !N->getOperand(2).hasOneUse()) { 2210 2211 // Scan all of the users of the LHS, looking for VCMPo's that match. 2212 SDNode *VCMPoNode = 0; 2213 2214 SDNode *LHSN = N->getOperand(0).Val; 2215 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 2216 UI != E; ++UI) 2217 if ((*UI)->getOpcode() == PPCISD::VCMPo && 2218 (*UI)->getOperand(1) == N->getOperand(1) && 2219 (*UI)->getOperand(2) == N->getOperand(2) && 2220 (*UI)->getOperand(0) == N->getOperand(0)) { 2221 VCMPoNode = *UI; 2222 break; 2223 } 2224 2225 // If there is no VCMPo node, or if the flag value has a single use, don't 2226 // transform this. 2227 if (!VCMPoNode || VCMPoNode->hasNUsesOfValue(0, 1)) 2228 break; 2229 2230 // Look at the (necessarily single) use of the flag value. If it has a 2231 // chain, this transformation is more complex. Note that multiple things 2232 // could use the value result, which we should ignore. 2233 SDNode *FlagUser = 0; 2234 for (SDNode::use_iterator UI = VCMPoNode->use_begin(); 2235 FlagUser == 0; ++UI) { 2236 assert(UI != VCMPoNode->use_end() && "Didn't find user!"); 2237 SDNode *User = *UI; 2238 for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { 2239 if (User->getOperand(i) == SDOperand(VCMPoNode, 1)) { 2240 FlagUser = User; 2241 break; 2242 } 2243 } 2244 } 2245 2246 // If the user is a MFCR instruction, we know this is safe. Otherwise we 2247 // give up for right now. 2248 if (FlagUser->getOpcode() == PPCISD::MFCR) 2249 return SDOperand(VCMPoNode, 0); 2250 } 2251 break; 2252 } 2253 case ISD::BR_CC: { 2254 // If this is a branch on an altivec predicate comparison, lower this so 2255 // that we don't have to do a MFCR: instead, branch directly on CR6. This 2256 // lowering is done pre-legalize, because the legalizer lowers the predicate 2257 // compare down to code that is difficult to reassemble. 2258 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get(); 2259 SDOperand LHS = N->getOperand(2), RHS = N->getOperand(3); 2260 int CompareOpc; 2261 bool isDot; 2262 2263 if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN && 2264 isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) && 2265 getAltivecCompareInfo(LHS, CompareOpc, isDot)) { 2266 assert(isDot && "Can't compare against a vector result!"); 2267 2268 // If this is a comparison against something other than 0/1, then we know 2269 // that the condition is never/always true. 2270 unsigned Val = cast<ConstantSDNode>(RHS)->getValue(); 2271 if (Val != 0 && Val != 1) { 2272 if (CC == ISD::SETEQ) // Cond never true, remove branch. 
2273 return N->getOperand(0); 2274 // Always !=, turn it into an unconditional branch. 2275 return DAG.getNode(ISD::BR, MVT::Other, 2276 N->getOperand(0), N->getOperand(4)); 2277 } 2278 2279 bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0); 2280 2281 // Create the PPCISD altivec 'dot' comparison node. 2282 std::vector<SDOperand> Ops; 2283 std::vector<MVT::ValueType> VTs; 2284 Ops.push_back(LHS.getOperand(2)); // LHS of compare 2285 Ops.push_back(LHS.getOperand(3)); // RHS of compare 2286 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 2287 VTs.push_back(LHS.getOperand(2).getValueType()); 2288 VTs.push_back(MVT::Flag); 2289 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 2290 2291 // Unpack the result based on how the target uses it. 2292 unsigned CompOpc; 2293 switch (cast<ConstantSDNode>(LHS.getOperand(1))->getValue()) { 2294 default: // Can't happen, don't crash on invalid number though. 2295 case 0: // Branch on the value of the EQ bit of CR6. 2296 CompOpc = BranchOnWhenPredTrue ? PPC::BEQ : PPC::BNE; 2297 break; 2298 case 1: // Branch on the inverted value of the EQ bit of CR6. 2299 CompOpc = BranchOnWhenPredTrue ? PPC::BNE : PPC::BEQ; 2300 break; 2301 case 2: // Branch on the value of the LT bit of CR6. 2302 CompOpc = BranchOnWhenPredTrue ? PPC::BLT : PPC::BGE; 2303 break; 2304 case 3: // Branch on the inverted value of the LT bit of CR6. 2305 CompOpc = BranchOnWhenPredTrue ? PPC::BGE : PPC::BLT; 2306 break; 2307 } 2308 2309 return DAG.getNode(PPCISD::COND_BRANCH, MVT::Other, N->getOperand(0), 2310 DAG.getRegister(PPC::CR6, MVT::i32), 2311 DAG.getConstant(CompOpc, MVT::i32), 2312 N->getOperand(4), CompNode.getValue(1)); 2313 } 2314 break; 2315 } 2316 } 2317 2318 return SDOperand(); 2319} 2320 2321//===----------------------------------------------------------------------===// 2322// Inline Assembly Support 2323//===----------------------------------------------------------------------===// 2324 2325void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 2326 uint64_t Mask, 2327 uint64_t &KnownZero, 2328 uint64_t &KnownOne, 2329 unsigned Depth) const { 2330 KnownZero = 0; 2331 KnownOne = 0; 2332 switch (Op.getOpcode()) { 2333 default: break; 2334 case ISD::INTRINSIC_WO_CHAIN: { 2335 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 2336 default: break; 2337 case Intrinsic::ppc_altivec_vcmpbfp_p: 2338 case Intrinsic::ppc_altivec_vcmpeqfp_p: 2339 case Intrinsic::ppc_altivec_vcmpequb_p: 2340 case Intrinsic::ppc_altivec_vcmpequh_p: 2341 case Intrinsic::ppc_altivec_vcmpequw_p: 2342 case Intrinsic::ppc_altivec_vcmpgefp_p: 2343 case Intrinsic::ppc_altivec_vcmpgtfp_p: 2344 case Intrinsic::ppc_altivec_vcmpgtsb_p: 2345 case Intrinsic::ppc_altivec_vcmpgtsh_p: 2346 case Intrinsic::ppc_altivec_vcmpgtsw_p: 2347 case Intrinsic::ppc_altivec_vcmpgtub_p: 2348 case Intrinsic::ppc_altivec_vcmpgtuh_p: 2349 case Intrinsic::ppc_altivec_vcmpgtuw_p: 2350 KnownZero = ~1U; // All bits but the low one are known to be zero. 2351 break; 2352 } 2353 } 2354 } 2355} 2356 2357 2358/// getConstraintType - Given a constraint letter, return the type of 2359/// constraint it is for this target. 
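/// The letters handled here follow the GCC RS6000 conventions: 'b' base
/// registers (GPRs minus R0), 'r' general registers, 'f' floating-point
/// registers, 'v' AltiVec registers, and 'y' condition registers.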
2360 PPCTargetLowering::ConstraintType 
2361 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
2362   switch (ConstraintLetter) {
2363   default: break;
2364   case 'b':
2365   case 'r':
2366   case 'f':
2367   case 'v':
2368   case 'y':
2369     return C_RegisterClass;
2370   }
2371   return TargetLowering::getConstraintType(ConstraintLetter);
2372 }
2373 
2374 
2375 std::vector<unsigned> PPCTargetLowering::
2376 getRegClassForInlineAsmConstraint(const std::string &Constraint,
2377                                   MVT::ValueType VT) const {
2378   if (Constraint.size() == 1) {
2379     switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
2380     default: break;               // Unknown constraint letter
2381     case 'b':
2382       return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
2383                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
2384                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
2385                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
2386                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
2387                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
2388                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
2389                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
2390                                    0);
2391     case 'r':
2392       return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
2393                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
2394                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
2395                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
2396                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
2397                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
2398                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
2399                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
2400                                    0);
2401     case 'f':
2402       return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
2403                                    PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
2404                                    PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
2405                                    PPC::F12, PPC::F13, PPC::F14, PPC::F15,
2406                                    PPC::F16, PPC::F17, PPC::F18, PPC::F19,
2407                                    PPC::F20, PPC::F21, PPC::F22, PPC::F23,
2408                                    PPC::F24, PPC::F25, PPC::F26, PPC::F27,
2409                                    PPC::F28, PPC::F29, PPC::F30, PPC::F31,
2410                                    0);
2411     case 'v':
2412       return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
2413                                    PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
2414                                    PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
2415                                    PPC::V12, PPC::V13, PPC::V14, PPC::V15,
2416                                    PPC::V16, PPC::V17, PPC::V18, PPC::V19,
2417                                    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
2418                                    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
2419                                    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
2420                                    0);
2421     case 'y':
2422       return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
2423                                    PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
2424                                    0);
2425     }
2426   }
2427 
2428   return std::vector<unsigned>();
2429 }
2430 
2431 // isOperandValidForConstraint - Return true if Op is valid for Letter.
2432 bool PPCTargetLowering::
2433 isOperandValidForConstraint(SDOperand Op, char Letter) {
2434   switch (Letter) {
2435   default: break;
2436   case 'I':
2437   case 'J':
2438   case 'K':
2439   case 'L':
2440   case 'M':
2441   case 'N':
2442   case 'O':
2443   case 'P': {
2444     if (!isa<ConstantSDNode>(Op)) return false;   // Must be an immediate.
2445     unsigned Value = cast<ConstantSDNode>(Op)->getValue();
2446     switch (Letter) {
2447     default: assert(0 && "Unknown constraint letter!");
2448     case 'I':  // "I" is a signed 16-bit constant.
2449       return (short)Value == (int)Value;
2450     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
2451     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
2452       return (short)Value == 0;
2453     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
2454       return (Value >> 16) == 0;
2455     case 'M':  // "M" is a constant that is greater than 31.
2456       return Value > 31;
2457     case 'N':  // "N" is a positive constant that is an exact power of two.
2458       return (int)Value > 0 && isPowerOf2_32(Value);
2459     case 'O':  // "O" is the constant zero.
2460       return Value == 0;
2461     case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
2462       return (short)-Value == (int)-Value;
2463     }
2464     break;
2465   }
2466   }
2467 
2468   // Handle standard constraint letters.
2469   return TargetLowering::isOperandValidForConstraint(Op, Letter);
2470 }
2471 
2472 /// isLegalAddressImmediate - Return true if the integer value can be used
2473 /// as the offset of the target addressing mode.
2474 bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const {
2475   // PPC allows a sign-extended 16-bit immediate field, i.e. [-32768, 32767].
   // (The old bound of 1 << 16 was wider than the D-form displacement permits.)
2476   return (V >= -(1 << 15) && V <= (1 << 15)-1);
2477 }
2478 