PPCISelLowering.cpp revision 19a815238e55458e95f99b4dad31ed053c9f635c
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "PPCPerfectShuffle.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations.
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions.
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod.
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root; otherwise
  // FSQRT must be expanded.
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ.
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);

  // PowerPC does not have ROTR.
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);

  // PowerPC does not have SELECT.
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when
  // possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit.
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND, which requires a SetCC.
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP.
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // Support label-based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements.
  setOperationAction(ISD::RET , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code. This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls. We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64-bit PowerPC implementations can support i64 types directly.
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or.
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32-bit PowerPC wants to expand i64 shifts itself.
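    // These are split into pairs of 32-bit operations by the custom
    // LowerSHL/LowerSRL/LowerSRA code below.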
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      // We promote all non-typed operations to v4i32.
      setOperationAction(ISD::AND , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::AND , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::OR , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::OR , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::XOR , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::XOR , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::LOAD , (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::LOAD , (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::SELECT, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::SELECT, (MVT::ValueType)VT, MVT::v4i32);
      setOperationAction(ISD::STORE, (MVT::ValueType)VT, Promote);
      AddPromotedToType (ISD::STORE, (MVT::ValueType)VT, MVT::v4i32);

      // No other operations are legal.
      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
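    // (All vector shuffles were promoted to v16i8 in the loop above, so
    // custom-lowering that single type covers every shuffle.)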
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    setOperationAction(ISD::AND , MVT::v4i32, Legal);
    setOperationAction(ISD::OR , MVT::v4i32, Legal);
    setOperationAction(ISD::XOR , MVT::v4i32, Legal);
    setOperationAction(ISD::LOAD , MVT::v4i32, Legal);
    setOperationAction(ISD::SELECT, MVT::v4i32, Expand);
    setOperationAction(ISD::STORE , MVT::v4i32, Legal);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);
    setOperationAction(ISD::MUL, MVT::v4i32, Custom);
    setOperationAction(ISD::MUL, MVT::v8i16, Custom);
    setOperationAction(ISD::MUL, MVT::v16i8, Custom);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();
}

const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL:          return "PPCISD::FSEL";
  case PPCISD::FCFID:         return "PPCISD::FCFID";
  case PPCISD::FCTIDZ:        return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ:        return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX:        return "PPCISD::STFIWX";
  case PPCISD::VMADDFP:       return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP:      return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM:         return "PPCISD::VPERM";
  case PPCISD::Hi:            return "PPCISD::Hi";
  case PPCISD::Lo:            return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL:           return "PPCISD::SRL";
  case PPCISD::SRA:           return "PPCISD::SRA";
  case PPCISD::SHL:           return "PPCISD::SHL";
  case PPCISD::EXTSW_32:      return "PPCISD::EXTSW_32";
  case PPCISD::STD_32:        return "PPCISD::STD_32";
  case PPCISD::CALL:          return "PPCISD::CALL";
  case PPCISD::RET_FLAG:      return "PPCISD::RET_FLAG";
  case PPCISD::MFCR:          return "PPCISD::MFCR";
  case PPCISD::VCMP:          return "PPCISD::VCMP";
  case PPCISD::VCMPo:         return "PPCISD::VCMPo";
  }
}

//===----------------------------------------------------------------------===//
// Node matching predicates, for use by the tblgen matching code.
//===----------------------------------------------------------------------===//

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
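    // Operand 1 of the (ext)load is the pointer; look for a constant-pool
    // entry holding +/-0.0 there.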
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
bool PPC::isVPKUHUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1))
        return false;
  } else {
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getOperand(i), i*2+1) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+1))
        return false;
  }
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
bool PPC::isVPKUWUMShuffleMask(SDNode *N, bool isUnary) {
  if (!isUnary) {
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3))
        return false;
  } else {
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getOperand(i  ), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+1), i*2+3) ||
          !isConstantOrUndef(N->getOperand(i+8), i*2+2) ||
          !isConstantOrUndef(N->getOperand(i+9), i*2+3))
        return false;
  }
  return true;
}

/// isVMerge - Common function, used to match vmrg* shuffles.
///
static bool isVMerge(SDNode *N, unsigned UnitSize,
                     unsigned LHSStart, unsigned RHSStart) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
         "Unsupported merge size!");

  for (unsigned i = 0; i != 8/UnitSize; ++i)      // Step over units
    for (unsigned j = 0; j != UnitSize; ++j) {    // Step over bytes within unit
      if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j),
                             LHSStart+j+i*UnitSize) ||
          !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j),
                             RHSStart+j+i*UnitSize))
        return false;
    }
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 8, 24);
  return isVMerge(N, UnitSize, 8, 8);
}

/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize, bool isUnary) {
  if (!isUnary)
    return isVMerge(N, UnitSize, 0, 16);
  return isVMerge(N, UnitSize, 0, 0);
}


/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
/// amount, otherwise return -1.
int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!");
  // Find the first non-undef value in the shuffle mask.
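  // (Undef entries can match any shift amount, so they are skipped here.)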
  unsigned i;
  for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i)
    /*search*/;

  if (i == 16) return -1;  // all undef.

  // Otherwise, check to see if the rest of the elements are consecutively
  // numbered from this value.
  unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue();
  if (ShiftAmt < i) return -1;
  ShiftAmt -= i;

  if (!isUnary) {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i))
        return -1;
  } else {
    // Check the rest of the elements to see if they are consecutive.
    for (++i; i != 16; ++i)
      if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15))
        return -1;
  }

  return ShiftAmt;
}

/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// VSPLTB/VSPLTH/VSPLTW.
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that they are consecutive.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted.  The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDOperand PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDOperand OpVal(0, 0);

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vspltish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize;  // Number of BV entries per splat value.
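    // UniquedVals[k] collects the k'th element of each group of 'Multiple'
    // build_vector operands; every group must agree (or be undef) for the
    // wider splat to be representable.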
    SDOperand UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDOperand();

      if (UniquedVals[i&(Multiple-1)].Val == 0)
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDOperand();  // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk.  See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1.  If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (UniquedVals[i].Val == 0) continue;  // Must have been undefs.

      LeadingZero &= cast<ConstantSDNode>(UniquedVals[i])->isNullValue();
      LeadingOnes &= cast<ConstantSDNode>(UniquedVals[i])->isAllOnesValue();
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(0, MVT::i32);  // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getValue();
      if (Val < 16)
        return DAG.getTargetConstant(Val, MVT::i32); // 0,0,0,4 -> vspltisw(4)
    }
    if (LeadingOnes) {
      if (UniquedVals[Multiple-1].Val == 0)
        return DAG.getTargetConstant(~0U, MVT::i32);  // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSignExtended();
      if (Val >= -16)  // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, MVT::i32);
    }

    return SDOperand();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDOperand();
  }

  if (OpVal.Val == 0) return SDOperand();  // All UNDEF: use implicit def.

  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDOperand();

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
         (Value                        & ((1 << (8*ValSizeInBytes))-1)))
      return SDOperand();
  }

  // Properly sign extend the value.
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDOperand();

  // Finally, if this value fits in a 5 bit sext field, return it.
  if (((MaskVal << (32-5)) >> (32-5)) == MaskVal)
    return DAG.getTargetConstant(MaskVal, MVT::i32);
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  LowerOperation implementation
//===----------------------------------------------------------------------===//

static SDOperand LowerConstantPool(SDOperand Op, SelectionDAG &DAG) {
  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
  Constant *C = CP->get();
  SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to the constant pool.
    // The address of the global is just (hi(&g)+lo(&g)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  return Lo;
}

static SDOperand LowerGlobalAddress(SDOperand Op, SelectionDAG &DAG) {
  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
  GlobalValue *GV = GSDN->getGlobal();
  SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset());
  SDOperand Zero = DAG.getConstant(0, MVT::i32);

  const TargetMachine &TM = DAG.getTarget();

  // If this is a non-darwin platform, we don't support non-static relo models
  // yet.
  if (TM.getRelocationModel() == Reloc::Static ||
      !TM.getSubtarget<PPCSubtarget>().isDarwin()) {
    // Generate non-pic code that has direct accesses to globals.
    // The address of the global is just (hi(&g)+lo(&g)).
    SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
    SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
    return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);
  }

  SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero);
  if (TM.getRelocationModel() == Reloc::PIC) {
    // With PIC, the first instruction is actually "GR+hi(&G)".
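    // (GR is the PIC base register produced by the PPCISD::GlobalBaseReg node.)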
    Hi = DAG.getNode(ISD::ADD, MVT::i32,
                     DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi);
  }

  SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero);
  Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo);

  if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() &&
      (!GV->isExternal() || GV->hasNotBeenReadFromBytecode()))
    return Lo;

  // If the global is weak or external, we have to go through the lazy
  // resolution stub.
  return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0));
}

static SDOperand LowerSETCC(SDOperand Op, SelectionDAG &DAG) {
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();

  // If we're comparing for equality to zero, expose the fact that this is
  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
  // fold the new nodes.
  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
    if (C->isNullValue() && CC == ISD::SETEQ) {
      MVT::ValueType VT = Op.getOperand(0).getValueType();
      SDOperand Zext = Op.getOperand(0);
      if (VT < MVT::i32) {
        VT = MVT::i32;
        Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0));
      }
      unsigned Log2b = Log2_32(MVT::getSizeInBits(VT));
      SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext);
      SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz,
                                  DAG.getConstant(Log2b, MVT::i32));
      return DAG.getNode(ISD::TRUNCATE, MVT::i32, Scc);
    }
    // Leave comparisons against 0 and -1 alone for now, since they're usually
    // optimized.  FIXME: revisit this when we can custom lower all setcc
    // optimizations.
    if (C->isAllOnesValue() || C->isNullValue())
      return SDOperand();
  }

  // If we have an integer seteq/setne, turn it into a compare against zero
  // by subtracting the rhs from the lhs, which is faster than setting a
  // condition register, reading it back out, and masking the correct bit.
  MVT::ValueType LHSVT = Op.getOperand(0).getValueType();
  if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    MVT::ValueType VT = Op.getValueType();
    SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0),
                                Op.getOperand(1));
    return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC);
  }
  return SDOperand();
}

static SDOperand LowerVASTART(SDOperand Op, SelectionDAG &DAG,
                              unsigned VarArgsFrameIndex) {
  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
  return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR,
                     Op.getOperand(1), Op.getOperand(2));
}

static SDOperand LowerRET(SDOperand Op, SelectionDAG &DAG) {
  SDOperand Copy;
  switch (Op.getNumOperands()) {
  default:
    assert(0 && "Do not know how to return this many arguments!");
    abort();
  case 1:
    return SDOperand(); // ret void is legal
  case 2: {
    MVT::ValueType ArgVT = Op.getOperand(1).getValueType();
    unsigned ArgReg;
    if (MVT::isVector(ArgVT))
      ArgReg = PPC::V2;
    else if (MVT::isInteger(ArgVT))
      ArgReg = PPC::R3;
    else {
      assert(MVT::isFloatingPoint(ArgVT));
      ArgReg = PPC::F1;
    }

    Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1),
                            SDOperand());

    // If we haven't noted the R3/F1 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty())
      DAG.getMachineFunction().addLiveOut(ArgReg);
    break;
  }
  case 3:
    Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2),
                            SDOperand());
    Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1), Copy.getValue(1));
    // If we haven't noted the R3+R4 are live out, do so now.
    if (DAG.getMachineFunction().liveout_empty()) {
      DAG.getMachineFunction().addLiveOut(PPC::R3);
      DAG.getMachineFunction().addLiveOut(PPC::R4);
    }
    break;
  }
  return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1));
}

/// LowerSELECT_CC - Lower floating point select_cc's into an fsel instruction
/// when possible.
static SDOperand LowerSELECT_CC(SDOperand Op, SelectionDAG &DAG) {
  // Not FP? Not an fsel.
  if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) ||
      !MVT::isFloatingPoint(Op.getOperand(2).getValueType()))
    return SDOperand();

  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();

  // Cannot handle SETEQ/SETNE.
  if (CC == ISD::SETEQ || CC == ISD::SETNE) return SDOperand();

  MVT::ValueType ResVT = Op.getValueType();
  MVT::ValueType CmpVT = Op.getOperand(0).getValueType();
  SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1);
  SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3);

  // If the RHS of the comparison is a 0.0, we don't need to do the
  // subtraction at all.
  if (isFloatingPointZero(RHS))
    switch (CC) {
    default: break;       // SETUO etc aren't handled by fsel.
    case ISD::SETULT:
    case ISD::SETLT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETUGE:
    case ISD::SETGE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV);
    case ISD::SETUGT:
    case ISD::SETGT:
      std::swap(TV, FV);  // fsel is natively setge, swap operands for setlt
    case ISD::SETULE:
    case ISD::SETLE:
      if (LHS.getValueType() == MVT::f32)   // Comparison is always 64-bits
        LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS);
      return DAG.getNode(PPCISD::FSEL, ResVT,
                         DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV);
    }

  SDOperand Cmp;
  switch (CC) {
  default: break;       // SETUO etc aren't handled by fsel.
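  // fsel picks between its last two operands based on whether the first is
  // >= 0.0, so each supported condition below is rewritten into that form
  // (swapping the select arms or reversing the subtraction as needed).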
  case ISD::SETULT:
  case ISD::SETLT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETUGE:
  case ISD::SETGE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  case ISD::SETUGT:
  case ISD::SETGT:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV);
  case ISD::SETULE:
  case ISD::SETLE:
    Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS);
    if (Cmp.getValueType() == MVT::f32)   // Comparison is always 64-bits
      Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp);
    return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV);
  }
  return SDOperand();
}

static SDOperand LowerFP_TO_SINT(SDOperand Op, SelectionDAG &DAG) {
  assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType()));
  SDOperand Src = Op.getOperand(0);
  if (Src.getValueType() == MVT::f32)
    Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src);

  SDOperand Tmp;
  switch (Op.getValueType()) {
  default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!");
  case MVT::i32:
    Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src);
    break;
  case MVT::i64:
    Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src);
    break;
  }

  // Convert the FP value to an int value through memory.
  SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp);
  if (Op.getValueType() == MVT::i32)
    Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits);
  return Bits;
}

static SDOperand LowerSINT_TO_FP(SDOperand Op, SelectionDAG &DAG) {
  if (Op.getOperand(0).getValueType() == MVT::i64) {
    SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0));
    SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits);
    if (Op.getValueType() == MVT::f32)
      FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
    return FP;
  }

  assert(Op.getOperand(0).getValueType() == MVT::i32 &&
         "Unhandled SINT_TO_FP type in custom expander!");
  // Since we only generate this in 64-bit mode, we can take advantage of
  // 64-bit registers.  In particular, sign extend the input value into the
  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack,
  // then lfd it and fcfid it.
  MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
  int FrameIdx = FrameInfo->CreateStackObject(8, 8);
  SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32);

  SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32,
                                Op.getOperand(0));

  // STD the extended value into the stack slot.
  SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other,
                                DAG.getEntryNode(), Ext64, FIdx,
                                DAG.getSrcValue(NULL));
  // Load the value as a double.
  SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL));

  // FCFID it and return it.
  SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld);
  if (Op.getValueType() == MVT::f32)
    FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP);
  return FP;
}

static SDOperand LowerSHL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5);
  SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRL(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRL!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops.  Note that these ops
  // depend on the PPC behavior for oversized shift amounts.
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5);
  SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6);
  SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

static SDOperand LowerSRA(SDOperand Op, SelectionDAG &DAG) {
  assert(Op.getValueType() == MVT::i64 &&
         Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!");
  // The generic code does a fine job expanding shift by a constant.
  if (isa<ConstantSDNode>(Op.getOperand(1))) return SDOperand();

  // Otherwise, expand into a bunch of logical ops, followed by a select_cc.
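  // The final select_cc on (Amt-32) chooses the correct low word when the
  // shift amount is 32 or more.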
  SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(0, MVT::i32));
  SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0),
                             DAG.getConstant(1, MVT::i32));
  SDOperand Amt = Op.getOperand(1);

  SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32,
                               DAG.getConstant(32, MVT::i32), Amt);
  SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt);
  SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1);
  SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3);
  SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt,
                               DAG.getConstant(-32U, MVT::i32));
  SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5);
  SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt);
  SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32),
                                    Tmp4, Tmp6, ISD::SETLE);
  return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi);
}

//===----------------------------------------------------------------------===//
// Vector related lowering.
//

// If this is a vector of constants or undefs, get the bits.  A bit in
// UndefBits is set if the corresponding element of the vector is an
// ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
// zero.  Return true if this is not an array of constants, false if it is.
//
static bool GetConstantBuildVectorBits(SDNode *BV, uint64_t VectorBits[2],
                                       uint64_t UndefBits[2]) {
  // Start with zero'd results.
  VectorBits[0] = VectorBits[1] = UndefBits[0] = UndefBits[1] = 0;

  unsigned EltBitSize = MVT::getSizeInBits(BV->getOperand(0).getValueType());
  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    SDOperand OpVal = BV->getOperand(i);

    unsigned PartNo = i >= e/2;     // In the upper 128 bits?
    unsigned SlotNo = e/2 - (i & (e/2-1))-1;  // Which subpiece of the uint64_t.

    uint64_t EltBits = 0;
    if (OpVal.getOpcode() == ISD::UNDEF) {
      uint64_t EltUndefBits = ~0U >> (32-EltBitSize);
      UndefBits[PartNo] |= EltUndefBits << (SlotNo*EltBitSize);
      continue;
    } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
      EltBits = CN->getValue() & (~0U >> (32-EltBitSize));
    } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
      assert(CN->getValueType(0) == MVT::f32 &&
             "Only one legal FP vector type!");
      EltBits = FloatToBits(CN->getValue());
    } else {
      // Nonconstant element.
      return true;
    }

    VectorBits[PartNo] |= EltBits << (SlotNo*EltBitSize);
  }

  //printf("%llx %llx %llx %llx\n",
  //       VectorBits[0], VectorBits[1], UndefBits[0], UndefBits[1]);
  return false;
}

// If this is a splat (repetition) of a value across the whole vector, return
// the smallest size that splats it.  For example, "0x01010101010101..." is a
// splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
// SplatSize = 1 byte.
static bool isConstantSplat(const uint64_t Bits128[2],
                            const uint64_t Undef128[2],
                            unsigned &SplatBits, unsigned &SplatUndef,
                            unsigned &SplatSize) {

  // Don't let undefs prevent splats from matching.  See if the top 64-bits
  // are the same as the lower 64-bits, ignoring undefs.
  if ((Bits128[0] & ~Undef128[1]) != (Bits128[1] & ~Undef128[0]))
    return false;  // Can't be a splat if two pieces don't match.
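  // Fold the two 64-bit halves together; an undef lane in one half is filled
  // in from the other half.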

  uint64_t Bits64  = Bits128[0] | Bits128[1];
  uint64_t Undef64 = Undef128[0] & Undef128[1];

  // Check that the top 32-bits are the same as the lower 32-bits, ignoring
  // undefs.
  if ((Bits64 & (~Undef64 >> 32)) != ((Bits64 >> 32) & ~Undef64))
    return false;  // Can't be a splat if two pieces don't match.

  uint32_t Bits32  = uint32_t(Bits64) | uint32_t(Bits64 >> 32);
  uint32_t Undef32 = uint32_t(Undef64) & uint32_t(Undef64 >> 32);

  // If the top 16-bits are different from the lower 16-bits, ignoring
  // undefs, we have an i32 splat.
  if ((Bits32 & (~Undef32 >> 16)) != ((Bits32 >> 16) & ~Undef32)) {
    SplatBits = Bits32;
    SplatUndef = Undef32;
    SplatSize = 4;
    return true;
  }

  uint16_t Bits16  = uint16_t(Bits32)  | uint16_t(Bits32 >> 16);
  uint16_t Undef16 = uint16_t(Undef32) & uint16_t(Undef32 >> 16);

  // If the top 8-bits are different from the lower 8-bits, ignoring
  // undefs, we have an i16 splat.
  if ((Bits16 & (uint16_t(~Undef16) >> 8)) != ((Bits16 >> 8) & ~Undef16)) {
    SplatBits = Bits16;
    SplatUndef = Undef16;
    SplatSize = 2;
    return true;
  }

  // Otherwise, we have an 8-bit splat.
  SplatBits  = uint8_t(Bits16)  | uint8_t(Bits16 >> 8);
  SplatUndef = uint8_t(Undef16) & uint8_t(Undef16 >> 8);
  SplatSize = 1;
  return true;
}

/// BuildSplatI - Build a canonical splati of Val with an element size of
/// SplatSize.  Cast the result to VT.
static SDOperand BuildSplatI(int Val, unsigned SplatSize, MVT::ValueType VT,
                             SelectionDAG &DAG) {
  assert(Val >= -16 && Val <= 15 && "vsplti is out of range!");

  // Force vspltis[hw] -1 to vspltisb -1.
  if (Val == -1) SplatSize = 1;

  static const MVT::ValueType VTys[] = { // canonical VT to use for each size.
    MVT::v16i8, MVT::v8i16, MVT::Other, MVT::v4i32
  };
  MVT::ValueType CanonicalVT = VTys[SplatSize-1];

  // Build a canonical splat for this value.
  SDOperand Elt = DAG.getConstant(Val, MVT::getVectorBaseType(CanonicalVT));
  std::vector<SDOperand> Ops(MVT::getVectorNumElements(CanonicalVT), Elt);
  SDOperand Res = DAG.getNode(ISD::BUILD_VECTOR, CanonicalVT, Ops);
  return DAG.getNode(ISD::BIT_CONVERT, VT, Res);
}

/// BuildIntrinsicOp - Return a binary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand LHS, SDOperand RHS,
                                  SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), LHS, RHS);
}

/// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
/// specified intrinsic ID.
static SDOperand BuildIntrinsicOp(unsigned IID, SDOperand Op0, SDOperand Op1,
                                  SDOperand Op2, SelectionDAG &DAG,
                                  MVT::ValueType DestVT = MVT::Other) {
  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DestVT,
                     DAG.getConstant(IID, MVT::i32), Op0, Op1, Op2);
}


/// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
/// amount.  The result has the specified value type.
static SDOperand BuildVSLDOI(SDOperand LHS, SDOperand RHS, unsigned Amt,
                             MVT::ValueType VT, SelectionDAG &DAG) {
  // Force LHS/RHS to be the right type.
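  // vsldoi operates on bytes, so both inputs are bitcast to v16i8 before the
  // shuffle mask is built.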
  LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, LHS);
  RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, RHS);

  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(i+Amt, MVT::i32));
  SDOperand T = DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, LHS, RHS,
                            DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
  return DAG.getNode(ISD::BIT_CONVERT, VT, T);
}

// If this is a case we can't handle, return null and let the default
// expansion code take care of it.  If we CAN select this case, and if it
// selects to a single instruction, return Op.  Otherwise, if we can codegen
// this case more efficiently than a constant pool load, lower it to the
// sequence of ops that should be used.
static SDOperand LowerBUILD_VECTOR(SDOperand Op, SelectionDAG &DAG) {
  // If this is a vector of constants or undefs, get the bits.  A bit in
  // UndefBits is set if the corresponding element of the vector is an
  // ISD::UNDEF value.  For undefs, the corresponding VectorBits values are
  // zero.
  uint64_t VectorBits[2];
  uint64_t UndefBits[2];
  if (GetConstantBuildVectorBits(Op.Val, VectorBits, UndefBits))
    return SDOperand();  // Not a constant vector.

  // If this is a splat (repetition) of a value across the whole vector, return
  // the smallest size that splats it.  For example, "0x01010101010101..." is a
  // splat of 0x01, 0x0101, and 0x01010101.  We return SplatBits = 0x01 and
  // SplatSize = 1 byte.
  unsigned SplatBits, SplatUndef, SplatSize;
  if (isConstantSplat(VectorBits, UndefBits, SplatBits, SplatUndef, SplatSize)){
    bool HasAnyUndefs = (UndefBits[0] | UndefBits[1]) != 0;

    // First, handle single instruction cases.

    // All zeros?
    if (SplatBits == 0) {
      // Canonicalize all zero vectors to be v4i32.
      if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
        SDOperand Z = DAG.getConstant(0, MVT::i32);
        Z = DAG.getNode(ISD::BUILD_VECTOR, MVT::v4i32, Z, Z, Z, Z);
        Op = DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Z);
      }
      return Op;
    }

    // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
    int32_t SextVal = int32_t(SplatBits << (32-8*SplatSize)) >> (32-8*SplatSize);
    if (SextVal >= -16 && SextVal <= 15)
      return BuildSplatI(SextVal, SplatSize, Op.getValueType(), DAG);


    // Two instruction sequences.

    // If this value is in the range [-32,30] and is even, use:
    //    tmp = VSPLTI[bhw], result = add tmp, tmp
    if (SextVal >= -32 && SextVal <= 30 && (SextVal & 1) == 0) {
      Op = BuildSplatI(SextVal >> 1, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), Op, Op);
    }

    // If this is 0x8000_0000 x 4, turn into vspltisw + vslw.  If it is
    // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000).  This is important
    // for fneg/fabs.
    if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
      // Make -1 and vspltisw -1:
      SDOperand OnesV = BuildSplatI(-1, 4, MVT::v4i32, DAG);

      // Make the VSLW intrinsic, computing 0x8000_0000.
      SDOperand Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
                                       OnesV, DAG);

      // xor by OnesV to invert it.
      Res = DAG.getNode(ISD::XOR, MVT::v4i32, Res, OnesV);
      return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Res);
    }

    // Check to see if this is a wide variety of vsplti*, binop self cases.
    unsigned SplatBitSize = SplatSize*8;
    static const char SplatCsts[] = {
      -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
      -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
    };
    for (unsigned idx = 0; idx < sizeof(SplatCsts)/sizeof(SplatCsts[0]); ++idx){
      // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
      // cases which are ambiguous (e.g. formation of 0x8000_0000).
      int i = SplatCsts[idx];

      // Figure out what shift amount will be used by altivec if shifted by i
      // in this splat size.
      unsigned TypeShiftAmt = i & (SplatBitSize-1);

      // vsplti + shl self.
      if (SextVal == (i << (int)TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
          Intrinsic::ppc_altivec_vslw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + srl self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
          Intrinsic::ppc_altivec_vsrw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + sra self.
      if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0,
          Intrinsic::ppc_altivec_vsraw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // vsplti + rol self.
      if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
                           ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
        Op = BuildSplatI(i, SplatSize, Op.getValueType(), DAG);
        static const unsigned IIDs[] = { // Intrinsic to use for each size.
          Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
          Intrinsic::ppc_altivec_vrlw
        };
        return BuildIntrinsicOp(IIDs[SplatSize-1], Op, Op, DAG);
      }

      // t = vsplti c, result = vsldoi t, t, 1
      if (SextVal == ((i << 8) | (i >> (TypeShiftAmt-8)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 1, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 2
      if (SextVal == ((i << 16) | (i >> (TypeShiftAmt-16)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 2, Op.getValueType(), DAG);
      }
      // t = vsplti c, result = vsldoi t, t, 3
      if (SextVal == ((i << 24) | (i >> (TypeShiftAmt-24)))) {
        SDOperand T = BuildSplatI(i, SplatSize, MVT::v16i8, DAG);
        return BuildVSLDOI(T, T, 3, Op.getValueType(), DAG);
      }
    }

    // Three instruction sequences.

    // Odd, in range [17,31]:  (vsplti C)-(vsplti -16).
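    // e.g. 27 == vsplti(11) - vsplti(-16), since 11 - (-16) == 27.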
    if (SextVal >= 0 && SextVal <= 31) {
      SDOperand LHS = BuildSplatI(SextVal-16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::SUB, Op.getValueType(), LHS, RHS);
    }
    // Odd, in range [-31,-17]:  (vsplti C)+(vsplti -16).
    if (SextVal >= -31 && SextVal <= 0) {
      SDOperand LHS = BuildSplatI(SextVal+16, SplatSize, Op.getValueType(),DAG);
      SDOperand RHS = BuildSplatI(-16, SplatSize, Op.getValueType(), DAG);
      return DAG.getNode(ISD::ADD, Op.getValueType(), LHS, RHS);
    }
  }

  return SDOperand();
}

/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDOperand GeneratePerfectShuffle(unsigned PFEntry, SDOperand LHS,
                                        SDOperand RHS, SelectionDAG &DAG) {
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >>  0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12,
  };

  if (OpNum == OP_COPY) {
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  SDOperand OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG);

  unsigned ShufIdxs[16];
  switch (OpNum) {
  default: assert(0 && "Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG);
  }
  std::vector<SDOperand> Ops;
  for (unsigned i = 0; i != 16; ++i)
    Ops.push_back(DAG.getConstant(ShufIdxs[i], MVT::i32));

  return DAG.getNode(ISD::VECTOR_SHUFFLE, OpLHS.getValueType(), OpLHS, OpRHS,
                     DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops));
}

/// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE.
/// If this is a shuffle we can handle in a single instruction, return it.
/// Otherwise, return the code it can be lowered into.  Worst case, it can
/// always be lowered into a vperm.
static SDOperand LowerVECTOR_SHUFFLE(SDOperand Op, SelectionDAG &DAG) {
  SDOperand V1 = Op.getOperand(0);
  SDOperand V2 = Op.getOperand(1);
  SDOperand PermMask = Op.getOperand(2);

  // Cases that are handled by instructions that take permute immediates
  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
  // selected by the instruction selector.
  if (V2.getOpcode() == ISD::UNDEF) {
    if (PPC::isSplatShuffleMask(PermMask.Val, 1) ||
        PPC::isSplatShuffleMask(PermMask.Val, 2) ||
        PPC::isSplatShuffleMask(PermMask.Val, 4) ||
        PPC::isVPKUWUMShuffleMask(PermMask.Val, true) ||
        PPC::isVPKUHUMShuffleMask(PermMask.Val, true) ||
        PPC::isVSLDOIShuffleMask(PermMask.Val, true) != -1 ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGLShuffleMask(PermMask.Val, 4, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 1, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 2, true) ||
        PPC::isVMRGHShuffleMask(PermMask.Val, 4, true)) {
      return Op;
    }
  }

  // Altivec has a variety of "shuffle immediates" that take two vector inputs
  // and produce a fixed permutation.  If any of these match, do not lower to
  // VPERM.
  if (PPC::isVPKUWUMShuffleMask(PermMask.Val, false) ||
      PPC::isVPKUHUMShuffleMask(PermMask.Val, false) ||
      PPC::isVSLDOIShuffleMask(PermMask.Val, false) != -1 ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGLShuffleMask(PermMask.Val, 4, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 1, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 2, false) ||
      PPC::isVMRGHShuffleMask(PermMask.Val, 4, false))
    return Op;

  // Check to see if this is a shuffle of 4-byte values.  If so, we can use our
  // perfect shuffle table to emit an optimal matching sequence.
  unsigned PFIndexes[4];
  bool isFourElementShuffle = true;
  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
    unsigned EltNo = 8;   // Start out undef.
    for (unsigned j = 0; j != 4; ++j) {  // Intra-element byte.
      if (PermMask.getOperand(i*4+j).getOpcode() == ISD::UNDEF)
        continue;   // Undef, ignore it.

      unsigned ByteSource =
        cast<ConstantSDNode>(PermMask.getOperand(i*4+j))->getValue();
      if ((ByteSource & 3) != j) {
        isFourElementShuffle = false;
        break;
      }

      if (EltNo == 8) {
        EltNo = ByteSource/4;
      } else if (EltNo != ByteSource/4) {
        isFourElementShuffle = false;
        break;
      }
    }
    PFIndexes[i] = EltNo;
  }

  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
  // perfect shuffle vector to determine if it is cost effective to do this as
  // discrete instructions, or whether we should use a vperm.
  if (isFourElementShuffle) {
    // Compute the index in the perfect shuffle table.
    unsigned PFTableIndex =
      PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];

    unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
    unsigned Cost  = (PFEntry >> 30);

    // Determining when to avoid vperm is tricky.
Many things affect the cost 1423 // of vperm, particularly how many times the perm mask needs to be computed. 1424 // For example, if the perm mask can be hoisted out of a loop or is already 1425 // used (perhaps because there are multiple permutes with the same shuffle 1426 // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of 1427 // the loop requires an extra register. 1428 // 1429 // As a compromise, we only emit discrete instructions if the shuffle can be 1430 // generated in 3 or fewer operations. When we have loop information 1431 // available, if this block is within a loop, we should avoid using vperm 1432 // for 3-operation perms and use a constant pool load instead. 1433 if (Cost < 3) 1434 return GeneratePerfectShuffle(PFEntry, V1, V2, DAG); 1435 } 1436 1437 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 1438 // vector that will get spilled to the constant pool. 1439 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 1440 1441 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 1442 // that it is in input element units, not in bytes. Convert now. 1443 MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType()); 1444 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; 1445 1446 std::vector<SDOperand> ResultMask; 1447 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 1448 unsigned SrcElt; 1449 if (PermMask.getOperand(i).getOpcode() == ISD::UNDEF) 1450 SrcElt = 0; 1451 else 1452 SrcElt = cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 1453 1454 for (unsigned j = 0; j != BytesPerElement; ++j) 1455 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 1456 MVT::i8)); 1457 } 1458 1459 SDOperand VPermMask = DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask); 1460 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 1461} 1462 1463/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom 1464/// lower, do it, otherwise return null. 1465static SDOperand LowerINTRINSIC_WO_CHAIN(SDOperand Op, SelectionDAG &DAG) { 1466 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 1467 1468 // If this is a lowered altivec predicate compare, CompareOpc is set to the 1469 // opcode number of the comparison. 1470 int CompareOpc = -1; 1471 bool isDot = false; 1472 switch (IntNo) { 1473 default: return SDOperand(); // Don't custom lower most intrinsics. 1474 // Comparison predicates. 
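  // The CompareOpc values below are the VC-form extended-opcode fields of the
  // corresponding AltiVec compare instructions (134 is vcmpequw, 966 is
  // vcmpbfp, and so on); the "_p" predicate intrinsics select the dot form,
  // which also records the comparison summary in CR6.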
1475 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 1476 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 1477 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 1478 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 1479 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 1480 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 1481 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 1482 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 1483 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 1484 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 1485 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 1486 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 1487 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 1488 1489 // Normal Comparisons. 1490 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 1491 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 1492 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 1493 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 1494 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 1495 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 1496 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 1497 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 1498 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 1499 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 1500 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 1501 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 1502 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 1503 } 1504 1505 assert(CompareOpc>0 && "We only lower altivec predicate compares so far!"); 1506 1507 // If this is a non-dot comparison, make the VCMP node. 1508 if (!isDot) { 1509 SDOperand Tmp = DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 1510 Op.getOperand(1), Op.getOperand(2), 1511 DAG.getConstant(CompareOpc, MVT::i32)); 1512 return DAG.getNode(ISD::BIT_CONVERT, Op.getValueType(), Tmp); 1513 } 1514 1515 // Create the PPCISD altivec 'dot' comparison node. 1516 std::vector<SDOperand> Ops; 1517 std::vector<MVT::ValueType> VTs; 1518 Ops.push_back(Op.getOperand(2)); // LHS 1519 Ops.push_back(Op.getOperand(3)); // RHS 1520 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 1521 VTs.push_back(Op.getOperand(2).getValueType()); 1522 VTs.push_back(MVT::Flag); 1523 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 1524 1525 // Now that we have the comparison, emit a copy from the CR to a GPR. 1526 // This is flagged to the above dot comparison. 1527 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 1528 DAG.getRegister(PPC::CR6, MVT::i32), 1529 CompNode.getValue(1)); 1530 1531 // Unpack the result based on how the target uses it. 1532 unsigned BitNo; // Bit # of CR6. 1533 bool InvertBit; // Invert result? 1534 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 1535 default: // Can't happen, don't crash on invalid number though. 1536 case 0: // Return the value of the EQ bit of CR6. 
1537 BitNo = 0; InvertBit = false; 1538 break; 1539 case 1: // Return the inverted value of the EQ bit of CR6. 1540 BitNo = 0; InvertBit = true; 1541 break; 1542 case 2: // Return the value of the LT bit of CR6. 1543 BitNo = 2; InvertBit = false; 1544 break; 1545 case 3: // Return the inverted value of the LT bit of CR6. 1546 BitNo = 2; InvertBit = true; 1547 break; 1548 } 1549 1550 // Shift the bit into the low position. 1551 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 1552 DAG.getConstant(8-(3-BitNo), MVT::i32)); 1553 // Isolate the bit. 1554 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 1555 DAG.getConstant(1, MVT::i32)); 1556 1557 // If we are supposed to, toggle the bit. 1558 if (InvertBit) 1559 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 1560 DAG.getConstant(1, MVT::i32)); 1561 return Flags; 1562} 1563 1564static SDOperand LowerSCALAR_TO_VECTOR(SDOperand Op, SelectionDAG &DAG) { 1565 // Create a stack slot that is 16-byte aligned. 1566 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 1567 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 1568 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 1569 1570 // Store the input value into Value#0 of the stack slot. 1571 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 1572 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 1573 // Load it out. 1574 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 1575} 1576 1577static SDOperand LowerMUL(SDOperand Op, SelectionDAG &DAG) { 1578 if (Op.getValueType() == MVT::v4i32) { 1579 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1580 1581 SDOperand Zero = BuildSplatI( 0, 1, MVT::v4i32, DAG); 1582 SDOperand Neg16 = BuildSplatI(-16, 4, MVT::v4i32, DAG); // +16 as shift amt. 1583 1584 SDOperand RHSSwap = // = vrlw RHS, 16 1585 BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG); 1586 1587 // Shrinkify inputs to v8i16. 1588 LHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, LHS); 1589 RHS = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHS); 1590 RHSSwap = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, RHSSwap); 1591 1592 // Low parts multiplied together, generating 32-bit results (we ignore the 1593 // top parts). 1594 SDOperand LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 1595 LHS, RHS, DAG, MVT::v4i32); 1596 1597 SDOperand HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm, 1598 LHS, RHSSwap, Zero, DAG, MVT::v4i32); 1599 // Shift the high parts up 16 bits. 1600 HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd, Neg16, DAG); 1601 return DAG.getNode(ISD::ADD, MVT::v4i32, LoProd, HiProd); 1602 } else if (Op.getValueType() == MVT::v8i16) { 1603 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1604 1605 // Multiply the even 16-bit parts, producing 32-bit sums. 1606 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleuh, 1607 LHS, RHS, DAG, MVT::v4i32); 1608 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, EvenParts); 1609 1610 // Multiply the odd 16-bit parts, producing 32-bit sums. 1611 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh, 1612 LHS, RHS, DAG, MVT::v4i32); 1613 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v8i16, OddParts); 1614 1615 // Merge the results together. 
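    // The mask built below is <1, 9, 3, 11, 5, 13, 7, 15>: with big-endian
    // element numbering the low halfword of each 32-bit product is the
    // odd-numbered element, so interleaving the odd halfwords of EvenParts and
    // OddParts reassembles the truncated 16-bit products in their original
    // element order.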
1616 std::vector<SDOperand> Ops; 1617 for (unsigned i = 0; i != 4; ++i) { 1618 Ops.push_back(DAG.getConstant(2*i+1, MVT::i16)); 1619 Ops.push_back(DAG.getConstant(2*i+1+8, MVT::i16)); 1620 } 1621 1622 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v8i16, EvenParts, OddParts, 1623 DAG.getNode(ISD::BUILD_VECTOR, MVT::v8i16, Ops)); 1624 } else if (Op.getValueType() == MVT::v16i8) { 1625 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 1626 1627 // Multiply the even 8-bit parts, producing 16-bit sums. 1628 SDOperand EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, 1629 LHS, RHS, DAG, MVT::v8i16); 1630 EvenParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, EvenParts); 1631 1632 // Multiply the odd 8-bit parts, producing 16-bit sums. 1633 SDOperand OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub, 1634 LHS, RHS, DAG, MVT::v8i16); 1635 OddParts = DAG.getNode(ISD::BIT_CONVERT, MVT::v16i8, OddParts); 1636 1637 // Merge the results together. 1638 std::vector<SDOperand> Ops; 1639 for (unsigned i = 0; i != 8; ++i) { 1640 Ops.push_back(DAG.getConstant(2*i+1, MVT::i8)); 1641 Ops.push_back(DAG.getConstant(2*i+1+16, MVT::i8)); 1642 } 1643 1644 return DAG.getNode(ISD::VECTOR_SHUFFLE, MVT::v16i8, EvenParts, OddParts, 1645 DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, Ops)); 1646 } else { 1647 assert(0 && "Unknown mul to lower!"); 1648 abort(); 1649 } 1650} 1651 1652/// LowerOperation - Provide custom lowering hooks for some operations. 1653/// 1654SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 1655 switch (Op.getOpcode()) { 1656 default: assert(0 && "Wasn't expecting to be able to lower this!"); 1657 case ISD::ConstantPool: return LowerConstantPool(Op, DAG); 1658 case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); 1659 case ISD::SETCC: return LowerSETCC(Op, DAG); 1660 case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); 1661 case ISD::RET: return LowerRET(Op, DAG); 1662 1663 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); 1664 case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG); 1665 case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 1666 1667 // Lower 64-bit shifts. 1668 case ISD::SHL: return LowerSHL(Op, DAG); 1669 case ISD::SRL: return LowerSRL(Op, DAG); 1670 case ISD::SRA: return LowerSRA(Op, DAG); 1671 1672 // Vector-related lowering. 
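  // These cases are only reached for node/type pairs that the constructor
  // marked Custom; anything the target did not claim is expanded, promoted,
  // or matched directly by the generic legalizer and instruction selector.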
1673 case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); 1674 case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); 1675 case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); 1676 case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG); 1677 case ISD::MUL: return LowerMUL(Op, DAG); 1678 } 1679 return SDOperand(); 1680} 1681 1682//===----------------------------------------------------------------------===// 1683// Other Lowering Code 1684//===----------------------------------------------------------------------===// 1685 1686std::vector<SDOperand> 1687PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 1688 // 1689 // add beautiful description of PPC stack frame format, or at least some docs 1690 // 1691 MachineFunction &MF = DAG.getMachineFunction(); 1692 MachineFrameInfo *MFI = MF.getFrameInfo(); 1693 MachineBasicBlock& BB = MF.front(); 1694 SSARegMap *RegMap = MF.getSSARegMap(); 1695 std::vector<SDOperand> ArgValues; 1696 1697 unsigned ArgOffset = 24; 1698 unsigned GPR_remaining = 8; 1699 unsigned FPR_remaining = 13; 1700 unsigned GPR_idx = 0, FPR_idx = 0; 1701 static const unsigned GPR[] = { 1702 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 1703 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 1704 }; 1705 static const unsigned FPR[] = { 1706 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 1707 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 1708 }; 1709 1710 // Add DAG nodes to load the arguments... On entry to a function on PPC, 1711 // the arguments start at offset 24, although they are likely to be passed 1712 // in registers. 1713 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 1714 SDOperand newroot, argt; 1715 unsigned ObjSize; 1716 bool needsLoad = false; 1717 bool ArgLive = !I->use_empty(); 1718 MVT::ValueType ObjectVT = getValueType(I->getType()); 1719 1720 switch (ObjectVT) { 1721 default: assert(0 && "Unhandled argument type!"); 1722 case MVT::i1: 1723 case MVT::i8: 1724 case MVT::i16: 1725 case MVT::i32: 1726 ObjSize = 4; 1727 if (!ArgLive) break; 1728 if (GPR_remaining > 0) { 1729 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1730 MF.addLiveIn(GPR[GPR_idx], VReg); 1731 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1732 if (ObjectVT != MVT::i32) { 1733 unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext 1734 : ISD::AssertZext; 1735 argt = DAG.getNode(AssertOp, MVT::i32, argt, 1736 DAG.getValueType(ObjectVT)); 1737 argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt); 1738 } 1739 } else { 1740 needsLoad = true; 1741 } 1742 break; 1743 case MVT::i64: 1744 ObjSize = 8; 1745 if (!ArgLive) break; 1746 if (GPR_remaining > 0) { 1747 SDOperand argHi, argLo; 1748 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1749 MF.addLiveIn(GPR[GPR_idx], VReg); 1750 argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1751 // If we have two or more remaining argument registers, then both halves 1752 // of the i64 can be sourced from there. Otherwise, the lower half will 1753 // have to come off the stack. This can happen when an i64 is preceded 1754 // by 28 bytes of arguments. 
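        // For example, with f(int, int, int, int, int, int, int, long long)
        // the seven ints use R3-R9 and 28 bytes of argument space, leaving only
        // R10: the high word of the i64 arrives there, and the low word is
        // reloaded from its fixed stack slot at ArgOffset+4 (offset 56).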
1755           if (GPR_remaining > 1) {
1756             unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
1757             MF.addLiveIn(GPR[GPR_idx+1], VReg);
1758             argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
1759           } else {
1760             int FI = MFI->CreateFixedObject(4, ArgOffset+4);
1761             SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
1762             argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
1763                                 DAG.getSrcValue(NULL));
1764           }
1765           // Assemble the two halves into the outgoing i64 argument value.
1766           argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
1767           newroot = argLo;
1768         } else {
1769           needsLoad = true;
1770         }
1771         break;
1772       case MVT::f32:
1773       case MVT::f64:
1774         ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
1775         if (!ArgLive) {
1776           if (FPR_remaining > 0) {
1777             --FPR_remaining;
1778             ++FPR_idx;
1779           }
1780           break;
1781         }
1782         if (FPR_remaining > 0) {
1783           unsigned VReg;
1784           if (ObjectVT == MVT::f32)
1785             VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
1786           else
1787             VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
1788           MF.addLiveIn(FPR[FPR_idx], VReg);
1789           argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
1790           --FPR_remaining;
1791           ++FPR_idx;
1792         } else {
1793           needsLoad = true;
1794         }
1795         break;
1796       }
1797
1798       // We need to load the argument to a virtual register if we determined above
1799       // that we ran out of physical registers of the appropriate type.
1800       if (needsLoad) {
1801         unsigned SubregOffset = 0;
1802         if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
1803         if (ObjectVT == MVT::i16) SubregOffset = 2;
1804         int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
1805         SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
1806         FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
1807                           DAG.getConstant(SubregOffset, MVT::i32));
1808         argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
1809                                      DAG.getSrcValue(NULL));
1810       }
1811
1812       // Every 4 bytes of argument space consumes one of the GPRs available for
1813       // argument passing.
1814       if (GPR_remaining > 0) {
1815         unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
1816         GPR_remaining -= delta;
1817         GPR_idx += delta;
1818       }
1819       ArgOffset += ObjSize;
1820       if (newroot.Val)
1821         DAG.setRoot(newroot.getValue(1));
1822
1823       ArgValues.push_back(argt);
1824     }
1825
1826     // If the function takes a variable number of arguments, make a frame index for
1827     // the start of the first vararg value... for expansion of llvm.va_start.
1828     if (F.isVarArg()) {
1829       VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
1830       SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
1831       // If this function is vararg, store any remaining integer argument regs
1832       // to their spots on the stack so that they may be loaded by dereferencing
1833       // the result of va_next.
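      // For example, when compiling a vararg function such as
      // fprintf(FILE*, const char*, ...), the two fixed parameters arrive in
      // R3 and R4 and the loop below spills the remaining parameter registers
      // R5-R10 to their reserved slots, so va_arg sees one contiguous argument
      // image starting at VarArgsFrameIndex.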
1834 std::vector<SDOperand> MemOps; 1835 for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) { 1836 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1837 MF.addLiveIn(GPR[GPR_idx], VReg); 1838 SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1839 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), 1840 Val, FIN, DAG.getSrcValue(NULL)); 1841 MemOps.push_back(Store); 1842 // Increment the address by four for the next argument to store 1843 SDOperand PtrOff = DAG.getConstant(4, getPointerTy()); 1844 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff); 1845 } 1846 if (!MemOps.empty()) { 1847 MemOps.push_back(DAG.getRoot()); 1848 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps)); 1849 } 1850 } 1851 1852 return ArgValues; 1853} 1854 1855std::pair<SDOperand, SDOperand> 1856PPCTargetLowering::LowerCallTo(SDOperand Chain, 1857 const Type *RetTy, bool isVarArg, 1858 unsigned CallingConv, bool isTailCall, 1859 SDOperand Callee, ArgListTy &Args, 1860 SelectionDAG &DAG) { 1861 // args_to_use will accumulate outgoing args for the PPCISD::CALL case in 1862 // SelectExpr to use to put the arguments in the appropriate registers. 1863 std::vector<SDOperand> args_to_use; 1864 1865 // Count how many bytes are to be pushed on the stack, including the linkage 1866 // area, and parameter passing area. 1867 unsigned NumBytes = 24; 1868 1869 if (Args.empty()) { 1870 Chain = DAG.getCALLSEQ_START(Chain, 1871 DAG.getConstant(NumBytes, getPointerTy())); 1872 } else { 1873 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1874 switch (getValueType(Args[i].second)) { 1875 default: assert(0 && "Unknown value type!"); 1876 case MVT::i1: 1877 case MVT::i8: 1878 case MVT::i16: 1879 case MVT::i32: 1880 case MVT::f32: 1881 NumBytes += 4; 1882 break; 1883 case MVT::i64: 1884 case MVT::f64: 1885 NumBytes += 8; 1886 break; 1887 } 1888 } 1889 1890 // Just to be safe, we'll always reserve the full 24 bytes of linkage area 1891 // plus 32 bytes of argument space in case any called code gets funky on us. 1892 // (Required by ABI to support var arg) 1893 if (NumBytes < 56) NumBytes = 56; 1894 1895 // Adjust the stack pointer for the new arguments... 1896 // These operations are automatically eliminated by the prolog/epilog pass 1897 Chain = DAG.getCALLSEQ_START(Chain, 1898 DAG.getConstant(NumBytes, getPointerTy())); 1899 1900 // Set up a copy of the stack pointer for use loading and storing any 1901 // arguments that may not fit in the registers available for argument 1902 // passing. 1903 SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32); 1904 1905 // Figure out which arguments are going to go in registers, and which in 1906 // memory. Also, if this is a vararg function, floating point operations 1907 // must be stored to our stack, and loaded into integer regs as well, if 1908 // any integer regs are available for argument passing. 1909 unsigned ArgOffset = 24; 1910 unsigned GPR_remaining = 8; 1911 unsigned FPR_remaining = 13; 1912 1913 std::vector<SDOperand> MemOps; 1914 for (unsigned i = 0, e = Args.size(); i != e; ++i) { 1915 // PtrOff will be used to store the current argument to the stack if a 1916 // register cannot be found for it. 
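      // PtrOff evaluates to R1 + ArgOffset, the argument's home slot in the
      // outgoing parameter area; ArgOffset starts at 24, so the slots begin
      // immediately after the linkage area.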
1917 SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy()); 1918 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff); 1919 MVT::ValueType ArgVT = getValueType(Args[i].second); 1920 1921 switch (ArgVT) { 1922 default: assert(0 && "Unexpected ValueType for argument!"); 1923 case MVT::i1: 1924 case MVT::i8: 1925 case MVT::i16: 1926 // Promote the integer to 32 bits. If the input type is signed use a 1927 // sign extend, otherwise use a zero extend. 1928 if (Args[i].second->isSigned()) 1929 Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first); 1930 else 1931 Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first); 1932 // FALL THROUGH 1933 case MVT::i32: 1934 if (GPR_remaining > 0) { 1935 args_to_use.push_back(Args[i].first); 1936 --GPR_remaining; 1937 } else { 1938 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1939 Args[i].first, PtrOff, 1940 DAG.getSrcValue(NULL))); 1941 } 1942 ArgOffset += 4; 1943 break; 1944 case MVT::i64: 1945 // If we have one free GPR left, we can place the upper half of the i64 1946 // in it, and store the other half to the stack. If we have two or more 1947 // free GPRs, then we can pass both halves of the i64 in registers. 1948 if (GPR_remaining > 0) { 1949 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1950 Args[i].first, DAG.getConstant(1, MVT::i32)); 1951 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, 1952 Args[i].first, DAG.getConstant(0, MVT::i32)); 1953 args_to_use.push_back(Hi); 1954 --GPR_remaining; 1955 if (GPR_remaining > 0) { 1956 args_to_use.push_back(Lo); 1957 --GPR_remaining; 1958 } else { 1959 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 1960 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 1961 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1962 Lo, PtrOff, DAG.getSrcValue(NULL))); 1963 } 1964 } else { 1965 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 1966 Args[i].first, PtrOff, 1967 DAG.getSrcValue(NULL))); 1968 } 1969 ArgOffset += 8; 1970 break; 1971 case MVT::f32: 1972 case MVT::f64: 1973 if (FPR_remaining > 0) { 1974 args_to_use.push_back(Args[i].first); 1975 --FPR_remaining; 1976 if (isVarArg) { 1977 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain, 1978 Args[i].first, PtrOff, 1979 DAG.getSrcValue(NULL)); 1980 MemOps.push_back(Store); 1981 // Float varargs are always shadowed in available integer registers 1982 if (GPR_remaining > 0) { 1983 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 1984 DAG.getSrcValue(NULL)); 1985 MemOps.push_back(Load.getValue(1)); 1986 args_to_use.push_back(Load); 1987 --GPR_remaining; 1988 } 1989 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 1990 SDOperand ConstFour = DAG.getConstant(4, getPointerTy()); 1991 PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour); 1992 SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff, 1993 DAG.getSrcValue(NULL)); 1994 MemOps.push_back(Load.getValue(1)); 1995 args_to_use.push_back(Load); 1996 --GPR_remaining; 1997 } 1998 } else { 1999 // If we have any FPRs remaining, we may also have GPRs remaining. 2000 // Args passed in FPRs consume either 1 (f32) or 2 (f64) available 2001 // GPRs. 
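          // For example, in a call to f(double, int) the double travels in F1
          // but still owns the R3/R4 parameter words, so UNDEF placeholders
          // are pushed for those GPRs and the following int ends up in R5,
          // where the ABI expects it.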
2002 if (GPR_remaining > 0) { 2003 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2004 --GPR_remaining; 2005 } 2006 if (GPR_remaining > 0 && MVT::f64 == ArgVT) { 2007 args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32)); 2008 --GPR_remaining; 2009 } 2010 } 2011 } else { 2012 MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain, 2013 Args[i].first, PtrOff, 2014 DAG.getSrcValue(NULL))); 2015 } 2016 ArgOffset += (ArgVT == MVT::f32) ? 4 : 8; 2017 break; 2018 } 2019 } 2020 if (!MemOps.empty()) 2021 Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps); 2022 } 2023 2024 std::vector<MVT::ValueType> RetVals; 2025 MVT::ValueType RetTyVT = getValueType(RetTy); 2026 MVT::ValueType ActualRetTyVT = RetTyVT; 2027 if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16) 2028 ActualRetTyVT = MVT::i32; // Promote result to i32. 2029 2030 if (RetTyVT == MVT::i64) { 2031 RetVals.push_back(MVT::i32); 2032 RetVals.push_back(MVT::i32); 2033 } else if (RetTyVT != MVT::isVoid) { 2034 RetVals.push_back(ActualRetTyVT); 2035 } 2036 RetVals.push_back(MVT::Other); 2037 2038 // If the callee is a GlobalAddress node (quite common, every direct call is) 2039 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. 2040 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 2041 Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32); 2042 2043 std::vector<SDOperand> Ops; 2044 Ops.push_back(Chain); 2045 Ops.push_back(Callee); 2046 Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end()); 2047 SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops); 2048 Chain = TheCall.getValue(TheCall.Val->getNumValues()-1); 2049 Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain, 2050 DAG.getConstant(NumBytes, getPointerTy())); 2051 SDOperand RetVal = TheCall; 2052 2053 // If the result is a small value, add a note so that we keep track of the 2054 // information about whether it is sign or zero extended. 2055 if (RetTyVT != ActualRetTyVT) { 2056 RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext, 2057 MVT::i32, RetVal, DAG.getValueType(RetTyVT)); 2058 RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal); 2059 } else if (RetTyVT == MVT::i64) { 2060 RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1)); 2061 } 2062 2063 return std::make_pair(RetVal, Chain); 2064} 2065 2066MachineBasicBlock * 2067PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI, 2068 MachineBasicBlock *BB) { 2069 assert((MI->getOpcode() == PPC::SELECT_CC_Int || 2070 MI->getOpcode() == PPC::SELECT_CC_F4 || 2071 MI->getOpcode() == PPC::SELECT_CC_F8 || 2072 MI->getOpcode() == PPC::SELECT_CC_VRRC) && 2073 "Unexpected instr type to insert"); 2074 2075 // To "insert" a SELECT_CC instruction, we actually have to insert the diamond 2076 // control-flow pattern. The incoming instruction knows the destination vreg 2077 // to set, the condition code register to branch on, the true/false values to 2078 // select between, and a branch opcode to use. 2079 const BasicBlock *LLVM_BB = BB->getBasicBlock(); 2080 ilist<MachineBasicBlock>::iterator It = BB; 2081 ++It; 2082 2083 // thisMBB: 2084 // ... 2085 // TrueVal = ... 
2086 // cmpTY ccX, r1, r2 2087 // bCC copy1MBB 2088 // fallthrough --> copy0MBB 2089 MachineBasicBlock *thisMBB = BB; 2090 MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB); 2091 MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB); 2092 BuildMI(BB, MI->getOperand(4).getImmedValue(), 2) 2093 .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB); 2094 MachineFunction *F = BB->getParent(); 2095 F->getBasicBlockList().insert(It, copy0MBB); 2096 F->getBasicBlockList().insert(It, sinkMBB); 2097 // Update machine-CFG edges by first adding all successors of the current 2098 // block to the new block which will contain the Phi node for the select. 2099 for(MachineBasicBlock::succ_iterator i = BB->succ_begin(), 2100 e = BB->succ_end(); i != e; ++i) 2101 sinkMBB->addSuccessor(*i); 2102 // Next, remove all successors of the current block, and add the true 2103 // and fallthrough blocks as its successors. 2104 while(!BB->succ_empty()) 2105 BB->removeSuccessor(BB->succ_begin()); 2106 BB->addSuccessor(copy0MBB); 2107 BB->addSuccessor(sinkMBB); 2108 2109 // copy0MBB: 2110 // %FalseValue = ... 2111 // # fallthrough to sinkMBB 2112 BB = copy0MBB; 2113 2114 // Update machine-CFG edges 2115 BB->addSuccessor(sinkMBB); 2116 2117 // sinkMBB: 2118 // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] 2119 // ... 2120 BB = sinkMBB; 2121 BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg()) 2122 .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB) 2123 .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB); 2124 2125 delete MI; // The pseudo instruction is gone now. 2126 return BB; 2127} 2128 2129//===----------------------------------------------------------------------===// 2130// Target Optimization Hooks 2131//===----------------------------------------------------------------------===// 2132 2133SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N, 2134 DAGCombinerInfo &DCI) const { 2135 TargetMachine &TM = getTargetMachine(); 2136 SelectionDAG &DAG = DCI.DAG; 2137 switch (N->getOpcode()) { 2138 default: break; 2139 case ISD::SINT_TO_FP: 2140 if (TM.getSubtarget<PPCSubtarget>().is64Bit()) { 2141 if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) { 2142 // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores. 2143 // We allow the src/dst to be either f32/f64, but the intermediate 2144 // type must be i64. 2145 if (N->getOperand(0).getValueType() == MVT::i64) { 2146 SDOperand Val = N->getOperand(0).getOperand(0); 2147 if (Val.getValueType() == MVT::f32) { 2148 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 2149 DCI.AddToWorklist(Val.Val); 2150 } 2151 2152 Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val); 2153 DCI.AddToWorklist(Val.Val); 2154 Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val); 2155 DCI.AddToWorklist(Val.Val); 2156 if (N->getValueType(0) == MVT::f32) { 2157 Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val); 2158 DCI.AddToWorklist(Val.Val); 2159 } 2160 return Val; 2161 } else if (N->getOperand(0).getValueType() == MVT::i32) { 2162 // If the intermediate type is i32, we can avoid the load/store here 2163 // too. 2164 } 2165 } 2166 } 2167 break; 2168 case ISD::STORE: 2169 // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)). 
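    // fctiwz leaves the converted integer in the low word of an FPR, so
    // without stfiwx the value would have to be spilled to a 64-bit stack slot
    // and reloaded into a GPR before being stored; stfiwx writes that low word
    // to memory directly and avoids the round trip.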
2170 if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() && 2171 N->getOperand(1).getOpcode() == ISD::FP_TO_SINT && 2172 N->getOperand(1).getValueType() == MVT::i32) { 2173 SDOperand Val = N->getOperand(1).getOperand(0); 2174 if (Val.getValueType() == MVT::f32) { 2175 Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val); 2176 DCI.AddToWorklist(Val.Val); 2177 } 2178 Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val); 2179 DCI.AddToWorklist(Val.Val); 2180 2181 Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val, 2182 N->getOperand(2), N->getOperand(3)); 2183 DCI.AddToWorklist(Val.Val); 2184 return Val; 2185 } 2186 break; 2187 case PPCISD::VCMP: { 2188 // If a VCMPo node already exists with exactly the same operands as this 2189 // node, use its result instead of this node (VCMPo computes both a CR6 and 2190 // a normal output). 2191 // 2192 if (!N->getOperand(0).hasOneUse() && 2193 !N->getOperand(1).hasOneUse() && 2194 !N->getOperand(2).hasOneUse()) { 2195 2196 // Scan all of the users of the LHS, looking for VCMPo's that match. 2197 SDNode *VCMPoNode = 0; 2198 2199 SDNode *LHSN = N->getOperand(0).Val; 2200 for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end(); 2201 UI != E; ++UI) 2202 if ((*UI)->getOpcode() == PPCISD::VCMPo && 2203 (*UI)->getOperand(1) == N->getOperand(1) && 2204 (*UI)->getOperand(2) == N->getOperand(2) && 2205 (*UI)->getOperand(0) == N->getOperand(0)) { 2206 VCMPoNode = *UI; 2207 break; 2208 } 2209 2210 // If there are non-zero uses of the flag value, use the VCMPo node! 2211 if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1)) 2212 return SDOperand(VCMPoNode, 0); 2213 } 2214 break; 2215 } 2216 } 2217 2218 return SDOperand(); 2219} 2220 2221//===----------------------------------------------------------------------===// 2222// Inline Assembly Support 2223//===----------------------------------------------------------------------===// 2224 2225void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 2226 uint64_t Mask, 2227 uint64_t &KnownZero, 2228 uint64_t &KnownOne, 2229 unsigned Depth) const { 2230 KnownZero = 0; 2231 KnownOne = 0; 2232 switch (Op.getOpcode()) { 2233 default: break; 2234 case ISD::INTRINSIC_WO_CHAIN: { 2235 switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) { 2236 default: break; 2237 case Intrinsic::ppc_altivec_vcmpbfp_p: 2238 case Intrinsic::ppc_altivec_vcmpeqfp_p: 2239 case Intrinsic::ppc_altivec_vcmpequb_p: 2240 case Intrinsic::ppc_altivec_vcmpequh_p: 2241 case Intrinsic::ppc_altivec_vcmpequw_p: 2242 case Intrinsic::ppc_altivec_vcmpgefp_p: 2243 case Intrinsic::ppc_altivec_vcmpgtfp_p: 2244 case Intrinsic::ppc_altivec_vcmpgtsb_p: 2245 case Intrinsic::ppc_altivec_vcmpgtsh_p: 2246 case Intrinsic::ppc_altivec_vcmpgtsw_p: 2247 case Intrinsic::ppc_altivec_vcmpgtub_p: 2248 case Intrinsic::ppc_altivec_vcmpgtuh_p: 2249 case Intrinsic::ppc_altivec_vcmpgtuw_p: 2250 KnownZero = ~1U; // All bits but the low one are known to be zero. 2251 break; 2252 } 2253 } 2254 } 2255} 2256 2257 2258/// getConstraintType - Given a constraint letter, return the type of 2259/// constraint it is for this target. 
2260 PPCTargetLowering::ConstraintType
2261 PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
2262   switch (ConstraintLetter) {
2263   default: break;
2264   case 'b':
2265   case 'r':
2266   case 'f':
2267   case 'v':
2268   case 'y':
2269     return C_RegisterClass;
2270   }
2271   return TargetLowering::getConstraintType(ConstraintLetter);
2272 }
2273
2274
2275 std::vector<unsigned> PPCTargetLowering::
2276 getRegClassForInlineAsmConstraint(const std::string &Constraint,
2277                                   MVT::ValueType VT) const {
2278   if (Constraint.size() == 1) {
2279     switch (Constraint[0]) {   // GCC RS6000 Constraint Letters
2280     default: break;            // Unknown constraint letter
2281     case 'b':
2282       return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
2283                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
2284                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
2285                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
2286                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
2287                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
2288                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
2289                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
2290                                    0);
2291     case 'r':
2292       return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
2293                                    PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
2294                                    PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
2295                                    PPC::R12, PPC::R13, PPC::R14, PPC::R15,
2296                                    PPC::R16, PPC::R17, PPC::R18, PPC::R19,
2297                                    PPC::R20, PPC::R21, PPC::R22, PPC::R23,
2298                                    PPC::R24, PPC::R25, PPC::R26, PPC::R27,
2299                                    PPC::R28, PPC::R29, PPC::R30, PPC::R31,
2300                                    0);
2301     case 'f':
2302       return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
2303                                    PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
2304                                    PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
2305                                    PPC::F12, PPC::F13, PPC::F14, PPC::F15,
2306                                    PPC::F16, PPC::F17, PPC::F18, PPC::F19,
2307                                    PPC::F20, PPC::F21, PPC::F22, PPC::F23,
2308                                    PPC::F24, PPC::F25, PPC::F26, PPC::F27,
2309                                    PPC::F28, PPC::F29, PPC::F30, PPC::F31,
2310                                    0);
2311     case 'v':
2312       return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
2313                                    PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
2314                                    PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
2315                                    PPC::V12, PPC::V13, PPC::V14, PPC::V15,
2316                                    PPC::V16, PPC::V17, PPC::V18, PPC::V19,
2317                                    PPC::V20, PPC::V21, PPC::V22, PPC::V23,
2318                                    PPC::V24, PPC::V25, PPC::V26, PPC::V27,
2319                                    PPC::V28, PPC::V29, PPC::V30, PPC::V31,
2320                                    0);
2321     case 'y':
2322       return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
2323                                    PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
2324                                    0);
2325     }
2326   }
2327
2328   return std::vector<unsigned>();
2329 }
2330
2331 /// isOperandValidForConstraint - Return true if Op is valid for constraint Letter.
2332 bool PPCTargetLowering::
2333 isOperandValidForConstraint(SDOperand Op, char Letter) {
2334   switch (Letter) {
2335   default: break;
2336   case 'I':
2337   case 'J':
2338   case 'K':
2339   case 'L':
2340   case 'M':
2341   case 'N':
2342   case 'O':
2343   case 'P': {
2344     if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
2345     unsigned Value = cast<ConstantSDNode>(Op)->getValue();
2346     switch (Letter) {
2347     default: assert(0 && "Unknown constraint letter!");
2348     case 'I':  // "I" is a signed 16-bit constant.
2349       return (short)Value == (int)Value;
2350     case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
2351     case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
2352       return (short)Value == 0;
2353     case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
2354       return (Value >> 16) == 0;
2355     case 'M':  // "M" is a constant that is greater than 31.
2356 return Value > 31; 2357 case 'N': // "N" is a positive constant that is an exact power of two. 2358 return (int)Value > 0 && isPowerOf2_32(Value); 2359 case 'O': // "O" is the constant zero. 2360 return Value == 0; 2361 case 'P': // "P" is a constant whose negation is a signed 16-bit constant. 2362 return (short)-Value == (int)-Value; 2363 } 2364 break; 2365 } 2366 } 2367 2368 // Handle standard constraint letters. 2369 return TargetLowering::isOperandValidForConstraint(Op, Letter); 2370} 2371 2372/// isLegalAddressImmediate - Return true if the integer value can be used 2373/// as the offset of the target addressing mode. 2374bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const { 2375 // PPC allows a sign-extended 16-bit immediate field. 2376 return (V > -(1 << 16) && V < (1 << 16)-1); 2377} 2378