PPCISelLowering.cpp revision 116cc48e30b9c307bf3eec29c890b4ba25cd18db
//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file was developed by Chris Lattner and is distributed under
// the University of Illinois Open Source License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PPCISelLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCISelLowering.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SSARegMap.h"
#include "llvm/Constants.h"
#include "llvm/Function.h"
#include "llvm/Intrinsics.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
using namespace llvm;

/// PPCTargetLowering - Configure how generic DAG operations are lowered for
/// PowerPC: which (opcode, type) pairs are Legal / Expand / Promote / Custom,
/// which register classes back each value type, and which nodes the target
/// wants to see in DAG combine.  Feature-dependent choices (fsqrt, 64-bit,
/// AltiVec, Darwin) are keyed off the PPCSubtarget queried from TM.
PPCTargetLowering::PPCTargetLowering(TargetMachine &TM)
  : TargetLowering(TM) {

  // Fold away setcc operations if possible.
  setSetCCIsExpensive();
  setPow2DivIsCheap();

  // Use _setjmp/_longjmp instead of setjmp/longjmp.
  setUseUnderscoreSetJmpLongJmp(true);

  // Set up the register classes.
  addRegisterClass(MVT::i32, PPC::GPRCRegisterClass);
  addRegisterClass(MVT::f32, PPC::F4RCRegisterClass);
  addRegisterClass(MVT::f64, PPC::F8RCRegisterClass);

  // FP immediates are not materialized directly; expand to constant-pool loads.
  setOperationAction(ISD::ConstantFP, MVT::f64, Expand);
  setOperationAction(ISD::ConstantFP, MVT::f32, Expand);

  // PowerPC has no intrinsics for these particular operations
  setOperationAction(ISD::MEMMOVE, MVT::Other, Expand);
  setOperationAction(ISD::MEMSET, MVT::Other, Expand);
  setOperationAction(ISD::MEMCPY, MVT::Other, Expand);

  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD
  setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand);
  setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand);

  // PowerPC has no SREM/UREM instructions
  setOperationAction(ISD::SREM, MVT::i32, Expand);
  setOperationAction(ISD::UREM, MVT::i32, Expand);

  // We don't support sin/cos/sqrt/fmod
  setOperationAction(ISD::FSIN , MVT::f64, Expand);
  setOperationAction(ISD::FCOS , MVT::f64, Expand);
  setOperationAction(ISD::FREM , MVT::f64, Expand);
  setOperationAction(ISD::FSIN , MVT::f32, Expand);
  setOperationAction(ISD::FCOS , MVT::f32, Expand);
  setOperationAction(ISD::FREM , MVT::f32, Expand);

  // If we're enabling GP optimizations, use hardware square root
  if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) {
    setOperationAction(ISD::FSQRT, MVT::f64, Expand);
    setOperationAction(ISD::FSQRT, MVT::f32, Expand);
  }

  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);

  // PowerPC does not have BSWAP, CTPOP or CTTZ
  setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
  setOperationAction(ISD::CTPOP, MVT::i32 , Expand);
  setOperationAction(ISD::CTTZ , MVT::i32 , Expand);

  // PowerPC does not have ROTR
  setOperationAction(ISD::ROTR, MVT::i32 , Expand);

  // PowerPC does not have Select
  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::f64, Expand);

  // PowerPC wants to turn select_cc of FP into fsel when possible.
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);

  // PowerPC wants to optimize integer setcc a bit
  setOperationAction(ISD::SETCC, MVT::i32, Custom);

  // PowerPC does not have BRCOND which requires SetCC
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
  setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);

  // PowerPC does not have [U|S]INT_TO_FP
  setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
  setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);

  // No direct GPR<->FPR bitcasts; go through memory.
  setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
  setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);

  // PowerPC does not have truncstore for i1.
  setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote);

  // Support label based line numbers.
  setOperationAction(ISD::LOCATION, MVT::Other, Expand);
  setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand);
  // FIXME - use subtarget debug flags
  if (!TM.getSubtarget<PPCSubtarget>().isDarwin())
    setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand);

  // We want to legalize GlobalAddress and ConstantPool nodes into the
  // appropriate instructions to materialize the address.
  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
  setOperationAction(ISD::ConstantPool, MVT::i32, Custom);

  // RET must be custom lowered, to meet ABI requirements
  setOperationAction(ISD::RET , MVT::Other, Custom);

  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
  setOperationAction(ISD::VASTART , MVT::Other, Custom);

  // Use the default implementation.
  setOperationAction(ISD::VAARG , MVT::Other, Expand);
  setOperationAction(ISD::VACOPY , MVT::Other, Expand);
  setOperationAction(ISD::VAEND , MVT::Other, Expand);
  setOperationAction(ISD::STACKSAVE , MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand);
  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand);

  // We want to custom lower some of our intrinsics.
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
    // They also have instructions for converting between i64 and fp.
    setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
    setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);

    // FIXME: disable this lowered code. This generates 64-bit register values,
    // and we don't model the fact that the top part is clobbered by calls. We
    // need to flag these together so that the value isn't live across a call.
    //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);

    // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote);
  } else {
    // PowerPC does not have FP_TO_UINT on 32-bit implementations.
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
  }

  if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) {
    // 64 bit PowerPC implementations can support i64 types directly
    addRegisterClass(MVT::i64, PPC::G8RCRegisterClass);
    // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
    setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand);
  } else {
    // 32 bit PowerPC wants to expand i64 shifts itself (see LowerOperation).
    setOperationAction(ISD::SHL, MVT::i64, Custom);
    setOperationAction(ISD::SRL, MVT::i64, Custom);
    setOperationAction(ISD::SRA, MVT::i64, Custom);
  }

  if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) {
    // First set operation action for all vector types to expand. Then we
    // will selectively turn on ones that can be effectively codegen'd.
    for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
         VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) {
      // add/sub/and/or/xor are legal for all supported vector VT's.
      setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal);
      setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal);

      // We promote all shuffles to v16i8.
      setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote);
      AddPromotedToType(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8);

      setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand);
      setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand);

      setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand);
    }

    // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
    // with merges, splats, etc.
    setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom);

    addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass);
    addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass);

    setOperationAction(ISD::MUL, MVT::v4f32, Legal);

    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom);
    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom);

    setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom);
    setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
  }

  setSetCCResultContents(ZeroOrOneSetCCResult);
  setStackPointerRegisterToSaveRestore(PPC::R1);

  // We have target-specific dag combine patterns for the following nodes:
  setTargetDAGCombine(ISD::SINT_TO_FP);
  setTargetDAGCombine(ISD::STORE);

  computeRegisterProperties();
}

/// getTargetNodeName - Return the symbolic name of the given PPC-specific
/// DAG node opcode, or null if the opcode is not a PPCISD node.
const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  default: return 0;
  case PPCISD::FSEL: return "PPCISD::FSEL";
  case PPCISD::FCFID: return "PPCISD::FCFID";
  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
  case PPCISD::STFIWX: return "PPCISD::STFIWX";
  case PPCISD::VMADDFP: return "PPCISD::VMADDFP";
  case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP";
  case PPCISD::VPERM: return "PPCISD::VPERM";
  case PPCISD::Hi: return "PPCISD::Hi";
  case PPCISD::Lo: return "PPCISD::Lo";
  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
  case PPCISD::SRL: return "PPCISD::SRL";
  case PPCISD::SRA: return "PPCISD::SRA";
  case PPCISD::SHL: return "PPCISD::SHL";
  case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
  case PPCISD::STD_32: return "PPCISD::STD_32";
  case PPCISD::CALL: return "PPCISD::CALL";
  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
  case PPCISD::MFCR: return "PPCISD::MFCR";
  case PPCISD::VCMP: return "PPCISD::VCMP";
  case PPCISD::VCMPo: return "PPCISD::VCMPo";
  }
}

/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isFloatingPointZero(SDOperand Op) {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) {
    // Maybe this has already been legalized into the constant pool?
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get()))
        return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0);
  }
  return false;
}

/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode.  Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(SDOperand Op, unsigned Val) {
  return Op.getOpcode() == ISD::UNDEF ||
         cast<ConstantSDNode>(Op)->getValue() == Val;
}

/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.  Byte i of the result must come from byte 2*i+1 of
/// the (concatenated) inputs, i.e. the odd-numbered bytes.
bool PPC::isVPKUHUMShuffleMask(SDNode *N) {
  for (unsigned i = 0; i != 16; ++i)
    if (!isConstantOrUndef(N->getOperand(i), i*2+1))
      return false;
  return true;
}

/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.  Each halfword of the result must come from bytes
/// 4*u+2 and 4*u+3 of the inputs.
bool PPC::isVPKUWUMShuffleMask(SDNode *N) {
  for (unsigned i = 0; i != 16; i += 2)
    if (!isConstantOrUndef(N->getOperand(i ), i*2+2) ||
        !isConstantOrUndef(N->getOperand(i+1), i*2+3))
      return false;
  return true;
}

/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
298bool PPC::isVMRGLShuffleMask(SDNode *N, unsigned UnitSize) { 299 assert(N->getOpcode() == ISD::BUILD_VECTOR && 300 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 301 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 302 "Unsupported merge size!"); 303 304 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 305 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 306 if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), 307 8+j+i*UnitSize) || 308 !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), 309 24+j+i*UnitSize)) 310 return false; 311 } 312 return true; 313} 314 315/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for 316/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). 317bool PPC::isVMRGHShuffleMask(SDNode *N, unsigned UnitSize) { 318 assert(N->getOpcode() == ISD::BUILD_VECTOR && 319 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 320 assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) && 321 "Unsupported merge size!"); 322 323 for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units 324 for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit 325 if (!isConstantOrUndef(N->getOperand(i*UnitSize*2+j), 326 0+j+i*UnitSize) || 327 !isConstantOrUndef(N->getOperand(i*UnitSize*2+UnitSize+j), 328 16+j+i*UnitSize)) 329 return false; 330 } 331 return true; 332} 333 334 335/// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift 336/// amount, otherwise return -1. 337int PPC::isVSLDOIShuffleMask(SDNode *N) { 338 assert(N->getOpcode() == ISD::BUILD_VECTOR && 339 N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 340 // Find the first non-undef value in the shuffle mask. 341 unsigned i; 342 for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) 343 /*search*/; 344 345 if (i == 16) return -1; // all undef. 
346 347 // Otherwise, check to see if the rest of the elements are consequtively 348 // numbered from this value. 349 unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue(); 350 if (ShiftAmt < i) return -1; 351 ShiftAmt -= i; 352 353 // Check the rest of the elements to see if they are consequtive. 354 for (++i; i != 16; ++i) 355 if (!isConstantOrUndef(N->getOperand(i), ShiftAmt+i)) 356 return -1; 357 358 return ShiftAmt; 359} 360 361/// isVSLDOIRotateShuffleMask - If this is a vsldoi rotate shuffle mask, 362/// return the shift amount, otherwise return -1. Note that vlsdoi(x,x) will 363/// result in the shuffle being changed to shuffle(x,undef, ...) with 364/// transformed byte numbers. 365int PPC::isVSLDOIRotateShuffleMask(SDNode *N) { 366 assert(N->getNumOperands() == 16 && "PPC only supports shuffles by bytes!"); 367 // Find the first non-undef value in the shuffle mask. 368 unsigned i; 369 for (i = 0; i != 16 && N->getOperand(i).getOpcode() == ISD::UNDEF; ++i) 370 /*search*/; 371 372 if (i == 16) return -1; // all undef. 373 374 // Otherwise, check to see if the rest of the elements are consequtively 375 // numbered from this value. 376 unsigned ShiftAmt = cast<ConstantSDNode>(N->getOperand(i))->getValue(); 377 if (ShiftAmt < i) return -1; 378 ShiftAmt -= i; 379 380 // Check the rest of the elements to see if they are consequtive. 381 for (++i; i != 16; ++i) 382 if (!isConstantOrUndef(N->getOperand(i), (ShiftAmt+i) & 15)) 383 return -1; 384 385 return ShiftAmt; 386} 387 388/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 389/// specifies a splat of a single element that is suitable for input to 390/// VSPLTB/VSPLTH/VSPLTW. 
bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) {
  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
         N->getNumOperands() == 16 &&
         (EltSize == 1 || EltSize == 2 || EltSize == 4));

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = 0;
  SDOperand Elt = N->getOperand(0);
  if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt))
    ElementBase = EltV->getValue();
  else
    return false;   // FIXME: Handle UNDEF elements too!

  // Byte indices >= 16 would reference the second input vector; a vsplt can
  // only broadcast an element of the first.
  if (cast<ConstantSDNode>(Elt)->getValue() >= 16)
    return false;

  // Check that the bytes of the first element are consecutive, i.e. the mask
  // really selects one whole EltSize-byte element starting at ElementBase.
  for (unsigned i = 1; i != EltSize; ++i) {
    if (!isa<ConstantSDNode>(N->getOperand(i)) ||
        cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase)
      return false;
  }

  // Every subsequent element of the mask must repeat the first element's
  // bytes exactly (operand-for-operand equality).
  assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!");
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    assert(isa<ConstantSDNode>(N->getOperand(i)) &&
           "Invalid VECTOR_SHUFFLE mask!");
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getOperand(i+j) != N->getOperand(j))
        return false;
  }

  return true;
}

/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the
/// specified isSplatShuffleMask VECTOR_SHUFFLE mask, i.e. the index of the
/// element (in EltSize units) being broadcast.
unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) {
  assert(isSplatShuffleMask(N, EltSize));
  return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize;
}

/// isVecSplatImm - Return true if this is a build_vector of constants which
/// can be formed by using a vspltis[bhw] instruction.  The ByteSize field
/// indicates the number of bytes of each element [124] -> [bhw].
bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) {
  SDOperand OpVal(0, 0);
  // Check to see if this buildvec has a single non-undef value in its
  // elements (all defined elements must be the identical SDOperand).
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
    if (OpVal.Val == 0)
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return false;
  }

  if (OpVal.Val == 0) return false;  // All UNDEF: use implicit def.

  // Extract the replicated bit pattern and its width from the (integer or
  // f32) constant.
  unsigned ValSizeInBytes = 0;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getValue();
    ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8;
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValue());
    ValSizeInBytes = 4;
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat.  The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return false;

  // If the element value is larger than the splat value, cut it in half and
  // check to see if the two halves are equal.  Continue doing this until we
  // get to ByteSize.  This allows us to handle 0x01010101 as 0x01.
  while (ValSizeInBytes > ByteSize) {
    ValSizeInBytes >>= 1;

    // If the top half equals the bottom half, we're still ok.
    // NOTE(review): the `1 << (8*ValSizeInBytes)` masks are computed in int;
    // this is only safe while ValSizeInBytes < 4 here (true for the i8/i16/
    // i32/f32 elements these vectors use) — would overflow for 8-byte
    // values.  Consider 1ULL if i64 elements ever become legal.
    if (((Value >> (ValSizeInBytes*8)) & ((1 << (8*ValSizeInBytes))-1)) !=
        (Value & ((1 << (8*ValSizeInBytes))-1)))
      return false;
  }

  // Properly sign extend the value.
  // NOTE(review): relies on implementation-defined arithmetic right shift of
  // a negative int (true for the compilers this targets).
  int ShAmt = (4-ByteSize)*8;
  int MaskVal = ((int)Value << ShAmt) >> ShAmt;

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return false;

  if (Val) *Val = MaskVal;

  // Finally, if this value fits in a 5 bit sext field, return true.
488 return ((MaskVal << (32-5)) >> (32-5)) == MaskVal; 489} 490 491 492/// LowerOperation - Provide custom lowering hooks for some operations. 493/// 494SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 495 switch (Op.getOpcode()) { 496 default: assert(0 && "Wasn't expecting to be able to lower this!"); 497 case ISD::FP_TO_SINT: { 498 assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType())); 499 SDOperand Src = Op.getOperand(0); 500 if (Src.getValueType() == MVT::f32) 501 Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src); 502 503 SDOperand Tmp; 504 switch (Op.getValueType()) { 505 default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!"); 506 case MVT::i32: 507 Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src); 508 break; 509 case MVT::i64: 510 Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src); 511 break; 512 } 513 514 // Convert the FP value to an int value through memory. 515 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp); 516 if (Op.getValueType() == MVT::i32) 517 Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits); 518 return Bits; 519 } 520 case ISD::SINT_TO_FP: 521 if (Op.getOperand(0).getValueType() == MVT::i64) { 522 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 523 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 524 if (Op.getValueType() == MVT::f32) 525 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 526 return FP; 527 } else { 528 assert(Op.getOperand(0).getValueType() == MVT::i32 && 529 "Unhandled SINT_TO_FP type in custom expander!"); 530 // Since we only generate this in 64-bit mode, we can take advantage of 531 // 64-bit registers. In particular, sign extend the input value into the 532 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 533 // then lfd it and fcfid it. 
534 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 535 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 536 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 537 538 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 539 Op.getOperand(0)); 540 541 // STD the extended value into the stack slot. 542 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 543 DAG.getEntryNode(), Ext64, FIdx, 544 DAG.getSrcValue(NULL)); 545 // Load the value as a double. 546 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL)); 547 548 // FCFID it and return it. 549 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); 550 if (Op.getValueType() == MVT::f32) 551 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 552 return FP; 553 } 554 break; 555 556 case ISD::SELECT_CC: { 557 // Turn FP only select_cc's into fsel instructions. 558 if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) || 559 !MVT::isFloatingPoint(Op.getOperand(2).getValueType())) 560 break; 561 562 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 563 564 // Cannot handle SETEQ/SETNE. 565 if (CC == ISD::SETEQ || CC == ISD::SETNE) break; 566 567 MVT::ValueType ResVT = Op.getValueType(); 568 MVT::ValueType CmpVT = Op.getOperand(0).getValueType(); 569 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 570 SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3); 571 572 // If the RHS of the comparison is a 0.0, we don't need to do the 573 // subtraction at all. 574 if (isFloatingPointZero(RHS)) 575 switch (CC) { 576 default: break; // SETUO etc aren't handled by fsel. 
577 case ISD::SETULT: 578 case ISD::SETLT: 579 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 580 case ISD::SETUGE: 581 case ISD::SETGE: 582 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 583 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 584 return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV); 585 case ISD::SETUGT: 586 case ISD::SETGT: 587 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 588 case ISD::SETULE: 589 case ISD::SETLE: 590 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 591 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 592 return DAG.getNode(PPCISD::FSEL, ResVT, 593 DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); 594 } 595 596 SDOperand Cmp; 597 switch (CC) { 598 default: break; // SETUO etc aren't handled by fsel. 599 case ISD::SETULT: 600 case ISD::SETLT: 601 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 602 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 603 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 604 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 605 case ISD::SETUGE: 606 case ISD::SETGE: 607 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 608 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 609 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 610 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 611 case ISD::SETUGT: 612 case ISD::SETGT: 613 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 614 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 615 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 616 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 617 case ISD::SETULE: 618 case ISD::SETLE: 619 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 620 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 621 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 622 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 623 } 624 break; 625 } 626 case ISD::SHL: { 627 
assert(Op.getValueType() == MVT::i64 && 628 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 629 // The generic code does a fine job expanding shift by a constant. 630 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 631 632 // Otherwise, expand into a bunch of logical ops. Note that these ops 633 // depend on the PPC behavior for oversized shift amounts. 634 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 635 DAG.getConstant(0, MVT::i32)); 636 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 637 DAG.getConstant(1, MVT::i32)); 638 SDOperand Amt = Op.getOperand(1); 639 640 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 641 DAG.getConstant(32, MVT::i32), Amt); 642 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt); 643 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1); 644 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 645 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 646 DAG.getConstant(-32U, MVT::i32)); 647 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5); 648 SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 649 SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt); 650 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 651 } 652 case ISD::SRL: { 653 assert(Op.getValueType() == MVT::i64 && 654 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 655 // The generic code does a fine job expanding shift by a constant. 656 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 657 658 // Otherwise, expand into a bunch of logical ops. Note that these ops 659 // depend on the PPC behavior for oversized shift amounts. 
660 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 661 DAG.getConstant(0, MVT::i32)); 662 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 663 DAG.getConstant(1, MVT::i32)); 664 SDOperand Amt = Op.getOperand(1); 665 666 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 667 DAG.getConstant(32, MVT::i32), Amt); 668 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 669 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 670 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 671 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 672 DAG.getConstant(-32U, MVT::i32)); 673 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5); 674 SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 675 SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt); 676 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 677 } 678 case ISD::SRA: { 679 assert(Op.getValueType() == MVT::i64 && 680 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!"); 681 // The generic code does a fine job expanding shift by a constant. 682 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 683 684 // Otherwise, expand into a bunch of logical ops, followed by a select_cc. 
685 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 686 DAG.getConstant(0, MVT::i32)); 687 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 688 DAG.getConstant(1, MVT::i32)); 689 SDOperand Amt = Op.getOperand(1); 690 691 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 692 DAG.getConstant(32, MVT::i32), Amt); 693 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 694 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 695 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 696 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 697 DAG.getConstant(-32U, MVT::i32)); 698 SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5); 699 SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt); 700 SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32), 701 Tmp4, Tmp6, ISD::SETLE); 702 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 703 } 704 case ISD::ConstantPool: { 705 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 706 Constant *C = CP->get(); 707 SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment()); 708 SDOperand Zero = DAG.getConstant(0, MVT::i32); 709 710 if (getTargetMachine().getRelocationModel() == Reloc::Static) { 711 // Generate non-pic code that has direct accesses to the constant pool. 712 // The address of the global is just (hi(&g)+lo(&g)). 713 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 714 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 715 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 716 } 717 718 // Only lower ConstantPool on Darwin. 719 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break; 720 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 721 if (getTargetMachine().getRelocationModel() == Reloc::PIC) { 722 // With PIC, the first instruction is actually "GR+hi(&G)". 
723 Hi = DAG.getNode(ISD::ADD, MVT::i32, 724 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 725 } 726 727 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 728 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 729 return Lo; 730 } 731 case ISD::GlobalAddress: { 732 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 733 GlobalValue *GV = GSDN->getGlobal(); 734 SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset()); 735 SDOperand Zero = DAG.getConstant(0, MVT::i32); 736 737 if (getTargetMachine().getRelocationModel() == Reloc::Static) { 738 // Generate non-pic code that has direct accesses to globals. 739 // The address of the global is just (hi(&g)+lo(&g)). 740 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 741 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 742 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 743 } 744 745 // Only lower GlobalAddress on Darwin. 746 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break; 747 748 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 749 if (getTargetMachine().getRelocationModel() == Reloc::PIC) { 750 // With PIC, the first instruction is actually "GR+hi(&G)". 751 Hi = DAG.getNode(ISD::ADD, MVT::i32, 752 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 753 } 754 755 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 756 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 757 758 if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() && 759 (!GV->isExternal() || GV->hasNotBeenReadFromBytecode())) 760 return Lo; 761 762 // If the global is weak or external, we have to go through the lazy 763 // resolution stub. 
764 return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0)); 765 } 766 case ISD::SETCC: { 767 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 768 769 // If we're comparing for equality to zero, expose the fact that this is 770 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 771 // fold the new nodes. 772 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 773 if (C->isNullValue() && CC == ISD::SETEQ) { 774 MVT::ValueType VT = Op.getOperand(0).getValueType(); 775 SDOperand Zext = Op.getOperand(0); 776 if (VT < MVT::i32) { 777 VT = MVT::i32; 778 Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0)); 779 } 780 unsigned Log2b = Log2_32(MVT::getSizeInBits(VT)); 781 SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext); 782 SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz, 783 DAG.getConstant(Log2b, getShiftAmountTy())); 784 return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc); 785 } 786 // Leave comparisons against 0 and -1 alone for now, since they're usually 787 // optimized. FIXME: revisit this when we can custom lower all setcc 788 // optimizations. 789 if (C->isAllOnesValue() || C->isNullValue()) 790 break; 791 } 792 793 // If we have an integer seteq/setne, turn it into a compare against zero 794 // by subtracting the rhs from the lhs, which is faster than setting a 795 // condition register, reading it back out, and masking the correct bit. 796 MVT::ValueType LHSVT = Op.getOperand(0).getValueType(); 797 if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 798 MVT::ValueType VT = Op.getValueType(); 799 SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0), 800 Op.getOperand(1)); 801 return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC); 802 } 803 break; 804 } 805 case ISD::VASTART: { 806 // vastart just stores the address of the VarArgsFrameIndex slot into the 807 // memory location argument. 
808 // FIXME: Replace MVT::i32 with PointerTy 809 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 810 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 811 Op.getOperand(1), Op.getOperand(2)); 812 } 813 case ISD::RET: { 814 SDOperand Copy; 815 816 switch(Op.getNumOperands()) { 817 default: 818 assert(0 && "Do not know how to return this many arguments!"); 819 abort(); 820 case 1: 821 return SDOperand(); // ret void is legal 822 case 2: { 823 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 824 unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1; 825 Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1), 826 SDOperand()); 827 break; 828 } 829 case 3: 830 Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2), 831 SDOperand()); 832 Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1)); 833 break; 834 } 835 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); 836 } 837 case ISD::SCALAR_TO_VECTOR: { 838 // Create a stack slot that is 16-byte aligned. 839 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 840 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 841 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 842 843 // Store the input value into Value#0 of the stack slot. 844 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 845 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 846 // Load it out. 847 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 848 } 849 case ISD::BUILD_VECTOR: 850 // If this is a case we can't handle, return null and let the default 851 // expansion code take care of it. If we CAN select this case, return Op. 852 853 // See if this is all zeros. 854 // FIXME: We should handle splat(-0.0), and other cases here. 
855 if (ISD::isBuildVectorAllZeros(Op.Val)) 856 return Op; 857 858 if (PPC::isVecSplatImm(Op.Val, 1) || // vspltisb 859 PPC::isVecSplatImm(Op.Val, 2) || // vspltish 860 PPC::isVecSplatImm(Op.Val, 4)) // vspltisw 861 return Op; 862 863 return SDOperand(); 864 865 case ISD::VECTOR_SHUFFLE: { 866 SDOperand V1 = Op.getOperand(0); 867 SDOperand V2 = Op.getOperand(1); 868 SDOperand PermMask = Op.getOperand(2); 869 870 // Cases that are handled by instructions that take permute immediates 871 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 872 // selected by the instruction selector. 873 if (V2.getOpcode() == ISD::UNDEF && 874 (PPC::isSplatShuffleMask(PermMask.Val, 1) || 875 PPC::isSplatShuffleMask(PermMask.Val, 2) || 876 PPC::isSplatShuffleMask(PermMask.Val, 4) || 877 PPC::isVSLDOIRotateShuffleMask(PermMask.Val) != -1)) 878 return Op; 879 880 if (PPC::isVPKUWUMShuffleMask(PermMask.Val) || 881 PPC::isVPKUHUMShuffleMask(PermMask.Val) || 882 PPC::isVSLDOIShuffleMask(PermMask.Val) != -1 || 883 PPC::isVMRGLShuffleMask(PermMask.Val, 1) || 884 PPC::isVMRGLShuffleMask(PermMask.Val, 2) || 885 PPC::isVMRGLShuffleMask(PermMask.Val, 4) || 886 PPC::isVMRGHShuffleMask(PermMask.Val, 1) || 887 PPC::isVMRGHShuffleMask(PermMask.Val, 2) || 888 PPC::isVMRGHShuffleMask(PermMask.Val, 4)) 889 return Op; 890 891 // TODO: Handle more cases, and also handle cases that are cheaper to do as 892 // multiple such instructions than as a constant pool load/vperm pair. 893 894 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 895 // vector that will get spilled to the constant pool. 896 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 897 898 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 899 // that it is in input element units, not in bytes. Convert now. 
900 MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType()); 901 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; 902 903 std::vector<SDOperand> ResultMask; 904 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 905 unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 906 907 for (unsigned j = 0; j != BytesPerElement; ++j) 908 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 909 MVT::i8)); 910 } 911 912 SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask); 913 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 914 } 915 case ISD::INTRINSIC_WO_CHAIN: { 916 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 917 918 // If this is a lowered altivec predicate compare, CompareOpc is set to the 919 // opcode number of the comparison. 920 int CompareOpc = -1; 921 bool isDot = false; 922 switch (IntNo) { 923 default: return SDOperand(); // Don't custom lower most intrinsics. 924 // Comparison predicates. 
925 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 926 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 927 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 928 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 929 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 930 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 931 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 932 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 933 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 934 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 935 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 936 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 937 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 938 939 // Normal Comparisons. 
940 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 941 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 942 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 943 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 944 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 945 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 946 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 947 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 948 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 949 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 950 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 951 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 952 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 953 } 954 955 assert(CompareOpc>0 && "We only lower altivec predicate compares so far!"); 956 957 // If this is a non-dot comparison, make the VCMP node. 958 if (!isDot) 959 return DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 960 Op.getOperand(1), Op.getOperand(2), 961 DAG.getConstant(CompareOpc, MVT::i32)); 962 963 // Create the PPCISD altivec 'dot' comparison node. 964 std::vector<SDOperand> Ops; 965 std::vector<MVT::ValueType> VTs; 966 Ops.push_back(Op.getOperand(2)); // LHS 967 Ops.push_back(Op.getOperand(3)); // RHS 968 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 969 VTs.push_back(Op.getOperand(2).getValueType()); 970 VTs.push_back(MVT::Flag); 971 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 972 973 // Now that we have the comparison, emit a copy from the CR to a GPR. 974 // This is flagged to the above dot comparison. 
975 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 976 DAG.getRegister(PPC::CR6, MVT::i32), 977 CompNode.getValue(1)); 978 979 // Unpack the result based on how the target uses it. 980 unsigned BitNo; // Bit # of CR6. 981 bool InvertBit; // Invert result? 982 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 983 default: // Can't happen, don't crash on invalid number though. 984 case 0: // Return the value of the EQ bit of CR6. 985 BitNo = 0; InvertBit = false; 986 break; 987 case 1: // Return the inverted value of the EQ bit of CR6. 988 BitNo = 0; InvertBit = true; 989 break; 990 case 2: // Return the value of the LT bit of CR6. 991 BitNo = 2; InvertBit = false; 992 break; 993 case 3: // Return the inverted value of the LT bit of CR6. 994 BitNo = 2; InvertBit = true; 995 break; 996 } 997 998 // Shift the bit into the low position. 999 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 1000 DAG.getConstant(8-(3-BitNo), MVT::i32)); 1001 // Isolate the bit. 1002 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 1003 DAG.getConstant(1, MVT::i32)); 1004 1005 // If we are supposed to, toggle the bit. 
      // Toggle the extracted CR6 bit if the predicate number asked for the
      // inverted sense.
      if (InvertBit)
        Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags,
                            DAG.getConstant(1, MVT::i32));
      return Flags;
    }
  }
  // Any node we did not explicitly handle above is left for default expansion.
  return SDOperand();
}

/// LowerArguments - Lower the incoming (formal) arguments of the function.
/// On 32-bit PowerPC the first 8 words of arguments are passed in R3-R10 and
/// the first 13 FP arguments in F1-F13; remaining arguments live in the
/// caller's parameter area, which begins 24 bytes above the stack pointer
/// (past the linkage area).  Returns one SDOperand per formal argument.
std::vector<SDOperand>
PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) {
  //
  // add beautiful description of PPC stack frame format, or at least some docs
  //
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo *MFI = MF.getFrameInfo();
  MachineBasicBlock& BB = MF.front();
  SSARegMap *RegMap = MF.getSSARegMap();
  std::vector<SDOperand> ArgValues;

  // Parameter area starts at offset 24; 8 GPRs and 13 FPRs are available for
  // argument passing.
  unsigned ArgOffset = 24;
  unsigned GPR_remaining = 8;
  unsigned FPR_remaining = 13;
  unsigned GPR_idx = 0, FPR_idx = 0;
  static const unsigned GPR[] = {
    PPC::R3, PPC::R4, PPC::R5, PPC::R6,
    PPC::R7, PPC::R8, PPC::R9, PPC::R10,
  };
  static const unsigned FPR[] = {
    PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
    PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13
  };

  // Add DAG nodes to load the arguments...  On entry to a function on PPC,
  // the arguments start at offset 24, although they are likely to be passed
  // in registers.
  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) {
    SDOperand newroot, argt;
    unsigned ObjSize;
    bool needsLoad = false;
    // Dead arguments still consume registers and stack space below, but need
    // no DAG nodes built for them.
    bool ArgLive = !I->use_empty();
    MVT::ValueType ObjectVT = getValueType(I->getType());

    switch (ObjectVT) {
    default: assert(0 && "Unhandled argument type!");
    case MVT::i1:
    case MVT::i8:
    case MVT::i16:
    case MVT::i32:
      ObjSize = 4;
      if (!ArgLive) break;
      if (GPR_remaining > 0) {
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
        if (ObjectVT != MVT::i32) {
          // Record that the sub-word value arrives already sign/zero extended
          // in the 32-bit register, then truncate down to the declared type.
          unsigned AssertOp = I->getType()->isSigned() ? ISD::AssertSext
                                                       : ISD::AssertZext;
          argt = DAG.getNode(AssertOp, MVT::i32, argt,
                             DAG.getValueType(ObjectVT));
          argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt);
        }
      } else {
        needsLoad = true;
      }
      break;
    case MVT::i64:
      ObjSize = 8;
      if (!ArgLive) break;
      if (GPR_remaining > 0) {
        SDOperand argHi, argLo;
        // The high half arrives in the first (lower-numbered) GPR.
        unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
        MF.addLiveIn(GPR[GPR_idx], VReg);
        argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
        // If we have two or more remaining argument registers, then both halves
        // of the i64 can be sourced from there.  Otherwise, the lower half will
        // have to come off the stack.  This can happen when an i64 is preceded
        // by 28 bytes of arguments.
        if (GPR_remaining > 1) {
          unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
          MF.addLiveIn(GPR[GPR_idx+1], VReg);
          argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32);
        } else {
          int FI = MFI->CreateFixedObject(4, ArgOffset+4);
          SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
          argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN,
                              DAG.getSrcValue(NULL));
        }
        // Build the outgoing arg thingy
        argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi);
        newroot = argLo;
      } else {
        needsLoad = true;
      }
      break;
    case MVT::f32:
    case MVT::f64:
      ObjSize = (ObjectVT == MVT::f64) ? 8 : 4;
      if (!ArgLive) {
        // Even a dead FP argument consumes its FPR.
        if (FPR_remaining > 0) {
          --FPR_remaining;
          ++FPR_idx;
        }
        break;
      }
      if (FPR_remaining > 0) {
        unsigned VReg;
        if (ObjectVT == MVT::f32)
          VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass);
        else
          VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass);
        MF.addLiveIn(FPR[FPR_idx], VReg);
        argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT);
        --FPR_remaining;
        ++FPR_idx;
      } else {
        needsLoad = true;
      }
      break;
    }

    // We need to load the argument to a virtual register if we determined above
    // that we ran out of physical registers of the appropriate type
    if (needsLoad) {
      // Sub-word values occupy the high-address bytes of their 4-byte slot
      // (big-endian layout): i8/i1 at byte 3, i16 at bytes 2-3.
      unsigned SubregOffset = 0;
      if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3;
      if (ObjectVT == MVT::i16) SubregOffset = 2;
      int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
      SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32);
      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN,
                        DAG.getConstant(SubregOffset, MVT::i32));
      argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN,
                                   DAG.getSrcValue(NULL));
    }

    // Every 4 bytes of argument space consumes one of the GPRs available for
    // argument passing.
    if (GPR_remaining > 0) {
      unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1;
      GPR_remaining -= delta;
      GPR_idx += delta;
    }
    ArgOffset += ObjSize;
    if (newroot.Val)
      DAG.setRoot(newroot.getValue(1));

    ArgValues.push_back(argt);
  }

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  if (F.isVarArg()) {
    VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
    SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32);
    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    std::vector<SDOperand> MemOps;
    for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) {
      unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass);
      MF.addLiveIn(GPR[GPR_idx], VReg);
      SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32);
      SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1),
                                    Val, FIN, DAG.getSrcValue(NULL));
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDOperand PtrOff = DAG.getConstant(4, getPointerTy());
      FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff);
    }
    if (!MemOps.empty()) {
      // Tie the stores together with the existing root via a TokenFactor.
      MemOps.push_back(DAG.getRoot());
      DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps));
    }
  }

  // Finally, inform the code generator which regs we return values in.
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(PPC::R3);
    break;
  case MVT::i64:
    // i64 comes back in the R3/R4 pair.
    MF.addLiveOut(PPC::R3);
    MF.addLiveOut(PPC::R4);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(PPC::F1);
    break;
  }

  return ArgValues;
}

/// LowerCallTo - Lower an outgoing call: compute the stack space needed,
/// marshal the arguments into registers and/or the outgoing parameter area,
/// and produce the (result value, chain) pair for the caller.
std::pair<SDOperand, SDOperand>
PPCTargetLowering::LowerCallTo(SDOperand Chain,
                               const Type *RetTy, bool isVarArg,
                               unsigned CallingConv, bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.
  unsigned NumBytes = 24;

  if (Args.empty()) {
    // No arguments: only the 24-byte linkage area is reserved.
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));
  } else {
    // Each argument occupies 4 or 8 bytes of parameter space.
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }
    }

    // Just to be safe, we'll always reserve the full 24 bytes of linkage area
    // plus 32 bytes of argument space in case any called code gets funky on us.
    // (Required by ABI to support var arg)
    if (NumBytes < 56) NumBytes = 56;

    // Adjust the stack pointer for the new arguments...
    // (continuation of PPCTargetLowering::LowerCallTo: argument marshaling)
    // These operations are automatically eliminated by the prolog/epilog pass
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
    // passing.
    SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

    // Figure out which arguments are going to go in registers, and which in
    // memory.  Also, if this is a vararg function, floating point operations
    // must be stored to our stack, and loaded into integer regs as well, if
    // any integer regs are available for argument passing.
    unsigned ArgOffset = 24;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;

    std::vector<SDOperand> MemOps;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      MVT::ValueType ArgVT = getValueType(Args[i].second);

      switch (ArgVT) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first = DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first = DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
        // FALL THROUGH
      case MVT::i32:
        if (GPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --GPR_remaining;
        } else {
          // Out of GPRs: store the argument to its parameter-area slot.
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 4;
        break;
      case MVT::i64:
        // If we have one free GPR left, we can place the upper half of the i64
        // in it, and store the other half to the stack.  If we have two or more
        // free GPRs, then we can pass both halves of the i64 in registers.
        if (GPR_remaining > 0) {
          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(1, MVT::i32));
          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(0, MVT::i32));
          args_to_use.push_back(Hi);
          --GPR_remaining;
          if (GPR_remaining > 0) {
            args_to_use.push_back(Lo);
            --GPR_remaining;
          } else {
            // The low half goes into the second word of the 8-byte slot.
            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 8;
        break;
      case MVT::f32:
      case MVT::f64:
        if (FPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --FPR_remaining;
          if (isVarArg) {
            // Varargs: the FP value must also be stored to the stack and
            // reloaded into the shadowing integer register(s), since the
            // callee may fetch it as an integer vararg.
            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Args[i].first, PtrOff,
                                          DAG.getSrcValue(NULL));
            MemOps.push_back(Store);
            // Float varargs are always shadowed in available integer registers
            if (GPR_remaining > 0) {
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
            // An f64 shadows a second GPR with its low word.
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
              PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
          } else {
            // If we have any FPRs remaining, we may also have GPRs remaining.
            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
            // GPRs.
            if (GPR_remaining > 0) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
        break;
      }
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  // Describe the call's results: the return value(s), if any, followed by the
  // output chain (MVT::Other).
  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  MVT::ValueType ActualRetTyVT = RetTyVT;
  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
    ActualRetTyVT = MVT::i32;   // Promote result to i32.

  if (RetTyVT == MVT::i64) {
    // i64 results come back as two i32 halves.
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
  } else if (RetTyVT != MVT::isVoid) {
    RetVals.push_back(ActualRetTyVT);
  }
  RetVals.push_back(MVT::Other);

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  // (continuation of PPCTargetLowering::LowerCallTo: emit the call itself)
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);

  // Build the PPCISD::CALL node: chain, callee, then the marshaled args.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
  SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
  // The chain is always the last result value of the call node.
  Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, getPointerTy()));
  SDOperand RetVal = TheCall;

  // If the result is a small value, add a note so that we keep track of the
  // information about whether it is sign or zero extended.
  if (RetTyVT != ActualRetTyVT) {
    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
  } else if (RetTyVT == MVT::i64) {
    // Glue the two i32 halves back into a single i64 value.
    RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
  }

  return std::make_pair(RetVal, Chain);
}

/// InsertAtEndOfBasicBlock - Expand a SELECT_CC pseudo instruction into an
/// explicit branch diamond at the machine-code level.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC sinkMBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  // Operand 4 holds the branch opcode to use; operand 1 is the condition
  // register to branch on.
  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;   // Resume emission in the join (sink) block.
}

/// PerformDAGCombine - Target-specific DAG combines for PowerPC:
/// fp->int->fp round-trips, integer stores of FP_TO_SINT, and CSE of
/// AltiVec VCMP against an existing identical VCMPo.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          // FCTIDZ operates on f64, so widen an f32 source first.
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          // Round back down if the destination type is f32.
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      // FCTIWZ takes an f64 input; widen f32 sources.
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    // Only worth scanning when every operand has other users; a matching
    // VCMPo would itself be such a user.
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
            (*UI)->getOperand(1) == N->getOperand(1) &&
            (*UI)->getOperand(2) == N->getOperand(2) &&
            (*UI)->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there are non-zero uses of the flag value, use the VCMPo node!
      if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1))
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  }

  return SDOperand();
}

/// computeMaskedBitsForTargetNode - Report which bits of target-specific
/// nodes are known zero/one.  The AltiVec predicate-compare intrinsics
/// produce only 0 or 1, so all bits above bit 0 are known zero.
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  KnownZero = 0;
  KnownOne = 0;
  switch (Op.getOpcode()) {
  default: break;
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
  // These letters each name a register class (see
  // getRegClassForInlineAsmConstraint below); everything else is handled by
  // the generic TargetLowering implementation.
  switch (ConstraintLetter) {
  default: break;
  case 'b':
  case 'r':
  case 'f':
  case 'v':
  case 'y':
    return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}


/// getRegClassForInlineAsmConstraint - Return the list of registers an
/// inline-asm operand with the given single-letter constraint may be
/// allocated to.  The letters follow the GCC RS6000 constraint scheme.
std::vector<unsigned> PPCTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {   // GCC RS6000 Constraint Letters
    default: break;            // Unknown constraint letter
    case 'b':   // Base register: any GPR except R0.
      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'r':   // Any general-purpose register.
      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'f':   // Any floating-point register.
      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
                                   0);
    case 'v':   // Any AltiVec vector register.
      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
                                   PPC::V4 ,
                                   PPC::V5 , PPC::V6 , PPC::V7 ,
                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
                                   0);
    case 'y':   // Condition registers.
      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
                                   0);
    }
  }

  // Multi-character or unrecognized constraint: no register class to offer.
  return std::vector<unsigned>();
}

// isOperandValidForConstraint - Return true if Op is a valid operand for the
// given immediate-constraint letter ('I'..'P').  Other letters fall through
// to the generic TargetLowering handling.
bool PPCTargetLowering::
isOperandValidForConstraint(SDOperand Op, char Letter) {
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    if (!isa<ConstantSDNode>(Op)) return false;  // Must be an immediate.
    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      return (short)Value == (int)Value;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      // Both reduce to: the low 16 bits are all zero.
      return (short)Value == 0;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      return (Value >> 16) == 0;
    case 'M':  // "M" is a constant that is greater than 31.
      return Value > 31;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      return (int)Value > 0 && isPowerOf2_32(Value);
    case 'O':  // "O" is the constant zero.
      return Value == 0;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      return (short)-Value == (int)-Value;
    }
    break;
  }
  }

  // Handle standard constraint letters.
1709 return TargetLowering::isOperandValidForConstraint(Op, Letter); 1710} 1711 1712/// isLegalAddressImmediate - Return true if the integer value can be used 1713/// as the offset of the target addressing mode. 1714bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const { 1715 // PPC allows a sign-extended 16-bit immediate field. 1716 return (V > -(1 << 16) && V < (1 << 16)-1); 1717} 1718