PPCISelLowering.cpp revision 098e699f21a532d37b1ad44bb1bc24a87b3f9962
1//===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by Chris Lattner and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file implements the PPCISelLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "PPCISelLowering.h" 15#include "PPCTargetMachine.h" 16#include "llvm/ADT/VectorExtras.h" 17#include "llvm/Analysis/ScalarEvolutionExpressions.h" 18#include "llvm/CodeGen/MachineFrameInfo.h" 19#include "llvm/CodeGen/MachineFunction.h" 20#include "llvm/CodeGen/MachineInstrBuilder.h" 21#include "llvm/CodeGen/SelectionDAG.h" 22#include "llvm/CodeGen/SSARegMap.h" 23#include "llvm/Constants.h" 24#include "llvm/Function.h" 25#include "llvm/Intrinsics.h" 26#include "llvm/Support/MathExtras.h" 27#include "llvm/Target/TargetOptions.h" 28using namespace llvm; 29 30PPCTargetLowering::PPCTargetLowering(TargetMachine &TM) 31 : TargetLowering(TM) { 32 33 // Fold away setcc operations if possible. 34 setSetCCIsExpensive(); 35 setPow2DivIsCheap(); 36 37 // Use _setjmp/_longjmp instead of setjmp/longjmp. 38 setUseUnderscoreSetJmpLongJmp(true); 39 40 // Set up the register classes. 
41 addRegisterClass(MVT::i32, PPC::GPRCRegisterClass); 42 addRegisterClass(MVT::f32, PPC::F4RCRegisterClass); 43 addRegisterClass(MVT::f64, PPC::F8RCRegisterClass); 44 45 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 46 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 47 48 // PowerPC has no intrinsics for these particular operations 49 setOperationAction(ISD::MEMMOVE, MVT::Other, Expand); 50 setOperationAction(ISD::MEMSET, MVT::Other, Expand); 51 setOperationAction(ISD::MEMCPY, MVT::Other, Expand); 52 53 // PowerPC has an i16 but no i8 (or i1) SEXTLOAD 54 setOperationAction(ISD::SEXTLOAD, MVT::i1, Expand); 55 setOperationAction(ISD::SEXTLOAD, MVT::i8, Expand); 56 57 // PowerPC has no SREM/UREM instructions 58 setOperationAction(ISD::SREM, MVT::i32, Expand); 59 setOperationAction(ISD::UREM, MVT::i32, Expand); 60 61 // We don't support sin/cos/sqrt/fmod 62 setOperationAction(ISD::FSIN , MVT::f64, Expand); 63 setOperationAction(ISD::FCOS , MVT::f64, Expand); 64 setOperationAction(ISD::FREM , MVT::f64, Expand); 65 setOperationAction(ISD::FSIN , MVT::f32, Expand); 66 setOperationAction(ISD::FCOS , MVT::f32, Expand); 67 setOperationAction(ISD::FREM , MVT::f32, Expand); 68 69 // If we're enabling GP optimizations, use hardware square root 70 if (!TM.getSubtarget<PPCSubtarget>().hasFSQRT()) { 71 setOperationAction(ISD::FSQRT, MVT::f64, Expand); 72 setOperationAction(ISD::FSQRT, MVT::f32, Expand); 73 } 74 75 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); 76 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); 77 78 // PowerPC does not have BSWAP, CTPOP or CTTZ 79 setOperationAction(ISD::BSWAP, MVT::i32 , Expand); 80 setOperationAction(ISD::CTPOP, MVT::i32 , Expand); 81 setOperationAction(ISD::CTTZ , MVT::i32 , Expand); 82 83 // PowerPC does not have ROTR 84 setOperationAction(ISD::ROTR, MVT::i32 , Expand); 85 86 // PowerPC does not have Select 87 setOperationAction(ISD::SELECT, MVT::i32, Expand); 88 setOperationAction(ISD::SELECT, MVT::f32, Expand); 
89 setOperationAction(ISD::SELECT, MVT::f64, Expand); 90 91 // PowerPC wants to turn select_cc of FP into fsel when possible. 92 setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); 93 setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); 94 95 // PowerPC wants to optimize integer setcc a bit 96 setOperationAction(ISD::SETCC, MVT::i32, Custom); 97 98 // PowerPC does not have BRCOND which requires SetCC 99 setOperationAction(ISD::BRCOND, MVT::Other, Expand); 100 101 // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. 102 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); 103 104 // PowerPC does not have [U|S]INT_TO_FP 105 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); 106 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); 107 108 setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand); 109 setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand); 110 111 // PowerPC does not have truncstore for i1. 112 setOperationAction(ISD::TRUNCSTORE, MVT::i1, Promote); 113 114 // Support label based line numbers. 115 setOperationAction(ISD::LOCATION, MVT::Other, Expand); 116 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 117 // FIXME - use subtarget debug flags 118 if (!TM.getSubtarget<PPCSubtarget>().isDarwin()) 119 setOperationAction(ISD::DEBUG_LABEL, MVT::Other, Expand); 120 121 // We want to legalize GlobalAddress and ConstantPool nodes into the 122 // appropriate instructions to materialize the address. 123 setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); 124 setOperationAction(ISD::ConstantPool, MVT::i32, Custom); 125 126 // RET must be custom lowered, to meet ABI requirements 127 setOperationAction(ISD::RET , MVT::Other, Custom); 128 129 // VASTART needs to be custom lowered to use the VarArgsFrameIndex 130 setOperationAction(ISD::VASTART , MVT::Other, Custom); 131 132 // Use the default implementation. 
133 setOperationAction(ISD::VAARG , MVT::Other, Expand); 134 setOperationAction(ISD::VACOPY , MVT::Other, Expand); 135 setOperationAction(ISD::VAEND , MVT::Other, Expand); 136 setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); 137 setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); 138 setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Expand); 139 140 // We want to custom lower some of our intrinsics. 141 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); 142 143 if (TM.getSubtarget<PPCSubtarget>().is64Bit()) { 144 // They also have instructions for converting between i64 and fp. 145 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); 146 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); 147 148 // FIXME: disable this lowered code. This generates 64-bit register values, 149 // and we don't model the fact that the top part is clobbered by calls. We 150 // need to flag these together so that the value isn't live across a call. 151 //setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); 152 153 // To take advantage of the above i64 FP_TO_SINT, promote i32 FP_TO_UINT 154 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Promote); 155 } else { 156 // PowerPC does not have FP_TO_UINT on 32-bit implementations. 157 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); 158 } 159 160 if (TM.getSubtarget<PPCSubtarget>().has64BitRegs()) { 161 // 64 bit PowerPC implementations can support i64 types directly 162 addRegisterClass(MVT::i64, PPC::G8RCRegisterClass); 163 // BUILD_PAIR can't be handled natively, and should be expanded to shl/or 164 setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); 165 } else { 166 // 32 bit PowerPC wants to expand i64 shifts itself. 
167 setOperationAction(ISD::SHL, MVT::i64, Custom); 168 setOperationAction(ISD::SRL, MVT::i64, Custom); 169 setOperationAction(ISD::SRA, MVT::i64, Custom); 170 } 171 172 if (TM.getSubtarget<PPCSubtarget>().hasAltivec()) { 173 // First set operation action for all vector types to expand. Then we 174 // will selectively turn on ones that can be effectively codegen'd. 175 for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; 176 VT != (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { 177 // add/sub/and/or/xor are legal for all supported vector VT's. 178 setOperationAction(ISD::ADD , (MVT::ValueType)VT, Legal); 179 setOperationAction(ISD::SUB , (MVT::ValueType)VT, Legal); 180 setOperationAction(ISD::AND , (MVT::ValueType)VT, Legal); 181 setOperationAction(ISD::OR , (MVT::ValueType)VT, Legal); 182 setOperationAction(ISD::XOR , (MVT::ValueType)VT, Legal); 183 184 // We promote all shuffles to v16i8. 185 setOperationAction(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, Promote); 186 AddPromotedToType(ISD::VECTOR_SHUFFLE, (MVT::ValueType)VT, MVT::v16i8); 187 188 setOperationAction(ISD::MUL , (MVT::ValueType)VT, Expand); 189 setOperationAction(ISD::SDIV, (MVT::ValueType)VT, Expand); 190 setOperationAction(ISD::SREM, (MVT::ValueType)VT, Expand); 191 setOperationAction(ISD::UDIV, (MVT::ValueType)VT, Expand); 192 setOperationAction(ISD::UREM, (MVT::ValueType)VT, Expand); 193 setOperationAction(ISD::EXTRACT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 194 setOperationAction(ISD::INSERT_VECTOR_ELT, (MVT::ValueType)VT, Expand); 195 setOperationAction(ISD::BUILD_VECTOR, (MVT::ValueType)VT, Expand); 196 197 setOperationAction(ISD::SCALAR_TO_VECTOR, (MVT::ValueType)VT, Expand); 198 } 199 200 // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle 201 // with merges, splats, etc. 
202 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); 203 204 addRegisterClass(MVT::v4f32, PPC::VRRCRegisterClass); 205 addRegisterClass(MVT::v4i32, PPC::VRRCRegisterClass); 206 addRegisterClass(MVT::v8i16, PPC::VRRCRegisterClass); 207 addRegisterClass(MVT::v16i8, PPC::VRRCRegisterClass); 208 209 setOperationAction(ISD::MUL, MVT::v4f32, Legal); 210 211 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Custom); 212 setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4i32, Custom); 213 214 setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); 215 setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); 216 setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); 217 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); 218 } 219 220 setSetCCResultContents(ZeroOrOneSetCCResult); 221 setStackPointerRegisterToSaveRestore(PPC::R1); 222 223 // We have target-specific dag combine patterns for the following nodes: 224 setTargetDAGCombine(ISD::SINT_TO_FP); 225 setTargetDAGCombine(ISD::STORE); 226 227 computeRegisterProperties(); 228} 229 230const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { 231 switch (Opcode) { 232 default: return 0; 233 case PPCISD::FSEL: return "PPCISD::FSEL"; 234 case PPCISD::FCFID: return "PPCISD::FCFID"; 235 case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; 236 case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; 237 case PPCISD::STFIWX: return "PPCISD::STFIWX"; 238 case PPCISD::VMADDFP: return "PPCISD::VMADDFP"; 239 case PPCISD::VNMSUBFP: return "PPCISD::VNMSUBFP"; 240 case PPCISD::VPERM: return "PPCISD::VPERM"; 241 case PPCISD::Hi: return "PPCISD::Hi"; 242 case PPCISD::Lo: return "PPCISD::Lo"; 243 case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; 244 case PPCISD::SRL: return "PPCISD::SRL"; 245 case PPCISD::SRA: return "PPCISD::SRA"; 246 case PPCISD::SHL: return "PPCISD::SHL"; 247 case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; 248 case PPCISD::STD_32: return "PPCISD::STD_32"; 249 case PPCISD::CALL: 
return "PPCISD::CALL"; 250 case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; 251 case PPCISD::MFCR: return "PPCISD::MFCR"; 252 case PPCISD::VCMP: return "PPCISD::VCMP"; 253 case PPCISD::VCMPo: return "PPCISD::VCMPo"; 254 } 255} 256 257/// isFloatingPointZero - Return true if this is 0.0 or -0.0. 258static bool isFloatingPointZero(SDOperand Op) { 259 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) 260 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); 261 else if (Op.getOpcode() == ISD::EXTLOAD || Op.getOpcode() == ISD::LOAD) { 262 // Maybe this has already been legalized into the constant pool? 263 if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1))) 264 if (ConstantFP *CFP = dyn_cast<ConstantFP>(CP->get())) 265 return CFP->isExactlyValue(-0.0) || CFP->isExactlyValue(0.0); 266 } 267 return false; 268} 269 270 271/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand 272/// specifies a splat of a single element that is suitable for input to 273/// VSPLTB/VSPLTH/VSPLTW. 274bool PPC::isSplatShuffleMask(SDNode *N, unsigned EltSize) { 275 assert(N->getOpcode() == ISD::BUILD_VECTOR && 276 N->getNumOperands() == 16 && 277 (EltSize == 1 || EltSize == 2 || EltSize == 4)); 278 279 // This is a splat operation if each element of the permute is the same, and 280 // if the value doesn't reference the second vector. 281 unsigned ElementBase = 0; 282 SDOperand Elt = N->getOperand(0); 283 if (ConstantSDNode *EltV = dyn_cast<ConstantSDNode>(Elt)) 284 ElementBase = EltV->getValue(); 285 else 286 return false; // FIXME: Handle UNDEF elements too! 287 288 if (cast<ConstantSDNode>(Elt)->getValue() >= 16) 289 return false; 290 291 // Check that they are consequtive. 
292 for (unsigned i = 1; i != EltSize; ++i) { 293 if (!isa<ConstantSDNode>(N->getOperand(i)) || 294 cast<ConstantSDNode>(N->getOperand(i))->getValue() != i+ElementBase) 295 return false; 296 } 297 298 assert(isa<ConstantSDNode>(Elt) && "Invalid VECTOR_SHUFFLE mask!"); 299 for (unsigned i = EltSize, e = 16; i != e; i += EltSize) { 300 assert(isa<ConstantSDNode>(N->getOperand(i)) && 301 "Invalid VECTOR_SHUFFLE mask!"); 302 for (unsigned j = 0; j != EltSize; ++j) 303 if (N->getOperand(i+j) != N->getOperand(j)) 304 return false; 305 } 306 307 return true; 308} 309 310/// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the 311/// specified isSplatShuffleMask VECTOR_SHUFFLE mask. 312unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { 313 assert(isSplatShuffleMask(N, EltSize)); 314 return cast<ConstantSDNode>(N->getOperand(0))->getValue() / EltSize; 315} 316 317/// isVecSplatImm - Return true if this is a build_vector of constants which 318/// can be formed by using a vspltis[bhw] instruction. The ByteSize field 319/// indicates the number of bytes of each element [124] -> [bhw]. 320bool PPC::isVecSplatImm(SDNode *N, unsigned ByteSize, char *Val) { 321 SDOperand OpVal(0, 0); 322 // Check to see if this buildvec has a single non-undef value in its elements. 323 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 324 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 325 if (OpVal.Val == 0) 326 OpVal = N->getOperand(i); 327 else if (OpVal != N->getOperand(i)) 328 return false; 329 } 330 331 if (OpVal.Val == 0) return false; // All UNDEF: use implicit def. 
332 333 unsigned ValSizeInBytes = 0; 334 uint64_t Value = 0; 335 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) { 336 Value = CN->getValue(); 337 ValSizeInBytes = MVT::getSizeInBits(CN->getValueType(0))/8; 338 } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) { 339 assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!"); 340 Value = FloatToBits(CN->getValue()); 341 ValSizeInBytes = 4; 342 } 343 344 // If the splat value is larger than the element value, then we can never do 345 // this splat. The only case that we could fit the replicated bits into our 346 // immediate field for would be zero, and we prefer to use vxor for it. 347 if (ValSizeInBytes < ByteSize) return false; 348 349 // If the element value is larger than the splat value, cut it in half and 350 // check to see if the two halves are equal. Continue doing this until we 351 // get to ByteSize. This allows us to handle 0x01010101 as 0x01. 352 while (ValSizeInBytes > ByteSize) { 353 ValSizeInBytes >>= 1; 354 355 // If the top half equals the bottom half, we're still ok. 356 if (((Value >> (ValSizeInBytes*8)) & (1 << (8*ValSizeInBytes)-1)) != 357 (Value & (1 << (8*ValSizeInBytes)-1))) 358 return false; 359 } 360 361 // Properly sign extend the value. 362 int ShAmt = (4-ByteSize)*8; 363 int MaskVal = ((int)Value << ShAmt) >> ShAmt; 364 365 // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros. 366 if (MaskVal == 0) return false; 367 368 if (Val) *Val = MaskVal; 369 370 // Finally, if this value fits in a 5 bit sext field, return true. 371 return ((MaskVal << (32-5)) >> (32-5)) == MaskVal; 372} 373 374 375/// LowerOperation - Provide custom lowering hooks for some operations. 
376/// 377SDOperand PPCTargetLowering::LowerOperation(SDOperand Op, SelectionDAG &DAG) { 378 switch (Op.getOpcode()) { 379 default: assert(0 && "Wasn't expecting to be able to lower this!"); 380 case ISD::FP_TO_SINT: { 381 assert(MVT::isFloatingPoint(Op.getOperand(0).getValueType())); 382 SDOperand Src = Op.getOperand(0); 383 if (Src.getValueType() == MVT::f32) 384 Src = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Src); 385 386 SDOperand Tmp; 387 switch (Op.getValueType()) { 388 default: assert(0 && "Unhandled FP_TO_SINT type in custom expander!"); 389 case MVT::i32: 390 Tmp = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Src); 391 break; 392 case MVT::i64: 393 Tmp = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Src); 394 break; 395 } 396 397 // Convert the FP value to an int value through memory. 398 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::i64, Tmp); 399 if (Op.getValueType() == MVT::i32) 400 Bits = DAG.getNode(ISD::TRUNCATE, MVT::i32, Bits); 401 return Bits; 402 } 403 case ISD::SINT_TO_FP: 404 if (Op.getOperand(0).getValueType() == MVT::i64) { 405 SDOperand Bits = DAG.getNode(ISD::BIT_CONVERT, MVT::f64, Op.getOperand(0)); 406 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Bits); 407 if (Op.getValueType() == MVT::f32) 408 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 409 return FP; 410 } else { 411 assert(Op.getOperand(0).getValueType() == MVT::i32 && 412 "Unhandled SINT_TO_FP type in custom expander!"); 413 // Since we only generate this in 64-bit mode, we can take advantage of 414 // 64-bit registers. In particular, sign extend the input value into the 415 // 64-bit register with extsw, store the WHOLE 64-bit value into the stack 416 // then lfd it and fcfid it. 
417 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 418 int FrameIdx = FrameInfo->CreateStackObject(8, 8); 419 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 420 421 SDOperand Ext64 = DAG.getNode(PPCISD::EXTSW_32, MVT::i32, 422 Op.getOperand(0)); 423 424 // STD the extended value into the stack slot. 425 SDOperand Store = DAG.getNode(PPCISD::STD_32, MVT::Other, 426 DAG.getEntryNode(), Ext64, FIdx, 427 DAG.getSrcValue(NULL)); 428 // Load the value as a double. 429 SDOperand Ld = DAG.getLoad(MVT::f64, Store, FIdx, DAG.getSrcValue(NULL)); 430 431 // FCFID it and return it. 432 SDOperand FP = DAG.getNode(PPCISD::FCFID, MVT::f64, Ld); 433 if (Op.getValueType() == MVT::f32) 434 FP = DAG.getNode(ISD::FP_ROUND, MVT::f32, FP); 435 return FP; 436 } 437 break; 438 439 case ISD::SELECT_CC: { 440 // Turn FP only select_cc's into fsel instructions. 441 if (!MVT::isFloatingPoint(Op.getOperand(0).getValueType()) || 442 !MVT::isFloatingPoint(Op.getOperand(2).getValueType())) 443 break; 444 445 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get(); 446 447 // Cannot handle SETEQ/SETNE. 448 if (CC == ISD::SETEQ || CC == ISD::SETNE) break; 449 450 MVT::ValueType ResVT = Op.getValueType(); 451 MVT::ValueType CmpVT = Op.getOperand(0).getValueType(); 452 SDOperand LHS = Op.getOperand(0), RHS = Op.getOperand(1); 453 SDOperand TV = Op.getOperand(2), FV = Op.getOperand(3); 454 455 // If the RHS of the comparison is a 0.0, we don't need to do the 456 // subtraction at all. 457 if (isFloatingPointZero(RHS)) 458 switch (CC) { 459 default: break; // SETUO etc aren't handled by fsel. 
460 case ISD::SETULT: 461 case ISD::SETLT: 462 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 463 case ISD::SETUGE: 464 case ISD::SETGE: 465 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 466 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 467 return DAG.getNode(PPCISD::FSEL, ResVT, LHS, TV, FV); 468 case ISD::SETUGT: 469 case ISD::SETGT: 470 std::swap(TV, FV); // fsel is natively setge, swap operands for setlt 471 case ISD::SETULE: 472 case ISD::SETLE: 473 if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits 474 LHS = DAG.getNode(ISD::FP_EXTEND, MVT::f64, LHS); 475 return DAG.getNode(PPCISD::FSEL, ResVT, 476 DAG.getNode(ISD::FNEG, MVT::f64, LHS), TV, FV); 477 } 478 479 SDOperand Cmp; 480 switch (CC) { 481 default: break; // SETUO etc aren't handled by fsel. 482 case ISD::SETULT: 483 case ISD::SETLT: 484 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 485 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 486 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 487 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 488 case ISD::SETUGE: 489 case ISD::SETGE: 490 Cmp = DAG.getNode(ISD::FSUB, CmpVT, LHS, RHS); 491 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 492 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 493 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 494 case ISD::SETUGT: 495 case ISD::SETGT: 496 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 497 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 498 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 499 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, FV, TV); 500 case ISD::SETULE: 501 case ISD::SETLE: 502 Cmp = DAG.getNode(ISD::FSUB, CmpVT, RHS, LHS); 503 if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits 504 Cmp = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Cmp); 505 return DAG.getNode(PPCISD::FSEL, ResVT, Cmp, TV, FV); 506 } 507 break; 508 } 509 case ISD::SHL: { 510 
assert(Op.getValueType() == MVT::i64 && 511 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 512 // The generic code does a fine job expanding shift by a constant. 513 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 514 515 // Otherwise, expand into a bunch of logical ops. Note that these ops 516 // depend on the PPC behavior for oversized shift amounts. 517 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 518 DAG.getConstant(0, MVT::i32)); 519 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 520 DAG.getConstant(1, MVT::i32)); 521 SDOperand Amt = Op.getOperand(1); 522 523 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 524 DAG.getConstant(32, MVT::i32), Amt); 525 SDOperand Tmp2 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Amt); 526 SDOperand Tmp3 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Tmp1); 527 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 528 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 529 DAG.getConstant(-32U, MVT::i32)); 530 SDOperand Tmp6 = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Tmp5); 531 SDOperand OutHi = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 532 SDOperand OutLo = DAG.getNode(PPCISD::SHL, MVT::i32, Lo, Amt); 533 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 534 } 535 case ISD::SRL: { 536 assert(Op.getValueType() == MVT::i64 && 537 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SHL!"); 538 // The generic code does a fine job expanding shift by a constant. 539 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 540 541 // Otherwise, expand into a bunch of logical ops. Note that these ops 542 // depend on the PPC behavior for oversized shift amounts. 
543 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 544 DAG.getConstant(0, MVT::i32)); 545 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 546 DAG.getConstant(1, MVT::i32)); 547 SDOperand Amt = Op.getOperand(1); 548 549 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 550 DAG.getConstant(32, MVT::i32), Amt); 551 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 552 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 553 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 554 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 555 DAG.getConstant(-32U, MVT::i32)); 556 SDOperand Tmp6 = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Tmp5); 557 SDOperand OutLo = DAG.getNode(ISD::OR, MVT::i32, Tmp4, Tmp6); 558 SDOperand OutHi = DAG.getNode(PPCISD::SRL, MVT::i32, Hi, Amt); 559 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 560 } 561 case ISD::SRA: { 562 assert(Op.getValueType() == MVT::i64 && 563 Op.getOperand(1).getValueType() == MVT::i32 && "Unexpected SRA!"); 564 // The generic code does a fine job expanding shift by a constant. 565 if (isa<ConstantSDNode>(Op.getOperand(1))) break; 566 567 // Otherwise, expand into a bunch of logical ops, followed by a select_cc. 
568 SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 569 DAG.getConstant(0, MVT::i32)); 570 SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32, Op.getOperand(0), 571 DAG.getConstant(1, MVT::i32)); 572 SDOperand Amt = Op.getOperand(1); 573 574 SDOperand Tmp1 = DAG.getNode(ISD::SUB, MVT::i32, 575 DAG.getConstant(32, MVT::i32), Amt); 576 SDOperand Tmp2 = DAG.getNode(PPCISD::SRL, MVT::i32, Lo, Amt); 577 SDOperand Tmp3 = DAG.getNode(PPCISD::SHL, MVT::i32, Hi, Tmp1); 578 SDOperand Tmp4 = DAG.getNode(ISD::OR , MVT::i32, Tmp2, Tmp3); 579 SDOperand Tmp5 = DAG.getNode(ISD::ADD, MVT::i32, Amt, 580 DAG.getConstant(-32U, MVT::i32)); 581 SDOperand Tmp6 = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Tmp5); 582 SDOperand OutHi = DAG.getNode(PPCISD::SRA, MVT::i32, Hi, Amt); 583 SDOperand OutLo = DAG.getSelectCC(Tmp5, DAG.getConstant(0, MVT::i32), 584 Tmp4, Tmp6, ISD::SETLE); 585 return DAG.getNode(ISD::BUILD_PAIR, MVT::i64, OutLo, OutHi); 586 } 587 case ISD::ConstantPool: { 588 ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); 589 Constant *C = CP->get(); 590 SDOperand CPI = DAG.getTargetConstantPool(C, MVT::i32, CP->getAlignment()); 591 SDOperand Zero = DAG.getConstant(0, MVT::i32); 592 593 if (getTargetMachine().getRelocationModel() == Reloc::Static) { 594 // Generate non-pic code that has direct accesses to the constant pool. 595 // The address of the global is just (hi(&g)+lo(&g)). 596 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 597 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 598 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 599 } 600 601 // Only lower ConstantPool on Darwin. 602 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break; 603 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, CPI, Zero); 604 if (getTargetMachine().getRelocationModel() == Reloc::PIC) { 605 // With PIC, the first instruction is actually "GR+hi(&G)". 
606 Hi = DAG.getNode(ISD::ADD, MVT::i32, 607 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 608 } 609 610 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, CPI, Zero); 611 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 612 return Lo; 613 } 614 case ISD::GlobalAddress: { 615 GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op); 616 GlobalValue *GV = GSDN->getGlobal(); 617 SDOperand GA = DAG.getTargetGlobalAddress(GV, MVT::i32, GSDN->getOffset()); 618 SDOperand Zero = DAG.getConstant(0, MVT::i32); 619 620 if (getTargetMachine().getRelocationModel() == Reloc::Static) { 621 // Generate non-pic code that has direct accesses to globals. 622 // The address of the global is just (hi(&g)+lo(&g)). 623 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 624 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 625 return DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 626 } 627 628 // Only lower GlobalAddress on Darwin. 629 if (!getTargetMachine().getSubtarget<PPCSubtarget>().isDarwin()) break; 630 631 SDOperand Hi = DAG.getNode(PPCISD::Hi, MVT::i32, GA, Zero); 632 if (getTargetMachine().getRelocationModel() == Reloc::PIC) { 633 // With PIC, the first instruction is actually "GR+hi(&G)". 634 Hi = DAG.getNode(ISD::ADD, MVT::i32, 635 DAG.getNode(PPCISD::GlobalBaseReg, MVT::i32), Hi); 636 } 637 638 SDOperand Lo = DAG.getNode(PPCISD::Lo, MVT::i32, GA, Zero); 639 Lo = DAG.getNode(ISD::ADD, MVT::i32, Hi, Lo); 640 641 if (!GV->hasWeakLinkage() && !GV->hasLinkOnceLinkage() && 642 (!GV->isExternal() || GV->hasNotBeenReadFromBytecode())) 643 return Lo; 644 645 // If the global is weak or external, we have to go through the lazy 646 // resolution stub. 
647 return DAG.getLoad(MVT::i32, DAG.getEntryNode(), Lo, DAG.getSrcValue(0)); 648 } 649 case ISD::SETCC: { 650 ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get(); 651 652 // If we're comparing for equality to zero, expose the fact that this is 653 // implented as a ctlz/srl pair on ppc, so that the dag combiner can 654 // fold the new nodes. 655 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 656 if (C->isNullValue() && CC == ISD::SETEQ) { 657 MVT::ValueType VT = Op.getOperand(0).getValueType(); 658 SDOperand Zext = Op.getOperand(0); 659 if (VT < MVT::i32) { 660 VT = MVT::i32; 661 Zext = DAG.getNode(ISD::ZERO_EXTEND, VT, Op.getOperand(0)); 662 } 663 unsigned Log2b = Log2_32(MVT::getSizeInBits(VT)); 664 SDOperand Clz = DAG.getNode(ISD::CTLZ, VT, Zext); 665 SDOperand Scc = DAG.getNode(ISD::SRL, VT, Clz, 666 DAG.getConstant(Log2b, getShiftAmountTy())); 667 return DAG.getNode(ISD::TRUNCATE, getSetCCResultTy(), Scc); 668 } 669 // Leave comparisons against 0 and -1 alone for now, since they're usually 670 // optimized. FIXME: revisit this when we can custom lower all setcc 671 // optimizations. 672 if (C->isAllOnesValue() || C->isNullValue()) 673 break; 674 } 675 676 // If we have an integer seteq/setne, turn it into a compare against zero 677 // by subtracting the rhs from the lhs, which is faster than setting a 678 // condition register, reading it back out, and masking the correct bit. 679 MVT::ValueType LHSVT = Op.getOperand(0).getValueType(); 680 if (MVT::isInteger(LHSVT) && (CC == ISD::SETEQ || CC == ISD::SETNE)) { 681 MVT::ValueType VT = Op.getValueType(); 682 SDOperand Sub = DAG.getNode(ISD::SUB, LHSVT, Op.getOperand(0), 683 Op.getOperand(1)); 684 return DAG.getSetCC(VT, Sub, DAG.getConstant(0, LHSVT), CC); 685 } 686 break; 687 } 688 case ISD::VASTART: { 689 // vastart just stores the address of the VarArgsFrameIndex slot into the 690 // memory location argument. 
691 // FIXME: Replace MVT::i32 with PointerTy 692 SDOperand FR = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 693 return DAG.getNode(ISD::STORE, MVT::Other, Op.getOperand(0), FR, 694 Op.getOperand(1), Op.getOperand(2)); 695 } 696 case ISD::RET: { 697 SDOperand Copy; 698 699 switch(Op.getNumOperands()) { 700 default: 701 assert(0 && "Do not know how to return this many arguments!"); 702 abort(); 703 case 1: 704 return SDOperand(); // ret void is legal 705 case 2: { 706 MVT::ValueType ArgVT = Op.getOperand(1).getValueType(); 707 unsigned ArgReg = MVT::isInteger(ArgVT) ? PPC::R3 : PPC::F1; 708 Copy = DAG.getCopyToReg(Op.getOperand(0), ArgReg, Op.getOperand(1), 709 SDOperand()); 710 break; 711 } 712 case 3: 713 Copy = DAG.getCopyToReg(Op.getOperand(0), PPC::R3, Op.getOperand(2), 714 SDOperand()); 715 Copy = DAG.getCopyToReg(Copy, PPC::R4, Op.getOperand(1),Copy.getValue(1)); 716 break; 717 } 718 return DAG.getNode(PPCISD::RET_FLAG, MVT::Other, Copy, Copy.getValue(1)); 719 } 720 case ISD::SCALAR_TO_VECTOR: { 721 // Create a stack slot that is 16-byte aligned. 722 MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); 723 int FrameIdx = FrameInfo->CreateStackObject(16, 16); 724 SDOperand FIdx = DAG.getFrameIndex(FrameIdx, MVT::i32); 725 726 // Store the input value into Value#0 of the stack slot. 727 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, DAG.getEntryNode(), 728 Op.getOperand(0), FIdx,DAG.getSrcValue(NULL)); 729 // Load it out. 730 return DAG.getLoad(Op.getValueType(), Store, FIdx, DAG.getSrcValue(NULL)); 731 } 732 case ISD::BUILD_VECTOR: 733 // If this is a case we can't handle, return null and let the default 734 // expansion code take care of it. If we CAN select this case, return Op. 735 736 // See if this is all zeros. 737 // FIXME: We should handle splat(-0.0), and other cases here. 
738 if (ISD::isBuildVectorAllZeros(Op.Val)) 739 return Op; 740 741 if (PPC::isVecSplatImm(Op.Val, 1) || // vspltisb 742 PPC::isVecSplatImm(Op.Val, 2) || // vspltish 743 PPC::isVecSplatImm(Op.Val, 4)) // vspltisw 744 return Op; 745 746 return SDOperand(); 747 748 case ISD::VECTOR_SHUFFLE: { 749 SDOperand V1 = Op.getOperand(0); 750 SDOperand V2 = Op.getOperand(1); 751 SDOperand PermMask = Op.getOperand(2); 752 753 // Cases that are handled by instructions that take permute immediates 754 // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be 755 // selected by the instruction selector. 756 if (V2.getOpcode() == ISD::UNDEF && 757 (PPC::isSplatShuffleMask(PermMask.Val, 1) || 758 PPC::isSplatShuffleMask(PermMask.Val, 2) || 759 PPC::isSplatShuffleMask(PermMask.Val, 4))) 760 break; 761 762 // TODO: Handle more cases, and also handle cases that are cheaper to do as 763 // multiple such instructions than as a constant pool load/vperm pair. 764 765 // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant 766 // vector that will get spilled to the constant pool. 767 if (V2.getOpcode() == ISD::UNDEF) V2 = V1; 768 769 // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except 770 // that it is in input element units, not in bytes. Convert now. 
771 MVT::ValueType EltVT = MVT::getVectorBaseType(V1.getValueType()); 772 unsigned BytesPerElement = MVT::getSizeInBits(EltVT)/8; 773 774 std::vector<SDOperand> ResultMask; 775 for (unsigned i = 0, e = PermMask.getNumOperands(); i != e; ++i) { 776 unsigned SrcElt =cast<ConstantSDNode>(PermMask.getOperand(i))->getValue(); 777 778 for (unsigned j = 0; j != BytesPerElement; ++j) 779 ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, 780 MVT::i8)); 781 } 782 783 SDOperand VPermMask =DAG.getNode(ISD::BUILD_VECTOR, MVT::v16i8, ResultMask); 784 return DAG.getNode(PPCISD::VPERM, V1.getValueType(), V1, V2, VPermMask); 785 } 786 case ISD::INTRINSIC_WO_CHAIN: { 787 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getValue(); 788 789 // If this is a lowered altivec predicate compare, CompareOpc is set to the 790 // opcode number of the comparison. 791 int CompareOpc = -1; 792 bool isDot = false; 793 switch (IntNo) { 794 default: return SDOperand(); // Don't custom lower most intrinsics. 795 // Comparison predicates. 
796 case Intrinsic::ppc_altivec_vcmpbfp_p: CompareOpc = 966; isDot = 1; break; 797 case Intrinsic::ppc_altivec_vcmpeqfp_p: CompareOpc = 198; isDot = 1; break; 798 case Intrinsic::ppc_altivec_vcmpequb_p: CompareOpc = 6; isDot = 1; break; 799 case Intrinsic::ppc_altivec_vcmpequh_p: CompareOpc = 70; isDot = 1; break; 800 case Intrinsic::ppc_altivec_vcmpequw_p: CompareOpc = 134; isDot = 1; break; 801 case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break; 802 case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break; 803 case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break; 804 case Intrinsic::ppc_altivec_vcmpgtsh_p: CompareOpc = 838; isDot = 1; break; 805 case Intrinsic::ppc_altivec_vcmpgtsw_p: CompareOpc = 902; isDot = 1; break; 806 case Intrinsic::ppc_altivec_vcmpgtub_p: CompareOpc = 518; isDot = 1; break; 807 case Intrinsic::ppc_altivec_vcmpgtuh_p: CompareOpc = 582; isDot = 1; break; 808 case Intrinsic::ppc_altivec_vcmpgtuw_p: CompareOpc = 646; isDot = 1; break; 809 810 // Normal Comparisons. 
811 case Intrinsic::ppc_altivec_vcmpbfp: CompareOpc = 966; isDot = 0; break; 812 case Intrinsic::ppc_altivec_vcmpeqfp: CompareOpc = 198; isDot = 0; break; 813 case Intrinsic::ppc_altivec_vcmpequb: CompareOpc = 6; isDot = 0; break; 814 case Intrinsic::ppc_altivec_vcmpequh: CompareOpc = 70; isDot = 0; break; 815 case Intrinsic::ppc_altivec_vcmpequw: CompareOpc = 134; isDot = 0; break; 816 case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break; 817 case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break; 818 case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break; 819 case Intrinsic::ppc_altivec_vcmpgtsh: CompareOpc = 838; isDot = 0; break; 820 case Intrinsic::ppc_altivec_vcmpgtsw: CompareOpc = 902; isDot = 0; break; 821 case Intrinsic::ppc_altivec_vcmpgtub: CompareOpc = 518; isDot = 0; break; 822 case Intrinsic::ppc_altivec_vcmpgtuh: CompareOpc = 582; isDot = 0; break; 823 case Intrinsic::ppc_altivec_vcmpgtuw: CompareOpc = 646; isDot = 0; break; 824 } 825 826 assert(CompareOpc>0 && "We only lower altivec predicate compares so far!"); 827 828 // If this is a non-dot comparison, make the VCMP node. 829 if (!isDot) 830 return DAG.getNode(PPCISD::VCMP, Op.getOperand(2).getValueType(), 831 Op.getOperand(1), Op.getOperand(2), 832 DAG.getConstant(CompareOpc, MVT::i32)); 833 834 // Create the PPCISD altivec 'dot' comparison node. 835 std::vector<SDOperand> Ops; 836 std::vector<MVT::ValueType> VTs; 837 Ops.push_back(Op.getOperand(2)); // LHS 838 Ops.push_back(Op.getOperand(3)); // RHS 839 Ops.push_back(DAG.getConstant(CompareOpc, MVT::i32)); 840 VTs.push_back(Op.getOperand(2).getValueType()); 841 VTs.push_back(MVT::Flag); 842 SDOperand CompNode = DAG.getNode(PPCISD::VCMPo, VTs, Ops); 843 844 // Now that we have the comparison, emit a copy from the CR to a GPR. 845 // This is flagged to the above dot comparison. 
846 SDOperand Flags = DAG.getNode(PPCISD::MFCR, MVT::i32, 847 DAG.getRegister(PPC::CR6, MVT::i32), 848 CompNode.getValue(1)); 849 850 // Unpack the result based on how the target uses it. 851 unsigned BitNo; // Bit # of CR6. 852 bool InvertBit; // Invert result? 853 switch (cast<ConstantSDNode>(Op.getOperand(1))->getValue()) { 854 default: // Can't happen, don't crash on invalid number though. 855 case 0: // Return the value of the EQ bit of CR6. 856 BitNo = 0; InvertBit = false; 857 break; 858 case 1: // Return the inverted value of the EQ bit of CR6. 859 BitNo = 0; InvertBit = true; 860 break; 861 case 2: // Return the value of the LT bit of CR6. 862 BitNo = 2; InvertBit = false; 863 break; 864 case 3: // Return the inverted value of the LT bit of CR6. 865 BitNo = 2; InvertBit = true; 866 break; 867 } 868 869 // Shift the bit into the low position. 870 Flags = DAG.getNode(ISD::SRL, MVT::i32, Flags, 871 DAG.getConstant(8-(3-BitNo), MVT::i32)); 872 // Isolate the bit. 873 Flags = DAG.getNode(ISD::AND, MVT::i32, Flags, 874 DAG.getConstant(1, MVT::i32)); 875 876 // If we are supposed to, toggle the bit. 
877 if (InvertBit) 878 Flags = DAG.getNode(ISD::XOR, MVT::i32, Flags, 879 DAG.getConstant(1, MVT::i32)); 880 return Flags; 881 } 882 } 883 return SDOperand(); 884} 885 886std::vector<SDOperand> 887PPCTargetLowering::LowerArguments(Function &F, SelectionDAG &DAG) { 888 // 889 // add beautiful description of PPC stack frame format, or at least some docs 890 // 891 MachineFunction &MF = DAG.getMachineFunction(); 892 MachineFrameInfo *MFI = MF.getFrameInfo(); 893 MachineBasicBlock& BB = MF.front(); 894 SSARegMap *RegMap = MF.getSSARegMap(); 895 std::vector<SDOperand> ArgValues; 896 897 unsigned ArgOffset = 24; 898 unsigned GPR_remaining = 8; 899 unsigned FPR_remaining = 13; 900 unsigned GPR_idx = 0, FPR_idx = 0; 901 static const unsigned GPR[] = { 902 PPC::R3, PPC::R4, PPC::R5, PPC::R6, 903 PPC::R7, PPC::R8, PPC::R9, PPC::R10, 904 }; 905 static const unsigned FPR[] = { 906 PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, 907 PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 908 }; 909 910 // Add DAG nodes to load the arguments... On entry to a function on PPC, 911 // the arguments start at offset 24, although they are likely to be passed 912 // in registers. 913 for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { 914 SDOperand newroot, argt; 915 unsigned ObjSize; 916 bool needsLoad = false; 917 bool ArgLive = !I->use_empty(); 918 MVT::ValueType ObjectVT = getValueType(I->getType()); 919 920 switch (ObjectVT) { 921 default: assert(0 && "Unhandled argument type!"); 922 case MVT::i1: 923 case MVT::i8: 924 case MVT::i16: 925 case MVT::i32: 926 ObjSize = 4; 927 if (!ArgLive) break; 928 if (GPR_remaining > 0) { 929 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 930 MF.addLiveIn(GPR[GPR_idx], VReg); 931 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 932 if (ObjectVT != MVT::i32) { 933 unsigned AssertOp = I->getType()->isSigned() ? 
ISD::AssertSext 934 : ISD::AssertZext; 935 argt = DAG.getNode(AssertOp, MVT::i32, argt, 936 DAG.getValueType(ObjectVT)); 937 argt = DAG.getNode(ISD::TRUNCATE, ObjectVT, argt); 938 } 939 } else { 940 needsLoad = true; 941 } 942 break; 943 case MVT::i64: 944 ObjSize = 8; 945 if (!ArgLive) break; 946 if (GPR_remaining > 0) { 947 SDOperand argHi, argLo; 948 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 949 MF.addLiveIn(GPR[GPR_idx], VReg); 950 argHi = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 951 // If we have two or more remaining argument registers, then both halves 952 // of the i64 can be sourced from there. Otherwise, the lower half will 953 // have to come off the stack. This can happen when an i64 is preceded 954 // by 28 bytes of arguments. 955 if (GPR_remaining > 1) { 956 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 957 MF.addLiveIn(GPR[GPR_idx+1], VReg); 958 argLo = DAG.getCopyFromReg(argHi, VReg, MVT::i32); 959 } else { 960 int FI = MFI->CreateFixedObject(4, ArgOffset+4); 961 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 962 argLo = DAG.getLoad(MVT::i32, DAG.getEntryNode(), FIN, 963 DAG.getSrcValue(NULL)); 964 } 965 // Build the outgoing arg thingy 966 argt = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, argLo, argHi); 967 newroot = argLo; 968 } else { 969 needsLoad = true; 970 } 971 break; 972 case MVT::f32: 973 case MVT::f64: 974 ObjSize = (ObjectVT == MVT::f64) ? 
8 : 4; 975 if (!ArgLive) { 976 if (FPR_remaining > 0) { 977 --FPR_remaining; 978 ++FPR_idx; 979 } 980 break; 981 } 982 if (FPR_remaining > 0) { 983 unsigned VReg; 984 if (ObjectVT == MVT::f32) 985 VReg = RegMap->createVirtualRegister(&PPC::F4RCRegClass); 986 else 987 VReg = RegMap->createVirtualRegister(&PPC::F8RCRegClass); 988 MF.addLiveIn(FPR[FPR_idx], VReg); 989 argt = newroot = DAG.getCopyFromReg(DAG.getRoot(), VReg, ObjectVT); 990 --FPR_remaining; 991 ++FPR_idx; 992 } else { 993 needsLoad = true; 994 } 995 break; 996 } 997 998 // We need to load the argument to a virtual register if we determined above 999 // that we ran out of physical registers of the appropriate type 1000 if (needsLoad) { 1001 unsigned SubregOffset = 0; 1002 if (ObjectVT == MVT::i8 || ObjectVT == MVT::i1) SubregOffset = 3; 1003 if (ObjectVT == MVT::i16) SubregOffset = 2; 1004 int FI = MFI->CreateFixedObject(ObjSize, ArgOffset); 1005 SDOperand FIN = DAG.getFrameIndex(FI, MVT::i32); 1006 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, 1007 DAG.getConstant(SubregOffset, MVT::i32)); 1008 argt = newroot = DAG.getLoad(ObjectVT, DAG.getEntryNode(), FIN, 1009 DAG.getSrcValue(NULL)); 1010 } 1011 1012 // Every 4 bytes of argument space consumes one of the GPRs available for 1013 // argument passing. 1014 if (GPR_remaining > 0) { 1015 unsigned delta = (GPR_remaining > 1 && ObjSize == 8) ? 2 : 1; 1016 GPR_remaining -= delta; 1017 GPR_idx += delta; 1018 } 1019 ArgOffset += ObjSize; 1020 if (newroot.Val) 1021 DAG.setRoot(newroot.getValue(1)); 1022 1023 ArgValues.push_back(argt); 1024 } 1025 1026 // If the function takes variable number of arguments, make a frame index for 1027 // the start of the first vararg value... for expansion of llvm.va_start. 
1028 if (F.isVarArg()) { 1029 VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset); 1030 SDOperand FIN = DAG.getFrameIndex(VarArgsFrameIndex, MVT::i32); 1031 // If this function is vararg, store any remaining integer argument regs 1032 // to their spots on the stack so that they may be loaded by deferencing the 1033 // result of va_next. 1034 std::vector<SDOperand> MemOps; 1035 for (; GPR_remaining > 0; --GPR_remaining, ++GPR_idx) { 1036 unsigned VReg = RegMap->createVirtualRegister(&PPC::GPRCRegClass); 1037 MF.addLiveIn(GPR[GPR_idx], VReg); 1038 SDOperand Val = DAG.getCopyFromReg(DAG.getRoot(), VReg, MVT::i32); 1039 SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Val.getValue(1), 1040 Val, FIN, DAG.getSrcValue(NULL)); 1041 MemOps.push_back(Store); 1042 // Increment the address by four for the next argument to store 1043 SDOperand PtrOff = DAG.getConstant(4, getPointerTy()); 1044 FIN = DAG.getNode(ISD::ADD, MVT::i32, FIN, PtrOff); 1045 } 1046 if (!MemOps.empty()) { 1047 MemOps.push_back(DAG.getRoot()); 1048 DAG.setRoot(DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps)); 1049 } 1050 } 1051 1052 // Finally, inform the code generator which regs we return values in. 
  switch (getValueType(F.getReturnType())) {
  default: assert(0 && "Unknown type!");
  case MVT::isVoid: break;
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
    MF.addLiveOut(PPC::R3);
    break;
  case MVT::i64:
    // 64-bit results come back split across R3 (high) / R4 (low).
    MF.addLiveOut(PPC::R3);
    MF.addLiveOut(PPC::R4);
    break;
  case MVT::f32:
  case MVT::f64:
    MF.addLiveOut(PPC::F1);
    break;
  }

  return ArgValues;
}

/// LowerCallTo - Lower an outgoing call: marshal Args into registers and/or
/// parameter-area stack slots, emit the CALLSEQ_START / PPCISD::CALL /
/// CALLSEQ_END sequence, and return the (result value, chain) pair.
/// Integer args use R3-R10, FP args use F1-F13 (shadowing GPR words); for
/// vararg calls FP args are additionally stored to the stack and reloaded
/// into the shadow GPRs.
std::pair<SDOperand, SDOperand>
PPCTargetLowering::LowerCallTo(SDOperand Chain,
                               const Type *RetTy, bool isVarArg,
                               unsigned CallingConv, bool isTailCall,
                               SDOperand Callee, ArgListTy &Args,
                               SelectionDAG &DAG) {
  // args_to_use will accumulate outgoing args for the PPCISD::CALL case in
  // SelectExpr to use to put the arguments in the appropriate registers.
  std::vector<SDOperand> args_to_use;

  // Count how many bytes are to be pushed on the stack, including the linkage
  // area, and parameter passing area.
  unsigned NumBytes = 24;

  if (Args.empty()) {
    // No arguments: only the 24-byte linkage area needs reserving.
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));
  } else {
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      switch (getValueType(Args[i].second)) {
      default: assert(0 && "Unknown value type!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
      case MVT::i32:
      case MVT::f32:
        NumBytes += 4;
        break;
      case MVT::i64:
      case MVT::f64:
        NumBytes += 8;
        break;
      }
    }

    // Just to be safe, we'll always reserve the full 24 bytes of linkage area
    // plus 32 bytes of argument space in case any called code gets funky on us.
    // (Required by ABI to support var arg)
    if (NumBytes < 56) NumBytes = 56;

    // Adjust the stack pointer for the new arguments...
    // These operations are automatically eliminated by the prolog/epilog pass
    Chain = DAG.getCALLSEQ_START(Chain,
                                 DAG.getConstant(NumBytes, getPointerTy()));

    // Set up a copy of the stack pointer for use loading and storing any
    // arguments that may not fit in the registers available for argument
    // passing.
    SDOperand StackPtr = DAG.getRegister(PPC::R1, MVT::i32);

    // Figure out which arguments are going to go in registers, and which in
    // memory.  Also, if this is a vararg function, floating point operations
    // must be stored to our stack, and loaded into integer regs as well, if
    // any integer regs are available for argument passing.
    unsigned ArgOffset = 24;
    unsigned GPR_remaining = 8;
    unsigned FPR_remaining = 13;

    std::vector<SDOperand> MemOps;
    for (unsigned i = 0, e = Args.size(); i != e; ++i) {
      // PtrOff will be used to store the current argument to the stack if a
      // register cannot be found for it.
      SDOperand PtrOff = DAG.getConstant(ArgOffset, getPointerTy());
      PtrOff = DAG.getNode(ISD::ADD, MVT::i32, StackPtr, PtrOff);
      MVT::ValueType ArgVT = getValueType(Args[i].second);

      switch (ArgVT) {
      default: assert(0 && "Unexpected ValueType for argument!");
      case MVT::i1:
      case MVT::i8:
      case MVT::i16:
        // Promote the integer to 32 bits.  If the input type is signed use a
        // sign extend, otherwise use a zero extend.
        if (Args[i].second->isSigned())
          Args[i].first =DAG.getNode(ISD::SIGN_EXTEND, MVT::i32, Args[i].first);
        else
          Args[i].first =DAG.getNode(ISD::ZERO_EXTEND, MVT::i32, Args[i].first);
        // FALL THROUGH
      case MVT::i32:
        if (GPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --GPR_remaining;
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 4;
        break;
      case MVT::i64:
        // If we have one free GPR left, we can place the upper half of the i64
        // in it, and store the other half to the stack.  If we have two or more
        // free GPRs, then we can pass both halves of the i64 in registers.
        if (GPR_remaining > 0) {
          SDOperand Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(1, MVT::i32));
          SDOperand Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, MVT::i32,
                                     Args[i].first, DAG.getConstant(0, MVT::i32));
          args_to_use.push_back(Hi);
          --GPR_remaining;
          if (GPR_remaining > 0) {
            args_to_use.push_back(Lo);
            --GPR_remaining;
          } else {
            // Low half goes in the second word of the 8-byte stack slot.
            SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
            PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
            MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                         Lo, PtrOff, DAG.getSrcValue(NULL)));
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += 8;
        break;
      case MVT::f32:
      case MVT::f64:
        if (FPR_remaining > 0) {
          args_to_use.push_back(Args[i].first);
          --FPR_remaining;
          if (isVarArg) {
            // Vararg callees may read FP args out of GPRs/memory, so store
            // the value and reload each word into the shadow GPR(s).
            SDOperand Store = DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                          Args[i].first, PtrOff,
                                          DAG.getSrcValue(NULL));
            MemOps.push_back(Store);
            // Float varargs are always shadowed in available integer registers
            if (GPR_remaining > 0) {
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              SDOperand ConstFour = DAG.getConstant(4, getPointerTy());
              PtrOff = DAG.getNode(ISD::ADD, MVT::i32, PtrOff, ConstFour);
              SDOperand Load = DAG.getLoad(MVT::i32, Store, PtrOff,
                                           DAG.getSrcValue(NULL));
              MemOps.push_back(Load.getValue(1));
              args_to_use.push_back(Load);
              --GPR_remaining;
            }
          } else {
            // If we have any FPRs remaining, we may also have GPRs remaining.
            // Args passed in FPRs consume either 1 (f32) or 2 (f64) available
            // GPRs.
            if (GPR_remaining > 0) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
            if (GPR_remaining > 0 && MVT::f64 == ArgVT) {
              args_to_use.push_back(DAG.getNode(ISD::UNDEF, MVT::i32));
              --GPR_remaining;
            }
          }
        } else {
          MemOps.push_back(DAG.getNode(ISD::STORE, MVT::Other, Chain,
                                       Args[i].first, PtrOff,
                                       DAG.getSrcValue(NULL)));
        }
        ArgOffset += (ArgVT == MVT::f32) ? 4 : 8;
        break;
      }
    }
    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, MVT::Other, MemOps);
  }

  // Compute the result value types of the call node: i64 returns as a pair
  // of i32s; sub-word integers are promoted to i32; the chain comes last.
  std::vector<MVT::ValueType> RetVals;
  MVT::ValueType RetTyVT = getValueType(RetTy);
  MVT::ValueType ActualRetTyVT = RetTyVT;
  if (RetTyVT >= MVT::i1 && RetTyVT <= MVT::i16)
    ActualRetTyVT = MVT::i32;   // Promote result to i32.

  if (RetTyVT == MVT::i64) {
    RetVals.push_back(MVT::i32);
    RetVals.push_back(MVT::i32);
  } else if (RetTyVT != MVT::isVoid) {
    RetVals.push_back(ActualRetTyVT);
  }
  RetVals.push_back(MVT::Other);

  // If the callee is a GlobalAddress node (quite common, every direct call is)
  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), MVT::i32);

  // Build the call node: chain, callee, then the marshalled arguments.
  std::vector<SDOperand> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);
  Ops.insert(Ops.end(), args_to_use.begin(), args_to_use.end());
  SDOperand TheCall = DAG.getNode(PPCISD::CALL, RetVals, Ops);
  // The chain is always the call node's last result value.
  Chain = TheCall.getValue(TheCall.Val->getNumValues()-1);
  Chain = DAG.getNode(ISD::CALLSEQ_END, MVT::Other, Chain,
                      DAG.getConstant(NumBytes, getPointerTy()));
  SDOperand RetVal = TheCall;

  // If the result is a small value, add a note so that we keep track of the
  // information about whether it is sign or zero extended.
  if (RetTyVT != ActualRetTyVT) {
    RetVal = DAG.getNode(RetTy->isSigned() ? ISD::AssertSext : ISD::AssertZext,
                         MVT::i32, RetVal, DAG.getValueType(RetTyVT));
    RetVal = DAG.getNode(ISD::TRUNCATE, RetTyVT, RetVal);
  } else if (RetTyVT == MVT::i64) {
    // Reassemble the two i32 result halves into a single i64 value.
    RetVal = DAG.getNode(ISD::BUILD_PAIR, MVT::i64, RetVal, RetVal.getValue(1));
  }

  return std::make_pair(RetVal, Chain);
}

/// InsertAtEndOfBasicBlock - Expand a SELECT_CC pseudo instruction into an
/// explicit branch diamond (conditional branch over a copy, joined by a PHI),
/// splitting BB and returning the block that ends the new sequence.
MachineBasicBlock *
PPCTargetLowering::InsertAtEndOfBasicBlock(MachineInstr *MI,
                                           MachineBasicBlock *BB) {
  assert((MI->getOpcode() == PPC::SELECT_CC_Int ||
          MI->getOpcode() == PPC::SELECT_CC_F4 ||
          MI->getOpcode() == PPC::SELECT_CC_F8) &&
         "Unexpected instr type to insert");

  // To "insert" a SELECT_CC instruction, we actually have to insert the diamond
  // control-flow pattern.  The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  ilist<MachineBasicBlock>::iterator It = BB;
  ++It;

  //  thisMBB:
  //  ...
  //   TrueVal = ...
  //   cmpTY ccX, r1, r2
  //   bCC copy1MBB
  //   fallthrough --> copy0MBB
  MachineBasicBlock *thisMBB = BB;
  MachineBasicBlock *copy0MBB = new MachineBasicBlock(LLVM_BB);
  MachineBasicBlock *sinkMBB = new MachineBasicBlock(LLVM_BB);
  // Operand 4 of the pseudo carries the branch opcode to emit; operand 1 is
  // the condition register to branch on.
  BuildMI(BB, MI->getOperand(4).getImmedValue(), 2)
    .addReg(MI->getOperand(1).getReg()).addMBB(sinkMBB);
  MachineFunction *F = BB->getParent();
  F->getBasicBlockList().insert(It, copy0MBB);
  F->getBasicBlockList().insert(It, sinkMBB);
  // Update machine-CFG edges by first adding all successors of the current
  // block to the new block which will contain the Phi node for the select.
  for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
      e = BB->succ_end(); i != e; ++i)
    sinkMBB->addSuccessor(*i);
  // Next, remove all successors of the current block, and add the true
  // and fallthrough blocks as its successors.
  while(!BB->succ_empty())
    BB->removeSuccessor(BB->succ_begin());
  BB->addSuccessor(copy0MBB);
  BB->addSuccessor(sinkMBB);

  //  copy0MBB:
  //   %FalseValue = ...
  //   # fallthrough to sinkMBB
  BB = copy0MBB;

  // Update machine-CFG edges
  BB->addSuccessor(sinkMBB);

  //  sinkMBB:
  //   %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
  //  ...
  BB = sinkMBB;
  BuildMI(BB, PPC::PHI, 4, MI->getOperand(0).getReg())
    .addReg(MI->getOperand(3).getReg()).addMBB(copy0MBB)
    .addReg(MI->getOperand(2).getReg()).addMBB(thisMBB);

  delete MI;   // The pseudo instruction is gone now.
  return BB;
}

/// PerformDAGCombine - Target-specific DAG combines:
///   - (sint_to_fp (fp_to_sint X)) with an i64 intermediate becomes
///     FCTIDZ/FCFID, avoiding a round trip through memory (64-bit only).
///   - (store (fp_to_sint F)) of an i32 becomes FCTIWZ + STFIWX when the
///     subtarget has the stfiwx instruction.
///   - A VCMP whose operands already feed an identical VCMPo is replaced by
///     that VCMPo's normal result, since VCMPo computes both outputs.
SDOperand PPCTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  TargetMachine &TM = getTargetMachine();
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SINT_TO_FP:
    if (TM.getSubtarget<PPCSubtarget>().is64Bit()) {
      if (N->getOperand(0).getOpcode() == ISD::FP_TO_SINT) {
        // Turn (sint_to_fp (fp_to_sint X)) -> fctidz/fcfid without load/stores.
        // We allow the src/dst to be either f32/f64, but the intermediate
        // type must be i64.
        if (N->getOperand(0).getValueType() == MVT::i64) {
          SDOperand Val = N->getOperand(0).getOperand(0);
          if (Val.getValueType() == MVT::f32) {
            Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
            DCI.AddToWorklist(Val.Val);
          }

          Val = DAG.getNode(PPCISD::FCTIDZ, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          Val = DAG.getNode(PPCISD::FCFID, MVT::f64, Val);
          DCI.AddToWorklist(Val.Val);
          if (N->getValueType(0) == MVT::f32) {
            Val = DAG.getNode(ISD::FP_ROUND, MVT::f32, Val);
            DCI.AddToWorklist(Val.Val);
          }
          return Val;
        } else if (N->getOperand(0).getValueType() == MVT::i32) {
          // If the intermediate type is i32, we can avoid the load/store here
          // too.
        }
      }
    }
    break;
  case ISD::STORE:
    // Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
    if (TM.getSubtarget<PPCSubtarget>().hasSTFIWX() &&
        N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
        N->getOperand(1).getValueType() == MVT::i32) {
      SDOperand Val = N->getOperand(1).getOperand(0);
      if (Val.getValueType() == MVT::f32) {
        Val = DAG.getNode(ISD::FP_EXTEND, MVT::f64, Val);
        DCI.AddToWorklist(Val.Val);
      }
      Val = DAG.getNode(PPCISD::FCTIWZ, MVT::f64, Val);
      DCI.AddToWorklist(Val.Val);

      // STFIWX replaces the whole store: chain, value, ptr, srcvalue.
      Val = DAG.getNode(PPCISD::STFIWX, MVT::Other, N->getOperand(0), Val,
                        N->getOperand(2), N->getOperand(3));
      DCI.AddToWorklist(Val.Val);
      return Val;
    }
    break;
  case PPCISD::VCMP: {
    // If a VCMPo node already exists with exactly the same operands as this
    // node, use its result instead of this node (VCMPo computes both a CR6 and
    // a normal output).
    //
    if (!N->getOperand(0).hasOneUse() &&
        !N->getOperand(1).hasOneUse() &&
        !N->getOperand(2).hasOneUse()) {

      // Scan all of the users of the LHS, looking for VCMPo's that match.
      SDNode *VCMPoNode = 0;

      SDNode *LHSN = N->getOperand(0).Val;
      for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
           UI != E; ++UI)
        if ((*UI)->getOpcode() == PPCISD::VCMPo &&
            (*UI)->getOperand(1) == N->getOperand(1) &&
            (*UI)->getOperand(2) == N->getOperand(2) &&
            (*UI)->getOperand(0) == N->getOperand(0)) {
          VCMPoNode = *UI;
          break;
        }

      // If there are non-zero uses of the flag value, use the VCMPo node!
      if (VCMPoNode && !VCMPoNode->hasNUsesOfValue(0, 1))
        return SDOperand(VCMPoNode, 0);
    }
    break;
  }
  }

  return SDOperand();
}

/// computeMaskedBitsForTargetNode - Report known-zero/known-one bits for
/// target-specific nodes.  The AltiVec predicate-compare intrinsics lowered
/// above produce only 0 or 1, so every bit except the lowest is known zero.
void PPCTargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                       uint64_t Mask,
                                                       uint64_t &KnownZero,
                                                       uint64_t &KnownOne,
                                                       unsigned Depth) const {
  // Default: nothing is known.
  KnownZero = 0;
  KnownOne = 0;
  switch (Op.getOpcode()) {
  default: break;
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getValue()) {
    default: break;
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
      KnownZero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
  }
  }
}


/// getConstraintType - Given a constraint letter, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(char ConstraintLetter) const {
  // 'b', 'r', 'f', 'v' and 'y' each select a PPC register class (see
  // getRegClassForInlineAsmConstraint below); defer everything else.
  switch (ConstraintLetter) {
  default: break;
  case 'b':
  case 'r':
  case 'f':
  case 'v':
  case 'y':
    return C_RegisterClass;
  }
  return TargetLowering::getConstraintType(ConstraintLetter);
}


/// getRegClassForInlineAsmConstraint - Map a single-letter GCC RS6000 inline
/// asm constraint to the list of PPC registers it may use:
///   'b' = base registers (GPRs minus R0), 'r' = all GPRs, 'f' = FPRs,
///   'v' = AltiVec vector registers, 'y' = condition registers.
std::vector<unsigned> PPCTargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT::ValueType VT) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {      // GCC RS6000 Constraint Letters
    default: break;  // Unknown constraint letter
    case 'b':
      return make_vector<unsigned>(/*no R0*/ PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'r':
      return make_vector<unsigned>(PPC::R0 , PPC::R1 , PPC::R2 , PPC::R3 ,
                                   PPC::R4 , PPC::R5 , PPC::R6 , PPC::R7 ,
                                   PPC::R8 , PPC::R9 , PPC::R10, PPC::R11,
                                   PPC::R12, PPC::R13, PPC::R14, PPC::R15,
                                   PPC::R16, PPC::R17, PPC::R18, PPC::R19,
                                   PPC::R20, PPC::R21, PPC::R22, PPC::R23,
                                   PPC::R24, PPC::R25, PPC::R26, PPC::R27,
                                   PPC::R28, PPC::R29, PPC::R30, PPC::R31,
                                   0);
    case 'f':
      return make_vector<unsigned>(PPC::F0 , PPC::F1 , PPC::F2 , PPC::F3 ,
                                   PPC::F4 , PPC::F5 , PPC::F6 , PPC::F7 ,
                                   PPC::F8 , PPC::F9 , PPC::F10, PPC::F11,
                                   PPC::F12, PPC::F13, PPC::F14, PPC::F15,
                                   PPC::F16, PPC::F17, PPC::F18, PPC::F19,
                                   PPC::F20, PPC::F21, PPC::F22, PPC::F23,
                                   PPC::F24, PPC::F25, PPC::F26, PPC::F27,
                                   PPC::F28, PPC::F29, PPC::F30, PPC::F31,
                                   0);
    case 'v':
      return make_vector<unsigned>(PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 ,
                                   PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
                                   PPC::V8 , PPC::V9 , PPC::V10, PPC::V11,
                                   PPC::V12, PPC::V13, PPC::V14, PPC::V15,
                                   PPC::V16, PPC::V17, PPC::V18, PPC::V19,
                                   PPC::V20, PPC::V21, PPC::V22, PPC::V23,
                                   PPC::V24, PPC::V25, PPC::V26, PPC::V27,
                                   PPC::V28, PPC::V29, PPC::V30, PPC::V31,
                                   0);
    case 'y':
      return make_vector<unsigned>(PPC::CR0, PPC::CR1, PPC::CR2, PPC::CR3,
                                   PPC::CR4, PPC::CR5, PPC::CR6, PPC::CR7,
                                   0);
    }
  }

  return std::vector<unsigned>();
}

// isOperandValidForConstraint - Test whether Op satisfies the immediate-value
// constraint letters 'I'..'P'; everything else falls back to the default
// TargetLowering handling.
bool PPCTargetLowering::
isOperandValidForConstraint(SDOperand Op, char Letter) {
  switch (Letter) {
  default: break;
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    if (!isa<ConstantSDNode>(Op)) return false;    // Must be an immediate.
    unsigned Value = cast<ConstantSDNode>(Op)->getValue();
    switch (Letter) {
    default: assert(0 && "Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      return (short)Value == (int)Value;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      return (short)Value == 0;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      return (Value >> 16) == 0;
    case 'M':  // "M" is a constant that is greater than 31.
      return Value > 31;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      return (int)Value > 0 && isPowerOf2_32(Value);
    case 'O':  // "O" is the constant zero.
      return Value == 0;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      return (short)-Value == (int)-Value;
    }
    break;
  }
  }

  // Handle standard constraint letters.
1580 return TargetLowering::isOperandValidForConstraint(Op, Letter); 1581} 1582 1583/// isLegalAddressImmediate - Return true if the integer value can be used 1584/// as the offset of the target addressing mode. 1585bool PPCTargetLowering::isLegalAddressImmediate(int64_t V) const { 1586 // PPC allows a sign-extended 16-bit immediate field. 1587 return (V > -(1 << 16) && V < (1 << 16)-1); 1588} 1589