TargetLowering.cpp revision 8b2794aeff151be8cdbd44786c1d0f94f8f2e427
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the TargetLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Target/TargetLowering.h" 15#include "llvm/Target/TargetData.h" 16#include "llvm/Target/TargetMachine.h" 17#include "llvm/Target/MRegisterInfo.h" 18#include "llvm/DerivedTypes.h" 19#include "llvm/CodeGen/SelectionDAG.h" 20#include "llvm/ADT/StringExtras.h" 21#include "llvm/Support/MathExtras.h" 22using namespace llvm; 23 24TargetLowering::TargetLowering(TargetMachine &tm) 25 : TM(tm), TD(TM.getTargetData()) { 26 assert(ISD::BUILTIN_OP_END <= 156 && 27 "Fixed size array in TargetLowering is not large enough!"); 28 // All operations default to being supported. 29 memset(OpActions, 0, sizeof(OpActions)); 30 memset(LoadXActions, 0, sizeof(LoadXActions)); 31 memset(&StoreXActions, 0, sizeof(StoreXActions)); 32 33 IsLittleEndian = TD->isLittleEndian(); 34 UsesGlobalOffsetTable = false; 35 ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType()); 36 ShiftAmtHandling = Undefined; 37 memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); 38 memset(TargetDAGCombineArray, 0, 39 sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0])); 40 maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; 41 allowUnalignedMemoryAccesses = false; 42 UseUnderscoreSetJmpLongJmp = false; 43 IntDivIsCheap = false; 44 Pow2DivIsCheap = false; 45 StackPointerRegisterToSaveRestore = 0; 46 SchedPreferenceInfo = SchedulingForLatency; 47 JumpBufSize = 0; 48 JumpBufAlignment = 0; 49} 50 51TargetLowering::~TargetLowering() {} 52 53/// setValueTypeAction - Set the action for a particular value type. This 54/// assumes an action has not already been set for this value type. 55static void SetValueTypeAction(MVT::ValueType VT, 56 TargetLowering::LegalizeAction Action, 57 TargetLowering &TLI, 58 MVT::ValueType *TransformToType, 59 TargetLowering::ValueTypeActionImpl &ValueTypeActions) { 60 ValueTypeActions.setTypeAction(VT, Action); 61 if (Action == TargetLowering::Promote) { 62 MVT::ValueType PromoteTo; 63 if (VT == MVT::f32) 64 PromoteTo = MVT::f64; 65 else { 66 unsigned LargerReg = VT+1; 67 while (!TLI.isTypeLegal((MVT::ValueType)LargerReg)) { 68 ++LargerReg; 69 assert(MVT::isInteger((MVT::ValueType)LargerReg) && 70 "Nothing to promote to??"); 71 } 72 PromoteTo = (MVT::ValueType)LargerReg; 73 } 74 75 assert(MVT::isInteger(VT) == MVT::isInteger(PromoteTo) && 76 MVT::isFloatingPoint(VT) == MVT::isFloatingPoint(PromoteTo) && 77 "Can only promote from int->int or fp->fp!"); 78 assert(VT < PromoteTo && "Must promote to a larger type!"); 79 TransformToType[VT] = PromoteTo; 80 } else if (Action == TargetLowering::Expand) { 81 assert((VT == MVT::Vector || MVT::isInteger(VT)) && VT > MVT::i8 && 82 "Cannot expand this type: target must support SOME integer reg!"); 83 // Expand to the next smaller integer type! 84 TransformToType[VT] = (MVT::ValueType)(VT-1); 85 } 86} 87 88 89/// computeRegisterProperties - Once all of the register classes are added, 90/// this allows us to compute derived properties we expose. 91void TargetLowering::computeRegisterProperties() { 92 assert(MVT::LAST_VALUETYPE <= 32 && 93 "Too many value types for ValueTypeActions to hold!"); 94 95 // Everything defaults to one. 96 for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) 97 NumElementsForVT[i] = 1; 98 99 // Find the largest integer register class. 100 unsigned LargestIntReg = MVT::i128; 101 for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) 102 assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); 103 104 // Every integer value type larger than this largest register takes twice as 105 // many registers to represent as the previous ValueType. 106 unsigned ExpandedReg = LargestIntReg; ++LargestIntReg; 107 for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg) 108 NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1]; 109 110 // Inspect all of the ValueType's possible, deciding how to process them. 111 for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg) 112 // If we are expanding this type, expand it! 113 if (getNumElements((MVT::ValueType)IntReg) != 1) 114 SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType, 115 ValueTypeActions); 116 else if (!isTypeLegal((MVT::ValueType)IntReg)) 117 // Otherwise, if we don't have native support, we must promote to a 118 // larger type. 119 SetValueTypeAction((MVT::ValueType)IntReg, Promote, *this, 120 TransformToType, ValueTypeActions); 121 else 122 TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg; 123 124 // If the target does not have native support for F32, promote it to F64. 125 if (!isTypeLegal(MVT::f32)) 126 SetValueTypeAction(MVT::f32, Promote, *this, 127 TransformToType, ValueTypeActions); 128 else 129 TransformToType[MVT::f32] = MVT::f32; 130 131 // Set MVT::Vector to always be Expanded 132 SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType, 133 ValueTypeActions); 134 135 // Loop over all of the legal vector value types, specifying an identity type 136 // transformation. 137 for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; 138 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 139 if (isTypeLegal((MVT::ValueType)i)) 140 TransformToType[i] = (MVT::ValueType)i; 141 } 142 143 assert(isTypeLegal(MVT::f64) && "Target does not support FP?"); 144 TransformToType[MVT::f64] = MVT::f64; 145} 146 147const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { 148 return NULL; 149} 150 151/// getPackedTypeBreakdown - Packed types are broken down into some number of 152/// legal first class types. For example, <8 x float> maps to 2 MVT::v4f32 153/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 154/// 155/// This method returns the number and type of the resultant breakdown. 156/// 157unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy, 158 MVT::ValueType &PTyElementVT, 159 MVT::ValueType &PTyLegalElementVT) const { 160 // Figure out the right, legal destination reg to copy into. 161 unsigned NumElts = PTy->getNumElements(); 162 MVT::ValueType EltTy = getValueType(PTy->getElementType()); 163 164 unsigned NumVectorRegs = 1; 165 166 // Divide the input until we get to a supported size. This will always 167 // end with a scalar if the target doesn't support vectors. 168 while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) { 169 NumElts >>= 1; 170 NumVectorRegs <<= 1; 171 } 172 173 MVT::ValueType VT; 174 if (NumElts == 1) { 175 VT = EltTy; 176 } else { 177 VT = getVectorType(EltTy, NumElts); 178 } 179 PTyElementVT = VT; 180 181 MVT::ValueType DestVT = getTypeToTransformTo(VT); 182 PTyLegalElementVT = DestVT; 183 if (DestVT < VT) { 184 // Value is expanded, e.g. i64 -> i16. 185 return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT)); 186 } else { 187 // Otherwise, promotion or legal types use the same number of registers as 188 // the vector decimated to the appropriate level. 189 return NumVectorRegs; 190 } 191 192 return 1; 193} 194 195//===----------------------------------------------------------------------===// 196// Optimization Methods 197//===----------------------------------------------------------------------===// 198 199/// ShrinkDemandedConstant - Check to see if the specified operand of the 200/// specified instruction is a constant integer. If so, check to see if there 201/// are any bits set in the constant that are not demanded. If so, shrink the 202/// constant and return true. 203bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 204 uint64_t Demanded) { 205 // FIXME: ISD::SELECT, ISD::SELECT_CC 206 switch(Op.getOpcode()) { 207 default: break; 208 case ISD::AND: 209 case ISD::OR: 210 case ISD::XOR: 211 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) 212 if ((~Demanded & C->getValue()) != 0) { 213 MVT::ValueType VT = Op.getValueType(); 214 SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), 215 DAG.getConstant(Demanded & C->getValue(), 216 VT)); 217 return CombineTo(Op, New); 218 } 219 break; 220 } 221 return false; 222} 223 224/// SimplifyDemandedBits - Look at Op. At this point, we know that only the 225/// DemandedMask bits of the result of Op are ever used downstream. If we can 226/// use this information to simplify Op, create a new simplified DAG node and 227/// return true, returning the original and new nodes in Old and New. Otherwise, 228/// analyze the expression and return a mask of KnownOne and KnownZero bits for 229/// the expression (used to simplify the caller). The KnownZero/One bits may 230/// only be accurate for those bits in the DemandedMask. 231bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, 232 uint64_t &KnownZero, 233 uint64_t &KnownOne, 234 TargetLoweringOpt &TLO, 235 unsigned Depth) const { 236 KnownZero = KnownOne = 0; // Don't know anything. 237 // Other users may use these bits. 238 if (!Op.Val->hasOneUse()) { 239 if (Depth != 0) { 240 // If not at the root, Just compute the KnownZero/KnownOne bits to 241 // simplify things downstream. 242 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 243 return false; 244 } 245 // If this is the root being simplified, allow it to have multiple uses, 246 // just set the DemandedMask to all bits. 247 DemandedMask = MVT::getIntVTBitMask(Op.getValueType()); 248 } else if (DemandedMask == 0) { 249 // Not demanding any bits from Op. 250 if (Op.getOpcode() != ISD::UNDEF) 251 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType())); 252 return false; 253 } else if (Depth == 6) { // Limit search depth. 254 return false; 255 } 256 257 uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; 258 switch (Op.getOpcode()) { 259 case ISD::Constant: 260 // We know all of the bits for a constant! 261 KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask; 262 KnownZero = ~KnownOne & DemandedMask; 263 return false; // Don't fall through, will infinitely loop. 264 case ISD::AND: 265 // If the RHS is a constant, check to see if the LHS would be zero without 266 // using the bits from the RHS. Below, we use knowledge about the RHS to 267 // simplify the LHS, here we're using information from the LHS to simplify 268 // the RHS. 269 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 270 uint64_t LHSZero, LHSOne; 271 ComputeMaskedBits(Op.getOperand(0), DemandedMask, 272 LHSZero, LHSOne, Depth+1); 273 // If the LHS already has zeros where RHSC does, this and is dead. 274 if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask)) 275 return TLO.CombineTo(Op, Op.getOperand(0)); 276 // If any of the set bits in the RHS are known zero on the LHS, shrink 277 // the constant. 278 if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask)) 279 return true; 280 } 281 282 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 283 KnownOne, TLO, Depth+1)) 284 return true; 285 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 286 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero, 287 KnownZero2, KnownOne2, TLO, Depth+1)) 288 return true; 289 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 290 291 // If all of the demanded bits are known one on one side, return the other. 292 // These bits cannot contribute to the result of the 'and'. 293 if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2)) 294 return TLO.CombineTo(Op, Op.getOperand(0)); 295 if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero)) 296 return TLO.CombineTo(Op, Op.getOperand(1)); 297 // If all of the demanded bits in the inputs are known zeros, return zero. 298 if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask) 299 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); 300 // If the RHS is a constant, see if we can simplify it. 301 if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2)) 302 return true; 303 304 // Output known-1 bits are only known if set in both the LHS & RHS. 305 KnownOne &= KnownOne2; 306 // Output known-0 are known to be clear if zero in either the LHS | RHS. 307 KnownZero |= KnownZero2; 308 break; 309 case ISD::OR: 310 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 311 KnownOne, TLO, Depth+1)) 312 return true; 313 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 314 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, 315 KnownZero2, KnownOne2, TLO, Depth+1)) 316 return true; 317 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 318 319 // If all of the demanded bits are known zero on one side, return the other. 320 // These bits cannot contribute to the result of the 'or'. 321 if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2)) 322 return TLO.CombineTo(Op, Op.getOperand(0)); 323 if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne)) 324 return TLO.CombineTo(Op, Op.getOperand(1)); 325 // If all of the potentially set bits on one side are known to be set on 326 // the other side, just use the 'other' side. 327 if ((DemandedMask & (~KnownZero) & KnownOne2) == 328 (DemandedMask & (~KnownZero))) 329 return TLO.CombineTo(Op, Op.getOperand(0)); 330 if ((DemandedMask & (~KnownZero2) & KnownOne) == 331 (DemandedMask & (~KnownZero2))) 332 return TLO.CombineTo(Op, Op.getOperand(1)); 333 // If the RHS is a constant, see if we can simplify it. 334 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 335 return true; 336 337 // Output known-0 bits are only known if clear in both the LHS & RHS. 338 KnownZero &= KnownZero2; 339 // Output known-1 are known to be set if set in either the LHS | RHS. 340 KnownOne |= KnownOne2; 341 break; 342 case ISD::XOR: 343 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 344 KnownOne, TLO, Depth+1)) 345 return true; 346 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 347 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2, 348 KnownOne2, TLO, Depth+1)) 349 return true; 350 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 351 352 // If all of the demanded bits are known zero on one side, return the other. 353 // These bits cannot contribute to the result of the 'xor'. 354 if ((DemandedMask & KnownZero) == DemandedMask) 355 return TLO.CombineTo(Op, Op.getOperand(0)); 356 if ((DemandedMask & KnownZero2) == DemandedMask) 357 return TLO.CombineTo(Op, Op.getOperand(1)); 358 359 // Output known-0 bits are known if clear or set in both the LHS & RHS. 360 KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 361 // Output known-1 are known to be set if set in only one of the LHS, RHS. 362 KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 363 364 // If all of the unknown bits are known to be zero on one side or the other 365 // (but not both) turn this into an *inclusive* or. 366 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 367 if (uint64_t UnknownBits = DemandedMask & ~(KnownZeroOut|KnownOneOut)) 368 if ((UnknownBits & (KnownZero|KnownZero2)) == UnknownBits) 369 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(), 370 Op.getOperand(0), 371 Op.getOperand(1))); 372 // If all of the demanded bits on one side are known, and all of the set 373 // bits on that side are also known to be set on the other side, turn this 374 // into an AND, as we know the bits will be cleared. 375 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 376 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known 377 if ((KnownOne & KnownOne2) == KnownOne) { 378 MVT::ValueType VT = Op.getValueType(); 379 SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT); 380 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0), 381 ANDC)); 382 } 383 } 384 385 // If the RHS is a constant, see if we can simplify it. 386 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 387 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 388 return true; 389 390 KnownZero = KnownZeroOut; 391 KnownOne = KnownOneOut; 392 break; 393 case ISD::SETCC: 394 // If we know the result of a setcc has the top bits zero, use this info. 395 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 396 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 397 break; 398 case ISD::SELECT: 399 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, 400 KnownOne, TLO, Depth+1)) 401 return true; 402 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2, 403 KnownOne2, TLO, Depth+1)) 404 return true; 405 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 406 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 407 408 // If the operands are constants, see if we can simplify them. 409 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 410 return true; 411 412 // Only known if known in both the LHS and RHS. 413 KnownOne &= KnownOne2; 414 KnownZero &= KnownZero2; 415 break; 416 case ISD::SELECT_CC: 417 if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, 418 KnownOne, TLO, Depth+1)) 419 return true; 420 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2, 421 KnownOne2, TLO, Depth+1)) 422 return true; 423 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 424 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 425 426 // If the operands are constants, see if we can simplify them. 427 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 428 return true; 429 430 // Only known if known in both the LHS and RHS. 431 KnownOne &= KnownOne2; 432 KnownZero &= KnownZero2; 433 break; 434 case ISD::SHL: 435 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 436 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> SA->getValue(), 437 KnownZero, KnownOne, TLO, Depth+1)) 438 return true; 439 KnownZero <<= SA->getValue(); 440 KnownOne <<= SA->getValue(); 441 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 442 } 443 break; 444 case ISD::SRL: 445 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 446 MVT::ValueType VT = Op.getValueType(); 447 unsigned ShAmt = SA->getValue(); 448 449 // Compute the new bits that are at the top now. 450 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 451 if (SimplifyDemandedBits(Op.getOperand(0), 452 (DemandedMask << ShAmt) & TypeMask, 453 KnownZero, KnownOne, TLO, Depth+1)) 454 return true; 455 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 456 KnownZero &= TypeMask; 457 KnownOne &= TypeMask; 458 KnownZero >>= ShAmt; 459 KnownOne >>= ShAmt; 460 461 uint64_t HighBits = (1ULL << ShAmt)-1; 462 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 463 KnownZero |= HighBits; // High bits known zero. 464 } 465 break; 466 case ISD::SRA: 467 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 468 MVT::ValueType VT = Op.getValueType(); 469 unsigned ShAmt = SA->getValue(); 470 471 // Compute the new bits that are at the top now. 472 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 473 474 uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask; 475 476 // If any of the demanded bits are produced by the sign extension, we also 477 // demand the input sign bit. 478 uint64_t HighBits = (1ULL << ShAmt)-1; 479 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 480 if (HighBits & DemandedMask) 481 InDemandedMask |= MVT::getIntVTSignBit(VT); 482 483 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, 484 KnownZero, KnownOne, TLO, Depth+1)) 485 return true; 486 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 487 KnownZero &= TypeMask; 488 KnownOne &= TypeMask; 489 KnownZero >>= ShAmt; 490 KnownOne >>= ShAmt; 491 492 // Handle the sign bits. 493 uint64_t SignBit = MVT::getIntVTSignBit(VT); 494 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 495 496 // If the input sign bit is known to be zero, or if none of the top bits 497 // are demanded, turn this into an unsigned shift right. 498 if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { 499 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0), 500 Op.getOperand(1))); 501 } else if (KnownOne & SignBit) { // New bits are known one. 502 KnownOne |= HighBits; 503 } 504 } 505 break; 506 case ISD::SIGN_EXTEND_INREG: { 507 MVT::ValueType VT = Op.getValueType(); 508 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 509 510 // Sign extension. Compute the demanded bits in the result that are not 511 // present in the input. 512 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask; 513 514 // If none of the extended bits are demanded, eliminate the sextinreg. 515 if (NewBits == 0) 516 return TLO.CombineTo(Op, Op.getOperand(0)); 517 518 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 519 int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT); 520 521 // Since the sign extended bits are demanded, we know that the sign 522 // bit is demanded. 523 InputDemandedBits |= InSignBit; 524 525 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, 526 KnownZero, KnownOne, TLO, Depth+1)) 527 return true; 528 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 529 530 // If the sign bit of the input is known set or clear, then we know the 531 // top bits of the result. 532 533 // If the input sign bit is known zero, convert this into a zero extension. 534 if (KnownZero & InSignBit) 535 return TLO.CombineTo(Op, 536 TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT)); 537 538 if (KnownOne & InSignBit) { // Input sign bit known set 539 KnownOne |= NewBits; 540 KnownZero &= ~NewBits; 541 } else { // Input sign bit unknown 542 KnownZero &= ~NewBits; 543 KnownOne &= ~NewBits; 544 } 545 break; 546 } 547 case ISD::CTTZ: 548 case ISD::CTLZ: 549 case ISD::CTPOP: { 550 MVT::ValueType VT = Op.getValueType(); 551 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 552 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 553 KnownOne = 0; 554 break; 555 } 556 case ISD::LOAD: { 557 if (ISD::isZEXTLoad(Op.Val)) { 558 LoadSDNode *LD = cast<LoadSDNode>(Op); 559 MVT::ValueType VT = LD->getLoadedVT(); 560 KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask; 561 } 562 break; 563 } 564 case ISD::ZERO_EXTEND: { 565 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 566 567 // If none of the top bits are demanded, convert this into an any_extend. 568 uint64_t NewBits = (~InMask) & DemandedMask; 569 if (NewBits == 0) 570 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 571 Op.getValueType(), 572 Op.getOperand(0))); 573 574 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 575 KnownZero, KnownOne, TLO, Depth+1)) 576 return true; 577 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 578 KnownZero |= NewBits; 579 break; 580 } 581 case ISD::SIGN_EXTEND: { 582 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 583 uint64_t InMask = MVT::getIntVTBitMask(InVT); 584 uint64_t InSignBit = MVT::getIntVTSignBit(InVT); 585 uint64_t NewBits = (~InMask) & DemandedMask; 586 587 // If none of the top bits are demanded, convert this into an any_extend. 588 if (NewBits == 0) 589 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(), 590 Op.getOperand(0))); 591 592 // Since some of the sign extended bits are demanded, we know that the sign 593 // bit is demanded. 594 uint64_t InDemandedBits = DemandedMask & InMask; 595 InDemandedBits |= InSignBit; 596 597 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 598 KnownOne, TLO, Depth+1)) 599 return true; 600 601 // If the sign bit is known zero, convert this to a zero extend. 602 if (KnownZero & InSignBit) 603 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 604 Op.getValueType(), 605 Op.getOperand(0))); 606 607 // If the sign bit is known one, the top bits match. 608 if (KnownOne & InSignBit) { 609 KnownOne |= NewBits; 610 KnownZero &= ~NewBits; 611 } else { // Otherwise, top bits aren't known. 612 KnownOne &= ~NewBits; 613 KnownZero &= ~NewBits; 614 } 615 break; 616 } 617 case ISD::ANY_EXTEND: { 618 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 619 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 620 KnownZero, KnownOne, TLO, Depth+1)) 621 return true; 622 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 623 break; 624 } 625 case ISD::TRUNCATE: { 626 // Simplify the input, using demanded bit information, and compute the known 627 // zero/one bits live out. 628 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, 629 KnownZero, KnownOne, TLO, Depth+1)) 630 return true; 631 632 // If the input is only used by this truncate, see if we can shrink it based 633 // on the known demanded bits. 634 if (Op.getOperand(0).Val->hasOneUse()) { 635 SDOperand In = Op.getOperand(0); 636 switch (In.getOpcode()) { 637 default: break; 638 case ISD::SRL: 639 // Shrink SRL by a constant if none of the high bits shifted in are 640 // demanded. 641 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ 642 uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType()); 643 HighBits &= ~MVT::getIntVTBitMask(Op.getValueType()); 644 HighBits >>= ShAmt->getValue(); 645 646 if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) && 647 (DemandedMask & HighBits) == 0) { 648 // None of the shifted in bits are needed. Add a truncate of the 649 // shift input, then shift it. 650 SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 651 Op.getValueType(), 652 In.getOperand(0)); 653 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(), 654 NewTrunc, In.getOperand(1))); 655 } 656 } 657 break; 658 } 659 } 660 661 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 662 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 663 KnownZero &= OutMask; 664 KnownOne &= OutMask; 665 break; 666 } 667 case ISD::AssertZext: { 668 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 669 uint64_t InMask = MVT::getIntVTBitMask(VT); 670 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 671 KnownZero, KnownOne, TLO, Depth+1)) 672 return true; 673 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 674 KnownZero |= ~InMask & DemandedMask; 675 break; 676 } 677 case ISD::ADD: 678 case ISD::SUB: 679 case ISD::INTRINSIC_WO_CHAIN: 680 case ISD::INTRINSIC_W_CHAIN: 681 case ISD::INTRINSIC_VOID: 682 // Just use ComputeMaskedBits to compute output bits. 683 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 684 break; 685 } 686 687 // If we know the value of all of the demanded bits, return this as a 688 // constant. 689 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) 690 return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); 691 692 return false; 693} 694 695/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use 696/// this predicate to simplify operations downstream. Mask is known to be zero 697/// for bits that V cannot have. 698bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 699 unsigned Depth) const { 700 uint64_t KnownZero, KnownOne; 701 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 702 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 703 return (KnownZero & Mask) == Mask; 704} 705 706/// ComputeMaskedBits - Determine which of the bits specified in Mask are 707/// known to be either zero or one and return them in the KnownZero/KnownOne 708/// bitsets. This code only analyzes bits in Mask, in order to short-circuit 709/// processing. 710void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 711 uint64_t &KnownZero, uint64_t &KnownOne, 712 unsigned Depth) const { 713 KnownZero = KnownOne = 0; // Don't know anything. 714 if (Depth == 6 || Mask == 0) 715 return; // Limit search depth. 716 717 uint64_t KnownZero2, KnownOne2; 718 719 switch (Op.getOpcode()) { 720 case ISD::Constant: 721 // We know all of the bits for a constant! 722 KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask; 723 KnownZero = ~KnownOne & Mask; 724 return; 725 case ISD::AND: 726 // If either the LHS or the RHS are Zero, the result is zero. 727 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 728 Mask &= ~KnownZero; 729 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 730 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 731 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 732 733 // Output known-1 bits are only known if set in both the LHS & RHS. 734 KnownOne &= KnownOne2; 735 // Output known-0 are known to be clear if zero in either the LHS | RHS. 736 KnownZero |= KnownZero2; 737 return; 738 case ISD::OR: 739 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 740 Mask &= ~KnownOne; 741 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 742 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 743 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 744 745 // Output known-0 bits are only known if clear in both the LHS & RHS. 746 KnownZero &= KnownZero2; 747 // Output known-1 are known to be set if set in either the LHS | RHS. 748 KnownOne |= KnownOne2; 749 return; 750 case ISD::XOR: { 751 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 752 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 753 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 754 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 755 756 // Output known-0 bits are known if clear or set in both the LHS & RHS. 757 uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 758 // Output known-1 are known to be set if set in only one of the LHS, RHS. 759 KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 760 KnownZero = KnownZeroOut; 761 return; 762 } 763 case ISD::SELECT: 764 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); 765 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); 766 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 767 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 768 769 // Only known if known in both the LHS and RHS. 770 KnownOne &= KnownOne2; 771 KnownZero &= KnownZero2; 772 return; 773 case ISD::SELECT_CC: 774 ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); 775 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); 776 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 777 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 778 779 // Only known if known in both the LHS and RHS. 780 KnownOne &= KnownOne2; 781 KnownZero &= KnownZero2; 782 return; 783 case ISD::SETCC: 784 // If we know the result of a setcc has the top bits zero, use this info. 785 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 786 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 787 return; 788 case ISD::SHL: 789 // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 790 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 791 ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), 792 KnownZero, KnownOne, Depth+1); 793 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 794 KnownZero <<= SA->getValue(); 795 KnownOne <<= SA->getValue(); 796 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 797 } 798 return; 799 case ISD::SRL: 800 // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 801 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 802 MVT::ValueType VT = Op.getValueType(); 803 unsigned ShAmt = SA->getValue(); 804 805 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 806 ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, 807 KnownZero, KnownOne, Depth+1); 808 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 809 KnownZero &= TypeMask; 810 KnownOne &= TypeMask; 811 KnownZero >>= ShAmt; 812 KnownOne >>= ShAmt; 813 814 uint64_t HighBits = (1ULL << ShAmt)-1; 815 HighBits <<= MVT::getSizeInBits(VT)-ShAmt; 816 KnownZero |= HighBits; // High bits known zero. 817 } 818 return; 819 case ISD::SRA: 820 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 821 MVT::ValueType VT = Op.getValueType(); 822 unsigned ShAmt = SA->getValue(); 823 824 // Compute the new bits that are at the top now. 825 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 826 827 uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; 828 // If any of the demanded bits are produced by the sign extension, we also 829 // demand the input sign bit. 830 uint64_t HighBits = (1ULL << ShAmt)-1; 831 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 832 if (HighBits & Mask) 833 InDemandedMask |= MVT::getIntVTSignBit(VT); 834 835 ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, 836 Depth+1); 837 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 838 KnownZero &= TypeMask; 839 KnownOne &= TypeMask; 840 KnownZero >>= ShAmt; 841 KnownOne >>= ShAmt; 842 843 // Handle the sign bits. 844 uint64_t SignBit = MVT::getIntVTSignBit(VT); 845 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 846 847 if (KnownZero & SignBit) { 848 KnownZero |= HighBits; // New bits are known zero. 849 } else if (KnownOne & SignBit) { 850 KnownOne |= HighBits; // New bits are known one. 851 } 852 } 853 return; 854 case ISD::SIGN_EXTEND_INREG: { 855 MVT::ValueType VT = Op.getValueType(); 856 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 857 858 // Sign extension. Compute the demanded bits in the result that are not 859 // present in the input. 860 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; 861 862 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 863 int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); 864 865 // If the sign extended bits are demanded, we know that the sign 866 // bit is demanded. 867 if (NewBits) 868 InputDemandedBits |= InSignBit; 869 870 ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, 871 KnownZero, KnownOne, Depth+1); 872 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 873 874 // If the sign bit of the input is known set or clear, then we know the 875 // top bits of the result. 876 if (KnownZero & InSignBit) { // Input sign bit known clear 877 KnownZero |= NewBits; 878 KnownOne &= ~NewBits; 879 } else if (KnownOne & InSignBit) { // Input sign bit known set 880 KnownOne |= NewBits; 881 KnownZero &= ~NewBits; 882 } else { // Input sign bit unknown 883 KnownZero &= ~NewBits; 884 KnownOne &= ~NewBits; 885 } 886 return; 887 } 888 case ISD::CTTZ: 889 case ISD::CTLZ: 890 case ISD::CTPOP: { 891 MVT::ValueType VT = Op.getValueType(); 892 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 893 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 894 KnownOne = 0; 895 return; 896 } 897 case ISD::LOAD: { 898 if (ISD::isZEXTLoad(Op.Val)) { 899 LoadSDNode *LD = cast<LoadSDNode>(Op); 900 MVT::ValueType VT = LD->getLoadedVT(); 901 KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; 902 } 903 return; 904 } 905 case ISD::ZERO_EXTEND: { 906 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 907 uint64_t NewBits = (~InMask) & Mask; 908 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 909 KnownOne, Depth+1); 910 KnownZero |= NewBits & Mask; 911 KnownOne &= ~NewBits; 912 return; 913 } 914 case ISD::SIGN_EXTEND: { 915 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 916 unsigned InBits = MVT::getSizeInBits(InVT); 917 uint64_t InMask = MVT::getIntVTBitMask(InVT); 918 uint64_t InSignBit = 1ULL << (InBits-1); 919 uint64_t NewBits = (~InMask) & Mask; 920 uint64_t InDemandedBits = Mask & InMask; 921 922 // If any of the sign extended bits are demanded, we know that the sign 923 // bit is demanded. 924 if (NewBits & Mask) 925 InDemandedBits |= InSignBit; 926 927 ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 928 KnownOne, Depth+1); 929 // If the sign bit is known zero or one, the top bits match. 930 if (KnownZero & InSignBit) { 931 KnownZero |= NewBits; 932 KnownOne &= ~NewBits; 933 } else if (KnownOne & InSignBit) { 934 KnownOne |= NewBits; 935 KnownZero &= ~NewBits; 936 } else { // Otherwise, top bits aren't known. 937 KnownOne &= ~NewBits; 938 KnownZero &= ~NewBits; 939 } 940 return; 941 } 942 case ISD::ANY_EXTEND: { 943 MVT::ValueType VT = Op.getOperand(0).getValueType(); 944 ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), 945 KnownZero, KnownOne, Depth+1); 946 return; 947 } 948 case ISD::TRUNCATE: { 949 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 950 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 951 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 952 KnownZero &= OutMask; 953 KnownOne &= OutMask; 954 break; 955 } 956 case ISD::AssertZext: { 957 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 958 uint64_t InMask = MVT::getIntVTBitMask(VT); 959 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 960 KnownOne, Depth+1); 961 KnownZero |= (~InMask) & Mask; 962 return; 963 } 964 case ISD::ADD: { 965 // If either the LHS or the RHS are Zero, the result is zero. 966 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 967 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 968 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 969 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 970 971 // Output known-0 bits are known if clear or set in both the low clear bits 972 // common to both LHS & RHS. For example, 8+(X<<3) is known to have the 973 // low 3 bits clear. 974 uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 975 CountTrailingZeros_64(~KnownZero2)); 976 977 KnownZero = (1ULL << KnownZeroOut) - 1; 978 KnownOne = 0; 979 return; 980 } 981 case ISD::SUB: { 982 ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); 983 if (!CLHS) return; 984 985 // We know that the top bits of C-X are clear if X contains less bits 986 // than C (i.e. no wrap-around can happen). For example, 20-X is 987 // positive if we can prove that X is >= 0 and < 16. 988 MVT::ValueType VT = CLHS->getValueType(0); 989 if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear 990 unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); 991 uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit 992 MaskV = ~MaskV & MVT::getIntVTBitMask(VT); 993 ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); 994 995 // If all of the MaskV bits are known to be zero, then we know the output 996 // top bits are zero, because we now know that the output is from [0-C]. 997 if ((KnownZero & MaskV) == MaskV) { 998 unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); 999 KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. 1000 KnownOne = 0; // No one bits known. 1001 } else { 1002 KnownZero = KnownOne = 0; // Otherwise, nothing known. 1003 } 1004 } 1005 return; 1006 } 1007 default: 1008 // Allow the target to implement this method for its nodes. 1009 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { 1010 case ISD::INTRINSIC_WO_CHAIN: 1011 case ISD::INTRINSIC_W_CHAIN: 1012 case ISD::INTRINSIC_VOID: 1013 computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne); 1014 } 1015 return; 1016 } 1017} 1018 1019/// computeMaskedBitsForTargetNode - Determine which of the bits specified 1020/// in Mask are known to be either zero or one and return them in the 1021/// KnownZero/KnownOne bitsets. 1022void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 1023 uint64_t Mask, 1024 uint64_t &KnownZero, 1025 uint64_t &KnownOne, 1026 unsigned Depth) const { 1027 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1028 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1029 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1030 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1031 "Should use MaskedValueIsZero if you don't know whether Op" 1032 " is a target node!"); 1033 KnownZero = 0; 1034 KnownOne = 0; 1035} 1036 1037/// ComputeNumSignBits - Return the number of times the sign bit of the 1038/// register is replicated into the other bits. We know that at least 1 bit 1039/// is always equal to the sign bit (itself), but other cases can give us 1040/// information. For example, immediately after an "SRA X, 2", we know that 1041/// the top 3 bits are all equal to each other, so we return 3. 1042unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ 1043 MVT::ValueType VT = Op.getValueType(); 1044 assert(MVT::isInteger(VT) && "Invalid VT!"); 1045 unsigned VTBits = MVT::getSizeInBits(VT); 1046 unsigned Tmp, Tmp2; 1047 1048 if (Depth == 6) 1049 return 1; // Limit search depth. 1050 1051 switch (Op.getOpcode()) { 1052 default: break; 1053 case ISD::AssertSext: 1054 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1055 return VTBits-Tmp+1; 1056 case ISD::AssertZext: 1057 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1058 return VTBits-Tmp; 1059 1060 case ISD::Constant: { 1061 uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); 1062 // If negative, invert the bits, then look at it. 1063 if (Val & MVT::getIntVTSignBit(VT)) 1064 Val = ~Val; 1065 1066 // Shift the bits so they are the leading bits in the int64_t. 1067 Val <<= 64-VTBits; 1068 1069 // Return # leading zeros. We use 'min' here in case Val was zero before 1070 // shifting. We don't want to return '64' as for an i32 "0". 1071 return std::min(VTBits, CountLeadingZeros_64(Val)); 1072 } 1073 1074 case ISD::SIGN_EXTEND: 1075 Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); 1076 return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; 1077 1078 case ISD::SIGN_EXTEND_INREG: 1079 // Max of the input and what this extends. 1080 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1081 Tmp = VTBits-Tmp+1; 1082 1083 Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1084 return std::max(Tmp, Tmp2); 1085 1086 case ISD::SRA: 1087 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1088 // SRA X, C -> adds C sign bits. 1089 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1090 Tmp += C->getValue(); 1091 if (Tmp > VTBits) Tmp = VTBits; 1092 } 1093 return Tmp; 1094 case ISD::SHL: 1095 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1096 // shl destroys sign bits. 1097 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1098 if (C->getValue() >= VTBits || // Bad shift. 1099 C->getValue() >= Tmp) break; // Shifted all sign bits out. 1100 return Tmp - C->getValue(); 1101 } 1102 break; 1103 case ISD::AND: 1104 case ISD::OR: 1105 case ISD::XOR: // NOT is handled here. 1106 // Logical binary ops preserve the number of sign bits. 1107 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1108 if (Tmp == 1) return 1; // Early out. 1109 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1110 return std::min(Tmp, Tmp2); 1111 1112 case ISD::SELECT: 1113 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1114 if (Tmp == 1) return 1; // Early out. 1115 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1116 return std::min(Tmp, Tmp2); 1117 1118 case ISD::SETCC: 1119 // If setcc returns 0/-1, all bits are sign bits. 1120 if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult) 1121 return VTBits; 1122 break; 1123 case ISD::ROTL: 1124 case ISD::ROTR: 1125 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1126 unsigned RotAmt = C->getValue() & (VTBits-1); 1127 1128 // Handle rotate right by N like a rotate left by 32-N. 1129 if (Op.getOpcode() == ISD::ROTR) 1130 RotAmt = (VTBits-RotAmt) & (VTBits-1); 1131 1132 // If we aren't rotating out all of the known-in sign bits, return the 1133 // number that are left. This handles rotl(sext(x), 1) for example. 1134 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1135 if (Tmp > RotAmt+1) return Tmp-RotAmt; 1136 } 1137 break; 1138 case ISD::ADD: 1139 // Add can have at most one carry bit. Thus we know that the output 1140 // is, at worst, one more bit than the inputs. 1141 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1142 if (Tmp == 1) return 1; // Early out. 1143 1144 // Special case decrementing a value (ADD X, -1): 1145 if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1146 if (CRHS->isAllOnesValue()) { 1147 uint64_t KnownZero, KnownOne; 1148 uint64_t Mask = MVT::getIntVTBitMask(VT); 1149 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 1150 1151 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1152 // sign bits set. 1153 if ((KnownZero|1) == Mask) 1154 return VTBits; 1155 1156 // If we are subtracting one from a positive number, there is no carry 1157 // out of the result. 1158 if (KnownZero & MVT::getIntVTSignBit(VT)) 1159 return Tmp; 1160 } 1161 1162 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1163 if (Tmp2 == 1) return 1; 1164 return std::min(Tmp, Tmp2)-1; 1165 break; 1166 1167 case ISD::SUB: 1168 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1169 if (Tmp2 == 1) return 1; 1170 1171 // Handle NEG. 1172 if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1173 if (CLHS->getValue() == 0) { 1174 uint64_t KnownZero, KnownOne; 1175 uint64_t Mask = MVT::getIntVTBitMask(VT); 1176 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 1177 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1178 // sign bits set. 1179 if ((KnownZero|1) == Mask) 1180 return VTBits; 1181 1182 // If the input is known to be positive (the sign bit is known clear), 1183 // the output of the NEG has the same number of sign bits as the input. 1184 if (KnownZero & MVT::getIntVTSignBit(VT)) 1185 return Tmp2; 1186 1187 // Otherwise, we treat this like a SUB. 1188 } 1189 1190 // Sub can have at most one carry bit. Thus we know that the output 1191 // is, at worst, one more bit than the inputs. 1192 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1193 if (Tmp == 1) return 1; // Early out. 1194 return std::min(Tmp, Tmp2)-1; 1195 break; 1196 case ISD::TRUNCATE: 1197 // FIXME: it's tricky to do anything useful for this, but it is an important 1198 // case for targets like X86. 1199 break; 1200 } 1201 1202 // Handle LOADX separately here. EXTLOAD case will fallthrough. 1203 if (Op.getOpcode() == ISD::LOAD) { 1204 LoadSDNode *LD = cast<LoadSDNode>(Op); 1205 unsigned ExtType = LD->getExtensionType(); 1206 switch (ExtType) { 1207 default: break; 1208 case ISD::SEXTLOAD: // '17' bits known 1209 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1210 return VTBits-Tmp+1; 1211 case ISD::ZEXTLOAD: // '16' bits known 1212 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1213 return VTBits-Tmp; 1214 } 1215 } 1216 1217 // Allow the target to implement this method for its nodes. 1218 if (Op.getOpcode() >= ISD::BUILTIN_OP_END || 1219 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1220 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1221 Op.getOpcode() == ISD::INTRINSIC_VOID) { 1222 unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth); 1223 if (NumBits > 1) return NumBits; 1224 } 1225 1226 // Finally, if we can prove that the top bits of the result are 0's or 1's, 1227 // use this information. 1228 uint64_t KnownZero, KnownOne; 1229 uint64_t Mask = MVT::getIntVTBitMask(VT); 1230 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 1231 1232 uint64_t SignBit = MVT::getIntVTSignBit(VT); 1233 if (KnownZero & SignBit) { // SignBit is 0 1234 Mask = KnownZero; 1235 } else if (KnownOne & SignBit) { // SignBit is 1; 1236 Mask = KnownOne; 1237 } else { 1238 // Nothing known. 1239 return 1; 1240 } 1241 1242 // Okay, we know that the sign bit in Mask is set. Use CLZ to determine 1243 // the number of identical bits in the top of the input value. 1244 Mask ^= ~0ULL; 1245 Mask <<= 64-VTBits; 1246 // Return # leading zeros. We use 'min' here in case Val was zero before 1247 // shifting. We don't want to return '64' as for an i32 "0". 1248 return std::min(VTBits, CountLeadingZeros_64(Mask)); 1249} 1250 1251 1252 1253/// ComputeNumSignBitsForTargetNode - This method can be implemented by 1254/// targets that want to expose additional information about sign bits to the 1255/// DAG Combiner. 1256unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op, 1257 unsigned Depth) const { 1258 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1259 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1260 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1261 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1262 "Should use ComputeNumSignBits if you don't know whether Op" 1263 " is a target node!"); 1264 return 1; 1265} 1266 1267 1268SDOperand TargetLowering:: 1269PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1270 // Default implementation: no optimization. 1271 return SDOperand(); 1272} 1273 1274//===----------------------------------------------------------------------===// 1275// Inline Assembler Implementation Methods 1276//===----------------------------------------------------------------------===// 1277 1278TargetLowering::ConstraintType 1279TargetLowering::getConstraintType(char ConstraintLetter) const { 1280 // FIXME: lots more standard ones to handle. 1281 switch (ConstraintLetter) { 1282 default: return C_Unknown; 1283 case 'r': return C_RegisterClass; 1284 case 'm': // memory 1285 case 'o': // offsetable 1286 case 'V': // not offsetable 1287 return C_Memory; 1288 case 'i': // Simple Integer or Relocatable Constant 1289 case 'n': // Simple Integer 1290 case 's': // Relocatable Constant 1291 case 'I': // Target registers. 1292 case 'J': 1293 case 'K': 1294 case 'L': 1295 case 'M': 1296 case 'N': 1297 case 'O': 1298 case 'P': 1299 return C_Other; 1300 } 1301} 1302 1303bool TargetLowering::isOperandValidForConstraint(SDOperand Op, 1304 char ConstraintLetter) { 1305 switch (ConstraintLetter) { 1306 default: return false; 1307 case 'i': // Simple Integer or Relocatable Constant 1308 case 'n': // Simple Integer 1309 case 's': // Relocatable Constant 1310 return true; // FIXME: not right. 1311 } 1312} 1313 1314 1315std::vector<unsigned> TargetLowering:: 1316getRegClassForInlineAsmConstraint(const std::string &Constraint, 1317 MVT::ValueType VT) const { 1318 return std::vector<unsigned>(); 1319} 1320 1321 1322std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: 1323getRegForInlineAsmConstraint(const std::string &Constraint, 1324 MVT::ValueType VT) const { 1325 if (Constraint[0] != '{') 1326 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1327 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); 1328 1329 // Remove the braces from around the name. 1330 std::string RegName(Constraint.begin()+1, Constraint.end()-1); 1331 1332 // Figure out which register class contains this reg. 1333 const MRegisterInfo *RI = TM.getRegisterInfo(); 1334 for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), 1335 E = RI->regclass_end(); RCI != E; ++RCI) { 1336 const TargetRegisterClass *RC = *RCI; 1337 1338 // If none of the the value types for this register class are valid, we 1339 // can't use it. For example, 64-bit reg classes on 32-bit targets. 1340 bool isLegal = false; 1341 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); 1342 I != E; ++I) { 1343 if (isTypeLegal(*I)) { 1344 isLegal = true; 1345 break; 1346 } 1347 } 1348 1349 if (!isLegal) continue; 1350 1351 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 1352 I != E; ++I) { 1353 if (StringsEqualNoCase(RegName, RI->get(*I).Name)) 1354 return std::make_pair(*I, RC); 1355 } 1356 } 1357 1358 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1359} 1360 1361//===----------------------------------------------------------------------===// 1362// Loop Strength Reduction hooks 1363//===----------------------------------------------------------------------===// 1364 1365/// isLegalAddressImmediate - Return true if the integer value or 1366/// GlobalValue can be used as the offset of the target addressing mode. 1367bool TargetLowering::isLegalAddressImmediate(int64_t V) const { 1368 return false; 1369} 1370bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 1371 return false; 1372} 1373 1374 1375// Magic for divide replacement 1376 1377struct ms { 1378 int64_t m; // magic number 1379 int64_t s; // shift amount 1380}; 1381 1382struct mu { 1383 uint64_t m; // magic number 1384 int64_t a; // add indicator 1385 int64_t s; // shift amount 1386}; 1387 1388/// magic - calculate the magic numbers required to codegen an integer sdiv as 1389/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1390/// or -1. 1391static ms magic32(int32_t d) { 1392 int32_t p; 1393 uint32_t ad, anc, delta, q1, r1, q2, r2, t; 1394 const uint32_t two31 = 0x80000000U; 1395 struct ms mag; 1396 1397 ad = abs(d); 1398 t = two31 + ((uint32_t)d >> 31); 1399 anc = t - 1 - t%ad; // absolute value of nc 1400 p = 31; // initialize p 1401 q1 = two31/anc; // initialize q1 = 2p/abs(nc) 1402 r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1403 q2 = two31/ad; // initialize q2 = 2p/abs(d) 1404 r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1405 do { 1406 p = p + 1; 1407 q1 = 2*q1; // update q1 = 2p/abs(nc) 1408 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1409 if (r1 >= anc) { // must be unsigned comparison 1410 q1 = q1 + 1; 1411 r1 = r1 - anc; 1412 } 1413 q2 = 2*q2; // update q2 = 2p/abs(d) 1414 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1415 if (r2 >= ad) { // must be unsigned comparison 1416 q2 = q2 + 1; 1417 r2 = r2 - ad; 1418 } 1419 delta = ad - r2; 1420 } while (q1 < delta || (q1 == delta && r1 == 0)); 1421 1422 mag.m = (int32_t)(q2 + 1); // make sure to sign extend 1423 if (d < 0) mag.m = -mag.m; // resulting magic number 1424 mag.s = p - 32; // resulting shift 1425 return mag; 1426} 1427 1428/// magicu - calculate the magic numbers required to codegen an integer udiv as 1429/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 1430static mu magicu32(uint32_t d) { 1431 int32_t p; 1432 uint32_t nc, delta, q1, r1, q2, r2; 1433 struct mu magu; 1434 magu.a = 0; // initialize "add" indicator 1435 nc = - 1 - (-d)%d; 1436 p = 31; // initialize p 1437 q1 = 0x80000000/nc; // initialize q1 = 2p/nc 1438 r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) 1439 q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d 1440 r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) 1441 do { 1442 p = p + 1; 1443 if (r1 >= nc - r1 ) { 1444 q1 = 2*q1 + 1; // update q1 1445 r1 = 2*r1 - nc; // update r1 1446 } 1447 else { 1448 q1 = 2*q1; // update q1 1449 r1 = 2*r1; // update r1 1450 } 1451 if (r2 + 1 >= d - r2) { 1452 if (q2 >= 0x7FFFFFFF) magu.a = 1; 1453 q2 = 2*q2 + 1; // update q2 1454 r2 = 2*r2 + 1 - d; // update r2 1455 } 1456 else { 1457 if (q2 >= 0x80000000) magu.a = 1; 1458 q2 = 2*q2; // update q2 1459 r2 = 2*r2 + 1; // update r2 1460 } 1461 delta = d - 1 - r2; 1462 } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); 1463 magu.m = q2 + 1; // resulting magic number 1464 magu.s = p - 32; // resulting shift 1465 return magu; 1466} 1467 1468/// magic - calculate the magic numbers required to codegen an integer sdiv as 1469/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1470/// or -1. 1471static ms magic64(int64_t d) { 1472 int64_t p; 1473 uint64_t ad, anc, delta, q1, r1, q2, r2, t; 1474 const uint64_t two63 = 9223372036854775808ULL; // 2^63 1475 struct ms mag; 1476 1477 ad = d >= 0 ? d : -d; 1478 t = two63 + ((uint64_t)d >> 63); 1479 anc = t - 1 - t%ad; // absolute value of nc 1480 p = 63; // initialize p 1481 q1 = two63/anc; // initialize q1 = 2p/abs(nc) 1482 r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1483 q2 = two63/ad; // initialize q2 = 2p/abs(d) 1484 r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1485 do { 1486 p = p + 1; 1487 q1 = 2*q1; // update q1 = 2p/abs(nc) 1488 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1489 if (r1 >= anc) { // must be unsigned comparison 1490 q1 = q1 + 1; 1491 r1 = r1 - anc; 1492 } 1493 q2 = 2*q2; // update q2 = 2p/abs(d) 1494 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1495 if (r2 >= ad) { // must be unsigned comparison 1496 q2 = q2 + 1; 1497 r2 = r2 - ad; 1498 } 1499 delta = ad - r2; 1500 } while (q1 < delta || (q1 == delta && r1 == 0)); 1501 1502 mag.m = q2 + 1; 1503 if (d < 0) mag.m = -mag.m; // resulting magic number 1504 mag.s = p - 64; // resulting shift 1505 return mag; 1506} 1507 1508/// magicu - calculate the magic numbers required to codegen an integer udiv as 1509/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 1510static mu magicu64(uint64_t d) 1511{ 1512 int64_t p; 1513 uint64_t nc, delta, q1, r1, q2, r2; 1514 struct mu magu; 1515 magu.a = 0; // initialize "add" indicator 1516 nc = - 1 - (-d)%d; 1517 p = 63; // initialize p 1518 q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc 1519 r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc) 1520 q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d 1521 r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d) 1522 do { 1523 p = p + 1; 1524 if (r1 >= nc - r1 ) { 1525 q1 = 2*q1 + 1; // update q1 1526 r1 = 2*r1 - nc; // update r1 1527 } 1528 else { 1529 q1 = 2*q1; // update q1 1530 r1 = 2*r1; // update r1 1531 } 1532 if (r2 + 1 >= d - r2) { 1533 if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1; 1534 q2 = 2*q2 + 1; // update q2 1535 r2 = 2*r2 + 1 - d; // update r2 1536 } 1537 else { 1538 if (q2 >= 0x8000000000000000ull) magu.a = 1; 1539 q2 = 2*q2; // update q2 1540 r2 = 2*r2 + 1; // update r2 1541 } 1542 delta = d - 1 - r2; 1543 } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0))); 1544 magu.m = q2 + 1; // resulting magic number 1545 magu.s = p - 64; // resulting shift 1546 return magu; 1547} 1548 1549/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 1550/// return a DAG expression to select that will generate the same value by 1551/// multiplying by a magic number. See: 1552/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 1553SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 1554 std::vector<SDNode*>* Created) const { 1555 MVT::ValueType VT = N->getValueType(0); 1556 1557 // Check to see if we can do this. 1558 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) 1559 return SDOperand(); // BuildSDIV only operates on i32 or i64 1560 if (!isOperationLegal(ISD::MULHS, VT)) 1561 return SDOperand(); // Make sure the target supports MULHS. 1562 1563 int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended(); 1564 ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d); 1565 1566 // Multiply the numerator (operand 0) by the magic value 1567 SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0), 1568 DAG.getConstant(magics.m, VT)); 1569 // If d > 0 and m < 0, add the numerator 1570 if (d > 0 && magics.m < 0) { 1571 Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0)); 1572 if (Created) 1573 Created->push_back(Q.Val); 1574 } 1575 // If d < 0 and m > 0, subtract the numerator. 1576 if (d < 0 && magics.m > 0) { 1577 Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0)); 1578 if (Created) 1579 Created->push_back(Q.Val); 1580 } 1581 // Shift right algebraic if shift value is nonzero 1582 if (magics.s > 0) { 1583 Q = DAG.getNode(ISD::SRA, VT, Q, 1584 DAG.getConstant(magics.s, getShiftAmountTy())); 1585 if (Created) 1586 Created->push_back(Q.Val); 1587 } 1588 // Extract the sign bit and add it to the quotient 1589 SDOperand T = 1590 DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1, 1591 getShiftAmountTy())); 1592 if (Created) 1593 Created->push_back(T.Val); 1594 return DAG.getNode(ISD::ADD, VT, Q, T); 1595} 1596 1597/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, 1598/// return a DAG expression to select that will generate the same value by 1599/// multiplying by a magic number. See: 1600/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 1601SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, 1602 std::vector<SDNode*>* Created) const { 1603 MVT::ValueType VT = N->getValueType(0); 1604 1605 // Check to see if we can do this. 1606 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) 1607 return SDOperand(); // BuildUDIV only operates on i32 or i64 1608 if (!isOperationLegal(ISD::MULHU, VT)) 1609 return SDOperand(); // Make sure the target supports MULHU. 1610 1611 uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); 1612 mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d); 1613 1614 // Multiply the numerator (operand 0) by the magic value 1615 SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0), 1616 DAG.getConstant(magics.m, VT)); 1617 if (Created) 1618 Created->push_back(Q.Val); 1619 1620 if (magics.a == 0) { 1621 return DAG.getNode(ISD::SRL, VT, Q, 1622 DAG.getConstant(magics.s, getShiftAmountTy())); 1623 } else { 1624 SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q); 1625 if (Created) 1626 Created->push_back(NPQ.Val); 1627 NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 1628 DAG.getConstant(1, getShiftAmountTy())); 1629 if (Created) 1630 Created->push_back(NPQ.Val); 1631 NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q); 1632 if (Created) 1633 Created->push_back(NPQ.Val); 1634 return DAG.getNode(ISD::SRL, VT, NPQ, 1635 DAG.getConstant(magics.s-1, getShiftAmountTy())); 1636 } 1637} 1638