TargetLowering.cpp revision 3687c1a4d3ecbe6709fddd77d79dc6b128d31431
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the TargetLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Target/TargetLowering.h" 15#include "llvm/Target/TargetData.h" 16#include "llvm/Target/TargetMachine.h" 17#include "llvm/Target/MRegisterInfo.h" 18#include "llvm/DerivedTypes.h" 19#include "llvm/CodeGen/SelectionDAG.h" 20#include "llvm/ADT/StringExtras.h" 21#include "llvm/Support/MathExtras.h" 22using namespace llvm; 23 24TargetLowering::TargetLowering(TargetMachine &tm) 25 : TM(tm), TD(TM.getTargetData()) { 26 assert(ISD::BUILTIN_OP_END <= 156 && 27 "Fixed size array in TargetLowering is not large enough!"); 28 // All operations default to being supported. 29 memset(OpActions, 0, sizeof(OpActions)); 30 memset(LoadXActions, 0, sizeof(LoadXActions)); 31 memset(&StoreXActions, 0, sizeof(StoreXActions)); 32 // Initialize all indexed load / store to expand. 
33 for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { 34 for (unsigned IM = (unsigned)ISD::PRE_INC; 35 IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { 36 setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand); 37 setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand); 38 } 39 } 40 41 IsLittleEndian = TD->isLittleEndian(); 42 UsesGlobalOffsetTable = false; 43 ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType()); 44 ShiftAmtHandling = Undefined; 45 memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); 46 memset(TargetDAGCombineArray, 0, 47 sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0])); 48 maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; 49 allowUnalignedMemoryAccesses = false; 50 UseUnderscoreSetJmpLongJmp = false; 51 IntDivIsCheap = false; 52 Pow2DivIsCheap = false; 53 StackPointerRegisterToSaveRestore = 0; 54 SchedPreferenceInfo = SchedulingForLatency; 55 JumpBufSize = 0; 56 JumpBufAlignment = 0; 57} 58 59TargetLowering::~TargetLowering() {} 60 61/// setValueTypeAction - Set the action for a particular value type. This 62/// assumes an action has not already been set for this value type. 
63static void SetValueTypeAction(MVT::ValueType VT, 64 TargetLowering::LegalizeAction Action, 65 TargetLowering &TLI, 66 MVT::ValueType *TransformToType, 67 TargetLowering::ValueTypeActionImpl &ValueTypeActions) { 68 ValueTypeActions.setTypeAction(VT, Action); 69 if (Action == TargetLowering::Promote) { 70 MVT::ValueType PromoteTo; 71 if (VT == MVT::f32) 72 PromoteTo = MVT::f64; 73 else { 74 unsigned LargerReg = VT+1; 75 while (!TLI.isTypeLegal((MVT::ValueType)LargerReg)) { 76 ++LargerReg; 77 assert(MVT::isInteger((MVT::ValueType)LargerReg) && 78 "Nothing to promote to??"); 79 } 80 PromoteTo = (MVT::ValueType)LargerReg; 81 } 82 83 assert(MVT::isInteger(VT) == MVT::isInteger(PromoteTo) && 84 MVT::isFloatingPoint(VT) == MVT::isFloatingPoint(PromoteTo) && 85 "Can only promote from int->int or fp->fp!"); 86 assert(VT < PromoteTo && "Must promote to a larger type!"); 87 TransformToType[VT] = PromoteTo; 88 } else if (Action == TargetLowering::Expand) { 89 assert((VT == MVT::Vector || MVT::isInteger(VT)) && VT > MVT::i8 && 90 "Cannot expand this type: target must support SOME integer reg!"); 91 // Expand to the next smaller integer type! 92 TransformToType[VT] = (MVT::ValueType)(VT-1); 93 } 94} 95 96 97/// computeRegisterProperties - Once all of the register classes are added, 98/// this allows us to compute derived properties we expose. 99void TargetLowering::computeRegisterProperties() { 100 assert(MVT::LAST_VALUETYPE <= 32 && 101 "Too many value types for ValueTypeActions to hold!"); 102 103 // Everything defaults to one. 104 for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) 105 NumElementsForVT[i] = 1; 106 107 // Find the largest integer register class. 
108 unsigned LargestIntReg = MVT::i128; 109 for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) 110 assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); 111 112 // Every integer value type larger than this largest register takes twice as 113 // many registers to represent as the previous ValueType. 114 unsigned ExpandedReg = LargestIntReg; ++LargestIntReg; 115 for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg) 116 NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1]; 117 118 // Inspect all of the ValueType's possible, deciding how to process them. 119 for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg) 120 // If we are expanding this type, expand it! 121 if (getNumElements((MVT::ValueType)IntReg) != 1) 122 SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType, 123 ValueTypeActions); 124 else if (!isTypeLegal((MVT::ValueType)IntReg)) 125 // Otherwise, if we don't have native support, we must promote to a 126 // larger type. 127 SetValueTypeAction((MVT::ValueType)IntReg, Promote, *this, 128 TransformToType, ValueTypeActions); 129 else 130 TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg; 131 132 // If the target does not have native support for F32, promote it to F64. 133 if (!isTypeLegal(MVT::f32)) 134 SetValueTypeAction(MVT::f32, Promote, *this, 135 TransformToType, ValueTypeActions); 136 else 137 TransformToType[MVT::f32] = MVT::f32; 138 139 // Set MVT::Vector to always be Expanded 140 SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType, 141 ValueTypeActions); 142 143 // Loop over all of the legal vector value types, specifying an identity type 144 // transformation. 
145 for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; 146 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 147 if (isTypeLegal((MVT::ValueType)i)) 148 TransformToType[i] = (MVT::ValueType)i; 149 } 150 151 assert(isTypeLegal(MVT::f64) && "Target does not support FP?"); 152 TransformToType[MVT::f64] = MVT::f64; 153} 154 155const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { 156 return NULL; 157} 158 159/// getPackedTypeBreakdown - Packed types are broken down into some number of 160/// legal first class types. For example, <8 x float> maps to 2 MVT::v4f32 161/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 162/// 163/// This method returns the number and type of the resultant breakdown. 164/// 165unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy, 166 MVT::ValueType &PTyElementVT, 167 MVT::ValueType &PTyLegalElementVT) const { 168 // Figure out the right, legal destination reg to copy into. 169 unsigned NumElts = PTy->getNumElements(); 170 MVT::ValueType EltTy = getValueType(PTy->getElementType()); 171 172 unsigned NumVectorRegs = 1; 173 174 // Divide the input until we get to a supported size. This will always 175 // end with a scalar if the target doesn't support vectors. 176 while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) { 177 NumElts >>= 1; 178 NumVectorRegs <<= 1; 179 } 180 181 MVT::ValueType VT; 182 if (NumElts == 1) { 183 VT = EltTy; 184 } else { 185 VT = getVectorType(EltTy, NumElts); 186 } 187 PTyElementVT = VT; 188 189 MVT::ValueType DestVT = getTypeToTransformTo(VT); 190 PTyLegalElementVT = DestVT; 191 if (DestVT < VT) { 192 // Value is expanded, e.g. i64 -> i16. 193 return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT)); 194 } else { 195 // Otherwise, promotion or legal types use the same number of registers as 196 // the vector decimated to the appropriate level. 
197 return NumVectorRegs; 198 } 199 200 return 1; 201} 202 203//===----------------------------------------------------------------------===// 204// Optimization Methods 205//===----------------------------------------------------------------------===// 206 207/// ShrinkDemandedConstant - Check to see if the specified operand of the 208/// specified instruction is a constant integer. If so, check to see if there 209/// are any bits set in the constant that are not demanded. If so, shrink the 210/// constant and return true. 211bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 212 uint64_t Demanded) { 213 // FIXME: ISD::SELECT, ISD::SELECT_CC 214 switch(Op.getOpcode()) { 215 default: break; 216 case ISD::AND: 217 case ISD::OR: 218 case ISD::XOR: 219 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) 220 if ((~Demanded & C->getValue()) != 0) { 221 MVT::ValueType VT = Op.getValueType(); 222 SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), 223 DAG.getConstant(Demanded & C->getValue(), 224 VT)); 225 return CombineTo(Op, New); 226 } 227 break; 228 } 229 return false; 230} 231 232/// SimplifyDemandedBits - Look at Op. At this point, we know that only the 233/// DemandedMask bits of the result of Op are ever used downstream. If we can 234/// use this information to simplify Op, create a new simplified DAG node and 235/// return true, returning the original and new nodes in Old and New. Otherwise, 236/// analyze the expression and return a mask of KnownOne and KnownZero bits for 237/// the expression (used to simplify the caller). The KnownZero/One bits may 238/// only be accurate for those bits in the DemandedMask. 239bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, 240 uint64_t &KnownZero, 241 uint64_t &KnownOne, 242 TargetLoweringOpt &TLO, 243 unsigned Depth) const { 244 KnownZero = KnownOne = 0; // Don't know anything. 245 // Other users may use these bits. 
246 if (!Op.Val->hasOneUse()) { 247 if (Depth != 0) { 248 // If not at the root, Just compute the KnownZero/KnownOne bits to 249 // simplify things downstream. 250 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 251 return false; 252 } 253 // If this is the root being simplified, allow it to have multiple uses, 254 // just set the DemandedMask to all bits. 255 DemandedMask = MVT::getIntVTBitMask(Op.getValueType()); 256 } else if (DemandedMask == 0) { 257 // Not demanding any bits from Op. 258 if (Op.getOpcode() != ISD::UNDEF) 259 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType())); 260 return false; 261 } else if (Depth == 6) { // Limit search depth. 262 return false; 263 } 264 265 uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; 266 switch (Op.getOpcode()) { 267 case ISD::Constant: 268 // We know all of the bits for a constant! 269 KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask; 270 KnownZero = ~KnownOne & DemandedMask; 271 return false; // Don't fall through, will infinitely loop. 272 case ISD::AND: 273 // If the RHS is a constant, check to see if the LHS would be zero without 274 // using the bits from the RHS. Below, we use knowledge about the RHS to 275 // simplify the LHS, here we're using information from the LHS to simplify 276 // the RHS. 277 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 278 uint64_t LHSZero, LHSOne; 279 ComputeMaskedBits(Op.getOperand(0), DemandedMask, 280 LHSZero, LHSOne, Depth+1); 281 // If the LHS already has zeros where RHSC does, this and is dead. 282 if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask)) 283 return TLO.CombineTo(Op, Op.getOperand(0)); 284 // If any of the set bits in the RHS are known zero on the LHS, shrink 285 // the constant. 
286 if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask)) 287 return true; 288 } 289 290 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 291 KnownOne, TLO, Depth+1)) 292 return true; 293 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 294 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero, 295 KnownZero2, KnownOne2, TLO, Depth+1)) 296 return true; 297 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 298 299 // If all of the demanded bits are known one on one side, return the other. 300 // These bits cannot contribute to the result of the 'and'. 301 if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2)) 302 return TLO.CombineTo(Op, Op.getOperand(0)); 303 if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero)) 304 return TLO.CombineTo(Op, Op.getOperand(1)); 305 // If all of the demanded bits in the inputs are known zeros, return zero. 306 if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask) 307 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); 308 // If the RHS is a constant, see if we can simplify it. 309 if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2)) 310 return true; 311 312 // Output known-1 bits are only known if set in both the LHS & RHS. 313 KnownOne &= KnownOne2; 314 // Output known-0 are known to be clear if zero in either the LHS | RHS. 315 KnownZero |= KnownZero2; 316 break; 317 case ISD::OR: 318 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 319 KnownOne, TLO, Depth+1)) 320 return true; 321 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 322 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, 323 KnownZero2, KnownOne2, TLO, Depth+1)) 324 return true; 325 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 326 327 // If all of the demanded bits are known zero on one side, return the other. 
328 // These bits cannot contribute to the result of the 'or'. 329 if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2)) 330 return TLO.CombineTo(Op, Op.getOperand(0)); 331 if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne)) 332 return TLO.CombineTo(Op, Op.getOperand(1)); 333 // If all of the potentially set bits on one side are known to be set on 334 // the other side, just use the 'other' side. 335 if ((DemandedMask & (~KnownZero) & KnownOne2) == 336 (DemandedMask & (~KnownZero))) 337 return TLO.CombineTo(Op, Op.getOperand(0)); 338 if ((DemandedMask & (~KnownZero2) & KnownOne) == 339 (DemandedMask & (~KnownZero2))) 340 return TLO.CombineTo(Op, Op.getOperand(1)); 341 // If the RHS is a constant, see if we can simplify it. 342 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 343 return true; 344 345 // Output known-0 bits are only known if clear in both the LHS & RHS. 346 KnownZero &= KnownZero2; 347 // Output known-1 are known to be set if set in either the LHS | RHS. 348 KnownOne |= KnownOne2; 349 break; 350 case ISD::XOR: 351 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 352 KnownOne, TLO, Depth+1)) 353 return true; 354 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 355 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2, 356 KnownOne2, TLO, Depth+1)) 357 return true; 358 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 359 360 // If all of the demanded bits are known zero on one side, return the other. 361 // These bits cannot contribute to the result of the 'xor'. 362 if ((DemandedMask & KnownZero) == DemandedMask) 363 return TLO.CombineTo(Op, Op.getOperand(0)); 364 if ((DemandedMask & KnownZero2) == DemandedMask) 365 return TLO.CombineTo(Op, Op.getOperand(1)); 366 367 // If all of the unknown bits are known to be zero on one side or the other 368 // (but not both) turn this into an *inclusive* or. 369 // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 370 if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0) 371 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(), 372 Op.getOperand(0), 373 Op.getOperand(1))); 374 375 // Output known-0 bits are known if clear or set in both the LHS & RHS. 376 KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 377 // Output known-1 are known to be set if set in only one of the LHS, RHS. 378 KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 379 380 // If all of the demanded bits on one side are known, and all of the set 381 // bits on that side are also known to be set on the other side, turn this 382 // into an AND, as we know the bits will be cleared. 383 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 384 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known 385 if ((KnownOne & KnownOne2) == KnownOne) { 386 MVT::ValueType VT = Op.getValueType(); 387 SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT); 388 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0), 389 ANDC)); 390 } 391 } 392 393 // If the RHS is a constant, see if we can simplify it. 394 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 395 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 396 return true; 397 398 KnownZero = KnownZeroOut; 399 KnownOne = KnownOneOut; 400 break; 401 case ISD::SETCC: 402 // If we know the result of a setcc has the top bits zero, use this info. 
403 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 404 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 405 break; 406 case ISD::SELECT: 407 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, 408 KnownOne, TLO, Depth+1)) 409 return true; 410 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2, 411 KnownOne2, TLO, Depth+1)) 412 return true; 413 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 414 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 415 416 // If the operands are constants, see if we can simplify them. 417 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 418 return true; 419 420 // Only known if known in both the LHS and RHS. 421 KnownOne &= KnownOne2; 422 KnownZero &= KnownZero2; 423 break; 424 case ISD::SELECT_CC: 425 if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, 426 KnownOne, TLO, Depth+1)) 427 return true; 428 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2, 429 KnownOne2, TLO, Depth+1)) 430 return true; 431 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 432 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 433 434 // If the operands are constants, see if we can simplify them. 435 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 436 return true; 437 438 // Only known if known in both the LHS and RHS. 439 KnownOne &= KnownOne2; 440 KnownZero &= KnownZero2; 441 break; 442 case ISD::SHL: 443 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 444 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> SA->getValue(), 445 KnownZero, KnownOne, TLO, Depth+1)) 446 return true; 447 KnownZero <<= SA->getValue(); 448 KnownOne <<= SA->getValue(); 449 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 
450 } 451 break; 452 case ISD::SRL: 453 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 454 MVT::ValueType VT = Op.getValueType(); 455 unsigned ShAmt = SA->getValue(); 456 457 // Compute the new bits that are at the top now. 458 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 459 if (SimplifyDemandedBits(Op.getOperand(0), 460 (DemandedMask << ShAmt) & TypeMask, 461 KnownZero, KnownOne, TLO, Depth+1)) 462 return true; 463 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 464 KnownZero &= TypeMask; 465 KnownOne &= TypeMask; 466 KnownZero >>= ShAmt; 467 KnownOne >>= ShAmt; 468 469 uint64_t HighBits = (1ULL << ShAmt)-1; 470 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 471 KnownZero |= HighBits; // High bits known zero. 472 } 473 break; 474 case ISD::SRA: 475 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 476 MVT::ValueType VT = Op.getValueType(); 477 unsigned ShAmt = SA->getValue(); 478 479 // Compute the new bits that are at the top now. 480 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 481 482 uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask; 483 484 // If any of the demanded bits are produced by the sign extension, we also 485 // demand the input sign bit. 486 uint64_t HighBits = (1ULL << ShAmt)-1; 487 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 488 if (HighBits & DemandedMask) 489 InDemandedMask |= MVT::getIntVTSignBit(VT); 490 491 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, 492 KnownZero, KnownOne, TLO, Depth+1)) 493 return true; 494 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 495 KnownZero &= TypeMask; 496 KnownOne &= TypeMask; 497 KnownZero >>= ShAmt; 498 KnownOne >>= ShAmt; 499 500 // Handle the sign bits. 501 uint64_t SignBit = MVT::getIntVTSignBit(VT); 502 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 
503 504 // If the input sign bit is known to be zero, or if none of the top bits 505 // are demanded, turn this into an unsigned shift right. 506 if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { 507 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0), 508 Op.getOperand(1))); 509 } else if (KnownOne & SignBit) { // New bits are known one. 510 KnownOne |= HighBits; 511 } 512 } 513 break; 514 case ISD::SIGN_EXTEND_INREG: { 515 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 516 517 // Sign extension. Compute the demanded bits in the result that are not 518 // present in the input. 519 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask; 520 521 // If none of the extended bits are demanded, eliminate the sextinreg. 522 if (NewBits == 0) 523 return TLO.CombineTo(Op, Op.getOperand(0)); 524 525 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 526 int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT); 527 528 // Since the sign extended bits are demanded, we know that the sign 529 // bit is demanded. 530 InputDemandedBits |= InSignBit; 531 532 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, 533 KnownZero, KnownOne, TLO, Depth+1)) 534 return true; 535 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 536 537 // If the sign bit of the input is known set or clear, then we know the 538 // top bits of the result. 539 540 // If the input sign bit is known zero, convert this into a zero extension. 
541 if (KnownZero & InSignBit) 542 return TLO.CombineTo(Op, 543 TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT)); 544 545 if (KnownOne & InSignBit) { // Input sign bit known set 546 KnownOne |= NewBits; 547 KnownZero &= ~NewBits; 548 } else { // Input sign bit unknown 549 KnownZero &= ~NewBits; 550 KnownOne &= ~NewBits; 551 } 552 break; 553 } 554 case ISD::CTTZ: 555 case ISD::CTLZ: 556 case ISD::CTPOP: { 557 MVT::ValueType VT = Op.getValueType(); 558 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 559 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 560 KnownOne = 0; 561 break; 562 } 563 case ISD::LOAD: { 564 if (ISD::isZEXTLoad(Op.Val)) { 565 LoadSDNode *LD = cast<LoadSDNode>(Op); 566 MVT::ValueType VT = LD->getLoadedVT(); 567 KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask; 568 } 569 break; 570 } 571 case ISD::ZERO_EXTEND: { 572 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 573 574 // If none of the top bits are demanded, convert this into an any_extend. 575 uint64_t NewBits = (~InMask) & DemandedMask; 576 if (NewBits == 0) 577 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 578 Op.getValueType(), 579 Op.getOperand(0))); 580 581 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 582 KnownZero, KnownOne, TLO, Depth+1)) 583 return true; 584 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 585 KnownZero |= NewBits; 586 break; 587 } 588 case ISD::SIGN_EXTEND: { 589 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 590 uint64_t InMask = MVT::getIntVTBitMask(InVT); 591 uint64_t InSignBit = MVT::getIntVTSignBit(InVT); 592 uint64_t NewBits = (~InMask) & DemandedMask; 593 594 // If none of the top bits are demanded, convert this into an any_extend. 
595 if (NewBits == 0) 596 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(), 597 Op.getOperand(0))); 598 599 // Since some of the sign extended bits are demanded, we know that the sign 600 // bit is demanded. 601 uint64_t InDemandedBits = DemandedMask & InMask; 602 InDemandedBits |= InSignBit; 603 604 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 605 KnownOne, TLO, Depth+1)) 606 return true; 607 608 // If the sign bit is known zero, convert this to a zero extend. 609 if (KnownZero & InSignBit) 610 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 611 Op.getValueType(), 612 Op.getOperand(0))); 613 614 // If the sign bit is known one, the top bits match. 615 if (KnownOne & InSignBit) { 616 KnownOne |= NewBits; 617 KnownZero &= ~NewBits; 618 } else { // Otherwise, top bits aren't known. 619 KnownOne &= ~NewBits; 620 KnownZero &= ~NewBits; 621 } 622 break; 623 } 624 case ISD::ANY_EXTEND: { 625 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 626 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 627 KnownZero, KnownOne, TLO, Depth+1)) 628 return true; 629 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 630 break; 631 } 632 case ISD::TRUNCATE: { 633 // Simplify the input, using demanded bit information, and compute the known 634 // zero/one bits live out. 635 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, 636 KnownZero, KnownOne, TLO, Depth+1)) 637 return true; 638 639 // If the input is only used by this truncate, see if we can shrink it based 640 // on the known demanded bits. 641 if (Op.getOperand(0).Val->hasOneUse()) { 642 SDOperand In = Op.getOperand(0); 643 switch (In.getOpcode()) { 644 default: break; 645 case ISD::SRL: 646 // Shrink SRL by a constant if none of the high bits shifted in are 647 // demanded. 
648 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ 649 uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType()); 650 HighBits &= ~MVT::getIntVTBitMask(Op.getValueType()); 651 HighBits >>= ShAmt->getValue(); 652 653 if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) && 654 (DemandedMask & HighBits) == 0) { 655 // None of the shifted in bits are needed. Add a truncate of the 656 // shift input, then shift it. 657 SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 658 Op.getValueType(), 659 In.getOperand(0)); 660 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(), 661 NewTrunc, In.getOperand(1))); 662 } 663 } 664 break; 665 } 666 } 667 668 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 669 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 670 KnownZero &= OutMask; 671 KnownOne &= OutMask; 672 break; 673 } 674 case ISD::AssertZext: { 675 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 676 uint64_t InMask = MVT::getIntVTBitMask(VT); 677 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 678 KnownZero, KnownOne, TLO, Depth+1)) 679 return true; 680 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 681 KnownZero |= ~InMask & DemandedMask; 682 break; 683 } 684 case ISD::ADD: 685 case ISD::SUB: 686 case ISD::INTRINSIC_WO_CHAIN: 687 case ISD::INTRINSIC_W_CHAIN: 688 case ISD::INTRINSIC_VOID: 689 // Just use ComputeMaskedBits to compute output bits. 690 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 691 break; 692 } 693 694 // If we know the value of all of the demanded bits, return this as a 695 // constant. 696 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) 697 return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); 698 699 return false; 700} 701 702/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use 703/// this predicate to simplify operations downstream. 
Mask is known to be zero 704/// for bits that V cannot have. 705bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 706 unsigned Depth) const { 707 uint64_t KnownZero, KnownOne; 708 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 709 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 710 return (KnownZero & Mask) == Mask; 711} 712 713/// ComputeMaskedBits - Determine which of the bits specified in Mask are 714/// known to be either zero or one and return them in the KnownZero/KnownOne 715/// bitsets. This code only analyzes bits in Mask, in order to short-circuit 716/// processing. 717void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 718 uint64_t &KnownZero, uint64_t &KnownOne, 719 unsigned Depth) const { 720 KnownZero = KnownOne = 0; // Don't know anything. 721 if (Depth == 6 || Mask == 0) 722 return; // Limit search depth. 723 724 uint64_t KnownZero2, KnownOne2; 725 726 switch (Op.getOpcode()) { 727 case ISD::Constant: 728 // We know all of the bits for a constant! 729 KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask; 730 KnownZero = ~KnownOne & Mask; 731 return; 732 case ISD::AND: 733 // If either the LHS or the RHS are Zero, the result is zero. 734 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 735 Mask &= ~KnownZero; 736 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 737 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 738 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 739 740 // Output known-1 bits are only known if set in both the LHS & RHS. 741 KnownOne &= KnownOne2; 742 // Output known-0 are known to be clear if zero in either the LHS | RHS. 
743 KnownZero |= KnownZero2; 744 return; 745 case ISD::OR: 746 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 747 Mask &= ~KnownOne; 748 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 749 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 750 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 751 752 // Output known-0 bits are only known if clear in both the LHS & RHS. 753 KnownZero &= KnownZero2; 754 // Output known-1 are known to be set if set in either the LHS | RHS. 755 KnownOne |= KnownOne2; 756 return; 757 case ISD::XOR: { 758 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 759 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 760 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 761 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 762 763 // Output known-0 bits are known if clear or set in both the LHS & RHS. 764 uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 765 // Output known-1 are known to be set if set in only one of the LHS, RHS. 766 KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 767 KnownZero = KnownZeroOut; 768 return; 769 } 770 case ISD::SELECT: 771 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); 772 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); 773 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 774 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 775 776 // Only known if known in both the LHS and RHS. 
777 KnownOne &= KnownOne2; 778 KnownZero &= KnownZero2; 779 return; 780 case ISD::SELECT_CC: 781 ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); 782 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); 783 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 784 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 785 786 // Only known if known in both the LHS and RHS. 787 KnownOne &= KnownOne2; 788 KnownZero &= KnownZero2; 789 return; 790 case ISD::SETCC: 791 // If we know the result of a setcc has the top bits zero, use this info. 792 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 793 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 794 return; 795 case ISD::SHL: 796 // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 797 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 798 ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), 799 KnownZero, KnownOne, Depth+1); 800 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 801 KnownZero <<= SA->getValue(); 802 KnownOne <<= SA->getValue(); 803 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 804 } 805 return; 806 case ISD::SRL: 807 // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 808 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 809 MVT::ValueType VT = Op.getValueType(); 810 unsigned ShAmt = SA->getValue(); 811 812 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 813 ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, 814 KnownZero, KnownOne, Depth+1); 815 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 816 KnownZero &= TypeMask; 817 KnownOne &= TypeMask; 818 KnownZero >>= ShAmt; 819 KnownOne >>= ShAmt; 820 821 uint64_t HighBits = (1ULL << ShAmt)-1; 822 HighBits <<= MVT::getSizeInBits(VT)-ShAmt; 823 KnownZero |= HighBits; // High bits known zero. 
824 } 825 return; 826 case ISD::SRA: 827 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 828 MVT::ValueType VT = Op.getValueType(); 829 unsigned ShAmt = SA->getValue(); 830 831 // Compute the new bits that are at the top now. 832 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 833 834 uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; 835 // If any of the demanded bits are produced by the sign extension, we also 836 // demand the input sign bit. 837 uint64_t HighBits = (1ULL << ShAmt)-1; 838 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 839 if (HighBits & Mask) 840 InDemandedMask |= MVT::getIntVTSignBit(VT); 841 842 ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, 843 Depth+1); 844 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 845 KnownZero &= TypeMask; 846 KnownOne &= TypeMask; 847 KnownZero >>= ShAmt; 848 KnownOne >>= ShAmt; 849 850 // Handle the sign bits. 851 uint64_t SignBit = MVT::getIntVTSignBit(VT); 852 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 853 854 if (KnownZero & SignBit) { 855 KnownZero |= HighBits; // New bits are known zero. 856 } else if (KnownOne & SignBit) { 857 KnownOne |= HighBits; // New bits are known one. 858 } 859 } 860 return; 861 case ISD::SIGN_EXTEND_INREG: { 862 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 863 864 // Sign extension. Compute the demanded bits in the result that are not 865 // present in the input. 866 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; 867 868 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 869 int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); 870 871 // If the sign extended bits are demanded, we know that the sign 872 // bit is demanded. 
873 if (NewBits) 874 InputDemandedBits |= InSignBit; 875 876 ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, 877 KnownZero, KnownOne, Depth+1); 878 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 879 880 // If the sign bit of the input is known set or clear, then we know the 881 // top bits of the result. 882 if (KnownZero & InSignBit) { // Input sign bit known clear 883 KnownZero |= NewBits; 884 KnownOne &= ~NewBits; 885 } else if (KnownOne & InSignBit) { // Input sign bit known set 886 KnownOne |= NewBits; 887 KnownZero &= ~NewBits; 888 } else { // Input sign bit unknown 889 KnownZero &= ~NewBits; 890 KnownOne &= ~NewBits; 891 } 892 return; 893 } 894 case ISD::CTTZ: 895 case ISD::CTLZ: 896 case ISD::CTPOP: { 897 MVT::ValueType VT = Op.getValueType(); 898 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 899 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 900 KnownOne = 0; 901 return; 902 } 903 case ISD::LOAD: { 904 if (ISD::isZEXTLoad(Op.Val)) { 905 LoadSDNode *LD = cast<LoadSDNode>(Op); 906 MVT::ValueType VT = LD->getLoadedVT(); 907 KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; 908 } 909 return; 910 } 911 case ISD::ZERO_EXTEND: { 912 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 913 uint64_t NewBits = (~InMask) & Mask; 914 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 915 KnownOne, Depth+1); 916 KnownZero |= NewBits & Mask; 917 KnownOne &= ~NewBits; 918 return; 919 } 920 case ISD::SIGN_EXTEND: { 921 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 922 unsigned InBits = MVT::getSizeInBits(InVT); 923 uint64_t InMask = MVT::getIntVTBitMask(InVT); 924 uint64_t InSignBit = 1ULL << (InBits-1); 925 uint64_t NewBits = (~InMask) & Mask; 926 uint64_t InDemandedBits = Mask & InMask; 927 928 // If any of the sign extended bits are demanded, we know that the sign 929 // bit is demanded. 
930 if (NewBits & Mask) 931 InDemandedBits |= InSignBit; 932 933 ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 934 KnownOne, Depth+1); 935 // If the sign bit is known zero or one, the top bits match. 936 if (KnownZero & InSignBit) { 937 KnownZero |= NewBits; 938 KnownOne &= ~NewBits; 939 } else if (KnownOne & InSignBit) { 940 KnownOne |= NewBits; 941 KnownZero &= ~NewBits; 942 } else { // Otherwise, top bits aren't known. 943 KnownOne &= ~NewBits; 944 KnownZero &= ~NewBits; 945 } 946 return; 947 } 948 case ISD::ANY_EXTEND: { 949 MVT::ValueType VT = Op.getOperand(0).getValueType(); 950 ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), 951 KnownZero, KnownOne, Depth+1); 952 return; 953 } 954 case ISD::TRUNCATE: { 955 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 956 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 957 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 958 KnownZero &= OutMask; 959 KnownOne &= OutMask; 960 break; 961 } 962 case ISD::AssertZext: { 963 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 964 uint64_t InMask = MVT::getIntVTBitMask(VT); 965 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 966 KnownOne, Depth+1); 967 KnownZero |= (~InMask) & Mask; 968 return; 969 } 970 case ISD::ADD: { 971 // If either the LHS or the RHS are Zero, the result is zero. 972 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 973 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 974 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 975 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 976 977 // Output known-0 bits are known if clear or set in both the low clear bits 978 // common to both LHS & RHS. For example, 8+(X<<3) is known to have the 979 // low 3 bits clear. 
980 uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 981 CountTrailingZeros_64(~KnownZero2)); 982 983 KnownZero = (1ULL << KnownZeroOut) - 1; 984 KnownOne = 0; 985 return; 986 } 987 case ISD::SUB: { 988 ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); 989 if (!CLHS) return; 990 991 // We know that the top bits of C-X are clear if X contains less bits 992 // than C (i.e. no wrap-around can happen). For example, 20-X is 993 // positive if we can prove that X is >= 0 and < 16. 994 MVT::ValueType VT = CLHS->getValueType(0); 995 if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear 996 unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); 997 uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit 998 MaskV = ~MaskV & MVT::getIntVTBitMask(VT); 999 ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); 1000 1001 // If all of the MaskV bits are known to be zero, then we know the output 1002 // top bits are zero, because we now know that the output is from [0-C]. 1003 if ((KnownZero & MaskV) == MaskV) { 1004 unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); 1005 KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. 1006 KnownOne = 0; // No one bits known. 1007 } else { 1008 KnownZero = KnownOne = 0; // Otherwise, nothing known. 1009 } 1010 } 1011 return; 1012 } 1013 default: 1014 // Allow the target to implement this method for its nodes. 1015 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { 1016 case ISD::INTRINSIC_WO_CHAIN: 1017 case ISD::INTRINSIC_W_CHAIN: 1018 case ISD::INTRINSIC_VOID: 1019 computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne); 1020 } 1021 return; 1022 } 1023} 1024 1025/// computeMaskedBitsForTargetNode - Determine which of the bits specified 1026/// in Mask are known to be either zero or one and return them in the 1027/// KnownZero/KnownOne bitsets. 
1028void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 1029 uint64_t Mask, 1030 uint64_t &KnownZero, 1031 uint64_t &KnownOne, 1032 unsigned Depth) const { 1033 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1034 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1035 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1036 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1037 "Should use MaskedValueIsZero if you don't know whether Op" 1038 " is a target node!"); 1039 KnownZero = 0; 1040 KnownOne = 0; 1041} 1042 1043/// ComputeNumSignBits - Return the number of times the sign bit of the 1044/// register is replicated into the other bits. We know that at least 1 bit 1045/// is always equal to the sign bit (itself), but other cases can give us 1046/// information. For example, immediately after an "SRA X, 2", we know that 1047/// the top 3 bits are all equal to each other, so we return 3. 1048unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ 1049 MVT::ValueType VT = Op.getValueType(); 1050 assert(MVT::isInteger(VT) && "Invalid VT!"); 1051 unsigned VTBits = MVT::getSizeInBits(VT); 1052 unsigned Tmp, Tmp2; 1053 1054 if (Depth == 6) 1055 return 1; // Limit search depth. 1056 1057 switch (Op.getOpcode()) { 1058 default: break; 1059 case ISD::AssertSext: 1060 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1061 return VTBits-Tmp+1; 1062 case ISD::AssertZext: 1063 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1064 return VTBits-Tmp; 1065 1066 case ISD::Constant: { 1067 uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); 1068 // If negative, invert the bits, then look at it. 1069 if (Val & MVT::getIntVTSignBit(VT)) 1070 Val = ~Val; 1071 1072 // Shift the bits so they are the leading bits in the int64_t. 1073 Val <<= 64-VTBits; 1074 1075 // Return # leading zeros. We use 'min' here in case Val was zero before 1076 // shifting. We don't want to return '64' as for an i32 "0". 
1077 return std::min(VTBits, CountLeadingZeros_64(Val)); 1078 } 1079 1080 case ISD::SIGN_EXTEND: 1081 Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); 1082 return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; 1083 1084 case ISD::SIGN_EXTEND_INREG: 1085 // Max of the input and what this extends. 1086 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1087 Tmp = VTBits-Tmp+1; 1088 1089 Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1090 return std::max(Tmp, Tmp2); 1091 1092 case ISD::SRA: 1093 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1094 // SRA X, C -> adds C sign bits. 1095 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1096 Tmp += C->getValue(); 1097 if (Tmp > VTBits) Tmp = VTBits; 1098 } 1099 return Tmp; 1100 case ISD::SHL: 1101 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1102 // shl destroys sign bits. 1103 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1104 if (C->getValue() >= VTBits || // Bad shift. 1105 C->getValue() >= Tmp) break; // Shifted all sign bits out. 1106 return Tmp - C->getValue(); 1107 } 1108 break; 1109 case ISD::AND: 1110 case ISD::OR: 1111 case ISD::XOR: // NOT is handled here. 1112 // Logical binary ops preserve the number of sign bits. 1113 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1114 if (Tmp == 1) return 1; // Early out. 1115 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1116 return std::min(Tmp, Tmp2); 1117 1118 case ISD::SELECT: 1119 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1120 if (Tmp == 1) return 1; // Early out. 1121 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1122 return std::min(Tmp, Tmp2); 1123 1124 case ISD::SETCC: 1125 // If setcc returns 0/-1, all bits are sign bits. 
1126 if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult) 1127 return VTBits; 1128 break; 1129 case ISD::ROTL: 1130 case ISD::ROTR: 1131 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1132 unsigned RotAmt = C->getValue() & (VTBits-1); 1133 1134 // Handle rotate right by N like a rotate left by 32-N. 1135 if (Op.getOpcode() == ISD::ROTR) 1136 RotAmt = (VTBits-RotAmt) & (VTBits-1); 1137 1138 // If we aren't rotating out all of the known-in sign bits, return the 1139 // number that are left. This handles rotl(sext(x), 1) for example. 1140 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1141 if (Tmp > RotAmt+1) return Tmp-RotAmt; 1142 } 1143 break; 1144 case ISD::ADD: 1145 // Add can have at most one carry bit. Thus we know that the output 1146 // is, at worst, one more bit than the inputs. 1147 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1148 if (Tmp == 1) return 1; // Early out. 1149 1150 // Special case decrementing a value (ADD X, -1): 1151 if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1152 if (CRHS->isAllOnesValue()) { 1153 uint64_t KnownZero, KnownOne; 1154 uint64_t Mask = MVT::getIntVTBitMask(VT); 1155 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 1156 1157 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1158 // sign bits set. 1159 if ((KnownZero|1) == Mask) 1160 return VTBits; 1161 1162 // If we are subtracting one from a positive number, there is no carry 1163 // out of the result. 1164 if (KnownZero & MVT::getIntVTSignBit(VT)) 1165 return Tmp; 1166 } 1167 1168 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1169 if (Tmp2 == 1) return 1; 1170 return std::min(Tmp, Tmp2)-1; 1171 break; 1172 1173 case ISD::SUB: 1174 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1175 if (Tmp2 == 1) return 1; 1176 1177 // Handle NEG. 
1178 if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1179 if (CLHS->getValue() == 0) { 1180 uint64_t KnownZero, KnownOne; 1181 uint64_t Mask = MVT::getIntVTBitMask(VT); 1182 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 1183 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1184 // sign bits set. 1185 if ((KnownZero|1) == Mask) 1186 return VTBits; 1187 1188 // If the input is known to be positive (the sign bit is known clear), 1189 // the output of the NEG has the same number of sign bits as the input. 1190 if (KnownZero & MVT::getIntVTSignBit(VT)) 1191 return Tmp2; 1192 1193 // Otherwise, we treat this like a SUB. 1194 } 1195 1196 // Sub can have at most one carry bit. Thus we know that the output 1197 // is, at worst, one more bit than the inputs. 1198 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1199 if (Tmp == 1) return 1; // Early out. 1200 return std::min(Tmp, Tmp2)-1; 1201 break; 1202 case ISD::TRUNCATE: 1203 // FIXME: it's tricky to do anything useful for this, but it is an important 1204 // case for targets like X86. 1205 break; 1206 } 1207 1208 // Handle LOADX separately here. EXTLOAD case will fallthrough. 1209 if (Op.getOpcode() == ISD::LOAD) { 1210 LoadSDNode *LD = cast<LoadSDNode>(Op); 1211 unsigned ExtType = LD->getExtensionType(); 1212 switch (ExtType) { 1213 default: break; 1214 case ISD::SEXTLOAD: // '17' bits known 1215 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1216 return VTBits-Tmp+1; 1217 case ISD::ZEXTLOAD: // '16' bits known 1218 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1219 return VTBits-Tmp; 1220 } 1221 } 1222 1223 // Allow the target to implement this method for its nodes. 
1224 if (Op.getOpcode() >= ISD::BUILTIN_OP_END || 1225 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1226 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1227 Op.getOpcode() == ISD::INTRINSIC_VOID) { 1228 unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth); 1229 if (NumBits > 1) return NumBits; 1230 } 1231 1232 // Finally, if we can prove that the top bits of the result are 0's or 1's, 1233 // use this information. 1234 uint64_t KnownZero, KnownOne; 1235 uint64_t Mask = MVT::getIntVTBitMask(VT); 1236 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 1237 1238 uint64_t SignBit = MVT::getIntVTSignBit(VT); 1239 if (KnownZero & SignBit) { // SignBit is 0 1240 Mask = KnownZero; 1241 } else if (KnownOne & SignBit) { // SignBit is 1; 1242 Mask = KnownOne; 1243 } else { 1244 // Nothing known. 1245 return 1; 1246 } 1247 1248 // Okay, we know that the sign bit in Mask is set. Use CLZ to determine 1249 // the number of identical bits in the top of the input value. 1250 Mask ^= ~0ULL; 1251 Mask <<= 64-VTBits; 1252 // Return # leading zeros. We use 'min' here in case Val was zero before 1253 // shifting. We don't want to return '64' as for an i32 "0". 1254 return std::min(VTBits, CountLeadingZeros_64(Mask)); 1255} 1256 1257 1258 1259/// ComputeNumSignBitsForTargetNode - This method can be implemented by 1260/// targets that want to expose additional information about sign bits to the 1261/// DAG Combiner. 1262unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op, 1263 unsigned Depth) const { 1264 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1265 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1266 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1267 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1268 "Should use ComputeNumSignBits if you don't know whether Op" 1269 " is a target node!"); 1270 return 1; 1271} 1272 1273 1274SDOperand TargetLowering:: 1275PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1276 // Default implementation: no optimization. 
1277 return SDOperand(); 1278} 1279 1280//===----------------------------------------------------------------------===// 1281// Inline Assembler Implementation Methods 1282//===----------------------------------------------------------------------===// 1283 1284TargetLowering::ConstraintType 1285TargetLowering::getConstraintType(char ConstraintLetter) const { 1286 // FIXME: lots more standard ones to handle. 1287 switch (ConstraintLetter) { 1288 default: return C_Unknown; 1289 case 'r': return C_RegisterClass; 1290 case 'm': // memory 1291 case 'o': // offsetable 1292 case 'V': // not offsetable 1293 return C_Memory; 1294 case 'i': // Simple Integer or Relocatable Constant 1295 case 'n': // Simple Integer 1296 case 's': // Relocatable Constant 1297 case 'I': // Target registers. 1298 case 'J': 1299 case 'K': 1300 case 'L': 1301 case 'M': 1302 case 'N': 1303 case 'O': 1304 case 'P': 1305 return C_Other; 1306 } 1307} 1308 1309/// isOperandValidForConstraint - Return the specified operand (possibly 1310/// modified) if the specified SDOperand is valid for the specified target 1311/// constraint letter, otherwise return null. 1312SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op, 1313 char ConstraintLetter, 1314 SelectionDAG &DAG) { 1315 switch (ConstraintLetter) { 1316 default: return SDOperand(0,0); 1317 case 'i': // Simple Integer or Relocatable Constant 1318 case 'n': // Simple Integer 1319 case 's': // Relocatable Constant 1320 return Op; // FIXME: not right. 
1321 } 1322} 1323 1324std::vector<unsigned> TargetLowering:: 1325getRegClassForInlineAsmConstraint(const std::string &Constraint, 1326 MVT::ValueType VT) const { 1327 return std::vector<unsigned>(); 1328} 1329 1330 1331std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: 1332getRegForInlineAsmConstraint(const std::string &Constraint, 1333 MVT::ValueType VT) const { 1334 if (Constraint[0] != '{') 1335 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1336 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); 1337 1338 // Remove the braces from around the name. 1339 std::string RegName(Constraint.begin()+1, Constraint.end()-1); 1340 1341 // Figure out which register class contains this reg. 1342 const MRegisterInfo *RI = TM.getRegisterInfo(); 1343 for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), 1344 E = RI->regclass_end(); RCI != E; ++RCI) { 1345 const TargetRegisterClass *RC = *RCI; 1346 1347 // If none of the the value types for this register class are valid, we 1348 // can't use it. For example, 64-bit reg classes on 32-bit targets. 1349 bool isLegal = false; 1350 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); 1351 I != E; ++I) { 1352 if (isTypeLegal(*I)) { 1353 isLegal = true; 1354 break; 1355 } 1356 } 1357 1358 if (!isLegal) continue; 1359 1360 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 1361 I != E; ++I) { 1362 if (StringsEqualNoCase(RegName, RI->get(*I).Name)) 1363 return std::make_pair(*I, RC); 1364 } 1365 } 1366 1367 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1368} 1369 1370//===----------------------------------------------------------------------===// 1371// Loop Strength Reduction hooks 1372//===----------------------------------------------------------------------===// 1373 1374/// isLegalAddressImmediate - Return true if the integer value or 1375/// GlobalValue can be used as the offset of the target addressing mode. 
1376bool TargetLowering::isLegalAddressImmediate(int64_t V) const { 1377 return false; 1378} 1379bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 1380 return false; 1381} 1382 1383 1384// Magic for divide replacement 1385 1386struct ms { 1387 int64_t m; // magic number 1388 int64_t s; // shift amount 1389}; 1390 1391struct mu { 1392 uint64_t m; // magic number 1393 int64_t a; // add indicator 1394 int64_t s; // shift amount 1395}; 1396 1397/// magic - calculate the magic numbers required to codegen an integer sdiv as 1398/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1399/// or -1. 1400static ms magic32(int32_t d) { 1401 int32_t p; 1402 uint32_t ad, anc, delta, q1, r1, q2, r2, t; 1403 const uint32_t two31 = 0x80000000U; 1404 struct ms mag; 1405 1406 ad = abs(d); 1407 t = two31 + ((uint32_t)d >> 31); 1408 anc = t - 1 - t%ad; // absolute value of nc 1409 p = 31; // initialize p 1410 q1 = two31/anc; // initialize q1 = 2p/abs(nc) 1411 r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1412 q2 = two31/ad; // initialize q2 = 2p/abs(d) 1413 r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1414 do { 1415 p = p + 1; 1416 q1 = 2*q1; // update q1 = 2p/abs(nc) 1417 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1418 if (r1 >= anc) { // must be unsigned comparison 1419 q1 = q1 + 1; 1420 r1 = r1 - anc; 1421 } 1422 q2 = 2*q2; // update q2 = 2p/abs(d) 1423 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1424 if (r2 >= ad) { // must be unsigned comparison 1425 q2 = q2 + 1; 1426 r2 = r2 - ad; 1427 } 1428 delta = ad - r2; 1429 } while (q1 < delta || (q1 == delta && r1 == 0)); 1430 1431 mag.m = (int32_t)(q2 + 1); // make sure to sign extend 1432 if (d < 0) mag.m = -mag.m; // resulting magic number 1433 mag.s = p - 32; // resulting shift 1434 return mag; 1435} 1436 1437/// magicu - calculate the magic numbers required to codegen an integer udiv as 1438/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
static mu magicu32(uint32_t d) {
  int32_t p;
  uint32_t nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  // nc is the largest value such that nc % d == d-1 (unsigned wraparound
  // arithmetic is intentional here).
  nc = - 1 - (-d)%d;
  p = 31;                   // initialize p
  q1 = 0x80000000/nc;       // initialize q1 = 2p/nc
  r1 = 0x80000000 - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFF/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFF - q2*d;   // initialize r2 = rem((2p-1),d)
  // Each iteration doubles the working power of two (p) and updates the
  // quotient/remainder pairs, detecting overflow manually since 2p may
  // exceed 32 bits.
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFF) magu.a = 1;    // q2 about to overflow: need add
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x80000000) magu.a = 1;    // q2 about to overflow: need add
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 32; // resulting shift
  return magu;
}

/// magic - calculate the magic numbers required to codegen an integer sdiv as
/// a sequence of multiply and shifts.  Requires that the divisor not be 0, 1,
/// or -1.
static ms magic64(int64_t d) {
  int64_t p;
  uint64_t ad, anc, delta, q1, r1, q2, r2, t;
  const uint64_t two63 = 9223372036854775808ULL; // 2^63
  struct ms mag;

  // |d|, computed without calling abs() (no 64-bit abs available here).
  ad = d >= 0 ? d : -d;
  t = two63 + ((uint64_t)d >> 63);
  anc = t - 1 - t%ad;   // absolute value of nc
  p = 63;               // initialize p
  q1 = two63/anc;       // initialize q1 = 2p/abs(nc)
  r1 = two63 - q1*anc;  // initialize r1 = rem(2p,abs(nc))
  q2 = two63/ad;        // initialize q2 = 2p/abs(d)
  r2 = two63 - q2*ad;   // initialize r2 = rem(2p,abs(d))
  // Increase p until the magic multiplier is precise enough; the
  // quotient/remainder pairs track 2^p divided by |nc| and |d|.
  do {
    p = p + 1;
    q1 = 2*q1;        // update q1 = 2p/abs(nc)
    r1 = 2*r1;        // update r1 = rem(2p/abs(nc))
    if (r1 >= anc) {  // must be unsigned comparison
      q1 = q1 + 1;
      r1 = r1 - anc;
    }
    q2 = 2*q2;        // update q2 = 2p/abs(d)
    r2 = 2*r2;        // update r2 = rem(2p/abs(d))
    if (r2 >= ad) {   // must be unsigned comparison
      q2 = q2 + 1;
      r2 = r2 - ad;
    }
    delta = ad - r2;
  } while (q1 < delta || (q1 == delta && r1 == 0));

  mag.m = q2 + 1;
  if (d < 0) mag.m = -mag.m; // resulting magic number
  mag.s = p - 64;            // resulting shift
  return mag;
}

/// magicu - calculate the magic numbers required to codegen an integer udiv as
/// a sequence of multiply, add and shifts.  Requires that the divisor not be 0.
static mu magicu64(uint64_t d)
{
  int64_t p;
  uint64_t nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  // nc is the largest value such that nc % d == d-1 (unsigned wraparound
  // arithmetic is intentional here).
  nc = - 1 - (-d)%d;
  p = 63;                   // initialize p
  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
  // Each iteration doubles the working power of two (p), updating the
  // quotient/remainder pairs with manual overflow detection since 2p may
  // exceed 64 bits.
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;  // q2 overflow: need add
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x8000000000000000ull) magu.a = 1;  // q2 overflow: need add
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 64; // resulting shift
  return magu;
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// Every intermediate node created is appended to *Created (when non-null) so
/// the caller can inform the DAG combiner about them.
SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                    std::vector<SDNode*>* Created) const {
  MVT::ValueType VT = N->getValueType(0);

  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildSDIV only operates on i32 or i64
  if (!isOperationLegal(ISD::MULHS, VT))
    return SDOperand();       // Make sure the target supports MULHS.

  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);

  // Multiply the numerator (operand 0) by the magic value
  SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
                            DAG.getConstant(magics.m, VT));
  // If d > 0 and m < 0, add the numerator (corrects for the signed
  // high-multiply's implicit subtraction of the numerator).
  if (d > 0 && magics.m < 0) {
    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (d < 0 && magics.m > 0) {
    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Shift right algebraic if shift value is nonzero
  if (magics.s > 0) {
    Q = DAG.getNode(ISD::SRA, VT, Q,
                    DAG.getConstant(magics.s, getShiftAmountTy()));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Extract the sign bit and add it to the quotient (rounds the quotient
  // toward zero for negative results).
  SDOperand T =
    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1,
                                                 getShiftAmountTy()));
  if (Created)
    Created->push_back(T.Val);
  return DAG.getNode(ISD::ADD, VT, Q, T);
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// Every intermediate node created is appended to *Created (when non-null) so
/// the caller can inform the DAG combiner about them.
SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                    std::vector<SDNode*>* Created) const {
  MVT::ValueType VT = N->getValueType(0);

  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildUDIV only operates on i32 or i64
  if (!isOperationLegal(ISD::MULHU, VT))
    return SDOperand();       // Make sure the target supports MULHU.

  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);

  // Multiply the numerator (operand 0) by the magic value
  SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
                            DAG.getConstant(magics.m, VT));
  if (Created)
    Created->push_back(Q.Val);

  if (magics.a == 0) {
    // No "add" correction needed: just shift the high product.
    return DAG.getNode(ISD::SRL, VT, Q,
                       DAG.getConstant(magics.s, getShiftAmountTy()));
  } else {
    // The magic multiplier overflowed; apply the add-and-halve fixup:
    // q = ((n - q) >> 1) + q, then shift by s-1.
    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::SRL, VT, NPQ,
                      DAG.getConstant(1, getShiftAmountTy()));
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.Val);
    return DAG.getNode(ISD::SRL, VT, NPQ,
                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
  }
}