TargetLowering.cpp revision 79cca507fe51aa8af7a9006b380977c61b6f3fea
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file was developed by the LLVM research group and is distributed under 6// the University of Illinois Open Source License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the TargetLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Target/TargetLowering.h" 15#include "llvm/Target/TargetData.h" 16#include "llvm/Target/TargetMachine.h" 17#include "llvm/Target/MRegisterInfo.h" 18#include "llvm/DerivedTypes.h" 19#include "llvm/CodeGen/SelectionDAG.h" 20#include "llvm/ADT/StringExtras.h" 21#include "llvm/Support/MathExtras.h" 22using namespace llvm; 23 24/// InitLibcallNames - Set default libcall names. 25/// 26static void InitLibcallNames(const char **Names) { 27 Names[RTLIB::SHL_I32] = "__ashlsi3"; 28 Names[RTLIB::SHL_I64] = "__ashldi3"; 29 Names[RTLIB::SRL_I32] = "__lshrsi3"; 30 Names[RTLIB::SRL_I64] = "__lshrdi3"; 31 Names[RTLIB::SRA_I32] = "__ashrsi3"; 32 Names[RTLIB::SRA_I64] = "__ashrdi3"; 33 Names[RTLIB::MUL_I32] = "__mulsi3"; 34 Names[RTLIB::MUL_I64] = "__muldi3"; 35 Names[RTLIB::SDIV_I32] = "__divsi3"; 36 Names[RTLIB::SDIV_I64] = "__divdi3"; 37 Names[RTLIB::UDIV_I32] = "__udivsi3"; 38 Names[RTLIB::UDIV_I64] = "__udivdi3"; 39 Names[RTLIB::SREM_I32] = "__modsi3"; 40 Names[RTLIB::SREM_I64] = "__moddi3"; 41 Names[RTLIB::UREM_I32] = "__umodsi3"; 42 Names[RTLIB::UREM_I64] = "__umoddi3"; 43 Names[RTLIB::NEG_I32] = "__negsi2"; 44 Names[RTLIB::NEG_I64] = "__negdi2"; 45 Names[RTLIB::ADD_F32] = "__addsf3"; 46 Names[RTLIB::ADD_F64] = "__adddf3"; 47 Names[RTLIB::SUB_F32] = "__subsf3"; 48 Names[RTLIB::SUB_F64] = "__subdf3"; 49 Names[RTLIB::MUL_F32] = "__mulsf3"; 50 Names[RTLIB::MUL_F64] = "__muldf3"; 51 Names[RTLIB::DIV_F32] = "__divsf3"; 52 Names[RTLIB::DIV_F64] = "__divdf3"; 53 Names[RTLIB::REM_F32] = "fmodf"; 54 Names[RTLIB::REM_F64] = "fmod"; 55 Names[RTLIB::NEG_F32] = "__negsf2"; 56 Names[RTLIB::NEG_F64] = "__negdf2"; 57 Names[RTLIB::POWI_F32] = "__powisf2"; 58 Names[RTLIB::POWI_F64] = "__powidf2"; 59 Names[RTLIB::SQRT_F32] = "sqrtf"; 60 Names[RTLIB::SQRT_F64] = "sqrt"; 61 Names[RTLIB::SIN_F32] = "sinf"; 62 Names[RTLIB::SIN_F64] = "sin"; 63 Names[RTLIB::COS_F32] = "cosf"; 64 Names[RTLIB::COS_F64] = "cos"; 65 Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; 66 Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; 67 Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; 68 Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; 69 Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; 70 Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; 71 Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; 72 Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; 73 Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; 74 Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; 75 Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; 76 Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; 77 Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; 78 Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; 79 Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; 80 Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; 81 Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; 82 Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; 83 Names[RTLIB::OEQ_F32] = "__eqsf2"; 84 Names[RTLIB::OEQ_F64] = "__eqdf2"; 85 Names[RTLIB::UNE_F32] = "__nesf2"; 86 Names[RTLIB::UNE_F64] = "__nedf2"; 87 Names[RTLIB::OGE_F32] = "__gesf2"; 88 Names[RTLIB::OGE_F64] = "__gedf2"; 89 Names[RTLIB::OLT_F32] = "__ltsf2"; 90 Names[RTLIB::OLT_F64] = "__ltdf2"; 91 Names[RTLIB::OLE_F32] = "__lesf2"; 92 Names[RTLIB::OLE_F64] = "__ledf2"; 93 Names[RTLIB::OGT_F32] = "__gtsf2"; 94 Names[RTLIB::OGT_F64] = "__gtdf2"; 95 Names[RTLIB::UO_F32] = "__unordsf2"; 96 Names[RTLIB::UO_F64] = "__unorddf2"; 97} 98 99TargetLowering::TargetLowering(TargetMachine &tm) 100 : TM(tm), TD(TM.getTargetData()) { 101 assert(ISD::BUILTIN_OP_END <= 156 && 102 "Fixed size array in TargetLowering is not large enough!"); 103 // All operations default to being supported. 104 memset(OpActions, 0, sizeof(OpActions)); 105 memset(LoadXActions, 0, sizeof(LoadXActions)); 106 memset(&StoreXActions, 0, sizeof(StoreXActions)); 107 // Initialize all indexed load / store to expand. 108 for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { 109 for (unsigned IM = (unsigned)ISD::PRE_INC; 110 IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { 111 setIndexedLoadAction(IM, (MVT::ValueType)VT, Expand); 112 setIndexedStoreAction(IM, (MVT::ValueType)VT, Expand); 113 } 114 } 115 116 IsLittleEndian = TD->isLittleEndian(); 117 UsesGlobalOffsetTable = false; 118 ShiftAmountTy = SetCCResultTy = PointerTy = getValueType(TD->getIntPtrType()); 119 ShiftAmtHandling = Undefined; 120 memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); 121 memset(TargetDAGCombineArray, 0, 122 sizeof(TargetDAGCombineArray)/sizeof(TargetDAGCombineArray[0])); 123 maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; 124 allowUnalignedMemoryAccesses = false; 125 UseUnderscoreSetJmp = false; 126 UseUnderscoreLongJmp = false; 127 IntDivIsCheap = false; 128 Pow2DivIsCheap = false; 129 StackPointerRegisterToSaveRestore = 0; 130 SchedPreferenceInfo = SchedulingForLatency; 131 JumpBufSize = 0; 132 JumpBufAlignment = 0; 133 134 InitLibcallNames(LibcallRoutineNames); 135} 136 137TargetLowering::~TargetLowering() {} 138 139/// setValueTypeAction - Set the action for a particular value type. This 140/// assumes an action has not already been set for this value type. 141static void SetValueTypeAction(MVT::ValueType VT, 142 TargetLowering::LegalizeAction Action, 143 TargetLowering &TLI, 144 MVT::ValueType *TransformToType, 145 TargetLowering::ValueTypeActionImpl &ValueTypeActions) { 146 ValueTypeActions.setTypeAction(VT, Action); 147 if (Action == TargetLowering::Promote) { 148 MVT::ValueType PromoteTo; 149 if (VT == MVT::f32) 150 PromoteTo = MVT::f64; 151 else { 152 unsigned LargerReg = VT+1; 153 while (!TLI.isTypeLegal((MVT::ValueType)LargerReg)) { 154 ++LargerReg; 155 assert(MVT::isInteger((MVT::ValueType)LargerReg) && 156 "Nothing to promote to??"); 157 } 158 PromoteTo = (MVT::ValueType)LargerReg; 159 } 160 161 assert(MVT::isInteger(VT) == MVT::isInteger(PromoteTo) && 162 MVT::isFloatingPoint(VT) == MVT::isFloatingPoint(PromoteTo) && 163 "Can only promote from int->int or fp->fp!"); 164 assert(VT < PromoteTo && "Must promote to a larger type!"); 165 TransformToType[VT] = PromoteTo; 166 } else if (Action == TargetLowering::Expand) { 167 // f32 and f64 is each expanded to corresponding integer type of same size. 168 if (VT == MVT::f32) 169 TransformToType[VT] = MVT::i32; 170 else if (VT == MVT::f64) 171 TransformToType[VT] = MVT::i64; 172 else { 173 assert((VT == MVT::Vector || MVT::isInteger(VT)) && VT > MVT::i8 && 174 "Cannot expand this type: target must support SOME integer reg!"); 175 // Expand to the next smaller integer type! 176 TransformToType[VT] = (MVT::ValueType)(VT-1); 177 } 178 } 179} 180 181 182/// computeRegisterProperties - Once all of the register classes are added, 183/// this allows us to compute derived properties we expose. 184void TargetLowering::computeRegisterProperties() { 185 assert(MVT::LAST_VALUETYPE <= 32 && 186 "Too many value types for ValueTypeActions to hold!"); 187 188 // Everything defaults to one. 189 for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) 190 NumElementsForVT[i] = 1; 191 192 // Find the largest integer register class. 193 unsigned LargestIntReg = MVT::i128; 194 for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) 195 assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); 196 197 // Every integer value type larger than this largest register takes twice as 198 // many registers to represent as the previous ValueType. 199 unsigned ExpandedReg = LargestIntReg; ++LargestIntReg; 200 for (++ExpandedReg; MVT::isInteger((MVT::ValueType)ExpandedReg);++ExpandedReg) 201 NumElementsForVT[ExpandedReg] = 2*NumElementsForVT[ExpandedReg-1]; 202 203 // Inspect all of the ValueType's possible, deciding how to process them. 204 for (unsigned IntReg = MVT::i1; IntReg <= MVT::i128; ++IntReg) 205 // If we are expanding this type, expand it! 206 if (getNumElements((MVT::ValueType)IntReg) != 1) 207 SetValueTypeAction((MVT::ValueType)IntReg, Expand, *this, TransformToType, 208 ValueTypeActions); 209 else if (!isTypeLegal((MVT::ValueType)IntReg)) 210 // Otherwise, if we don't have native support, we must promote to a 211 // larger type. 212 SetValueTypeAction((MVT::ValueType)IntReg, Promote, *this, 213 TransformToType, ValueTypeActions); 214 else 215 TransformToType[(MVT::ValueType)IntReg] = (MVT::ValueType)IntReg; 216 217 // If the target does not have native F64 support, expand it to I64. We will 218 // be generating soft float library calls. If the target does not have native 219 // support for F32, promote it to F64 if it is legal. Otherwise, expand it to 220 // I32. 221 if (isTypeLegal(MVT::f64)) 222 TransformToType[MVT::f64] = MVT::f64; 223 else { 224 NumElementsForVT[MVT::f64] = NumElementsForVT[MVT::i64]; 225 SetValueTypeAction(MVT::f64, Expand, *this, TransformToType, 226 ValueTypeActions); 227 } 228 if (isTypeLegal(MVT::f32)) 229 TransformToType[MVT::f32] = MVT::f32; 230 else if (isTypeLegal(MVT::f64)) 231 SetValueTypeAction(MVT::f32, Promote, *this, TransformToType, 232 ValueTypeActions); 233 else { 234 NumElementsForVT[MVT::f32] = NumElementsForVT[MVT::i32]; 235 SetValueTypeAction(MVT::f32, Expand, *this, TransformToType, 236 ValueTypeActions); 237 } 238 239 // Set MVT::Vector to always be Expanded 240 SetValueTypeAction(MVT::Vector, Expand, *this, TransformToType, 241 ValueTypeActions); 242 243 // Loop over all of the legal vector value types, specifying an identity type 244 // transformation. 245 for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; 246 i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { 247 if (isTypeLegal((MVT::ValueType)i)) 248 TransformToType[i] = (MVT::ValueType)i; 249 } 250} 251 252const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { 253 return NULL; 254} 255 256/// getPackedTypeBreakdown - Packed types are broken down into some number of 257/// legal first class types. For example, <8 x float> maps to 2 MVT::v4f32 258/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 259/// 260/// This method returns the number and type of the resultant breakdown. 261/// 262unsigned TargetLowering::getPackedTypeBreakdown(const PackedType *PTy, 263 MVT::ValueType &PTyElementVT, 264 MVT::ValueType &PTyLegalElementVT) const { 265 // Figure out the right, legal destination reg to copy into. 266 unsigned NumElts = PTy->getNumElements(); 267 MVT::ValueType EltTy = getValueType(PTy->getElementType()); 268 269 unsigned NumVectorRegs = 1; 270 271 // Divide the input until we get to a supported size. This will always 272 // end with a scalar if the target doesn't support vectors. 273 while (NumElts > 1 && !isTypeLegal(getVectorType(EltTy, NumElts))) { 274 NumElts >>= 1; 275 NumVectorRegs <<= 1; 276 } 277 278 MVT::ValueType VT; 279 if (NumElts == 1) { 280 VT = EltTy; 281 } else { 282 VT = getVectorType(EltTy, NumElts); 283 } 284 PTyElementVT = VT; 285 286 MVT::ValueType DestVT = getTypeToTransformTo(VT); 287 PTyLegalElementVT = DestVT; 288 if (DestVT < VT) { 289 // Value is expanded, e.g. i64 -> i16. 290 return NumVectorRegs*(MVT::getSizeInBits(VT)/MVT::getSizeInBits(DestVT)); 291 } else { 292 // Otherwise, promotion or legal types use the same number of registers as 293 // the vector decimated to the appropriate level. 294 return NumVectorRegs; 295 } 296 297 return 1; 298} 299 300//===----------------------------------------------------------------------===// 301// Optimization Methods 302//===----------------------------------------------------------------------===// 303 304/// ShrinkDemandedConstant - Check to see if the specified operand of the 305/// specified instruction is a constant integer. If so, check to see if there 306/// are any bits set in the constant that are not demanded. If so, shrink the 307/// constant and return true. 308bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 309 uint64_t Demanded) { 310 // FIXME: ISD::SELECT, ISD::SELECT_CC 311 switch(Op.getOpcode()) { 312 default: break; 313 case ISD::AND: 314 case ISD::OR: 315 case ISD::XOR: 316 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) 317 if ((~Demanded & C->getValue()) != 0) { 318 MVT::ValueType VT = Op.getValueType(); 319 SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), 320 DAG.getConstant(Demanded & C->getValue(), 321 VT)); 322 return CombineTo(Op, New); 323 } 324 break; 325 } 326 return false; 327} 328 329/// SimplifyDemandedBits - Look at Op. At this point, we know that only the 330/// DemandedMask bits of the result of Op are ever used downstream. If we can 331/// use this information to simplify Op, create a new simplified DAG node and 332/// return true, returning the original and new nodes in Old and New. Otherwise, 333/// analyze the expression and return a mask of KnownOne and KnownZero bits for 334/// the expression (used to simplify the caller). The KnownZero/One bits may 335/// only be accurate for those bits in the DemandedMask. 336bool TargetLowering::SimplifyDemandedBits(SDOperand Op, uint64_t DemandedMask, 337 uint64_t &KnownZero, 338 uint64_t &KnownOne, 339 TargetLoweringOpt &TLO, 340 unsigned Depth) const { 341 KnownZero = KnownOne = 0; // Don't know anything. 342 // Other users may use these bits. 343 if (!Op.Val->hasOneUse()) { 344 if (Depth != 0) { 345 // If not at the root, Just compute the KnownZero/KnownOne bits to 346 // simplify things downstream. 347 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 348 return false; 349 } 350 // If this is the root being simplified, allow it to have multiple uses, 351 // just set the DemandedMask to all bits. 352 DemandedMask = MVT::getIntVTBitMask(Op.getValueType()); 353 } else if (DemandedMask == 0) { 354 // Not demanding any bits from Op. 355 if (Op.getOpcode() != ISD::UNDEF) 356 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType())); 357 return false; 358 } else if (Depth == 6) { // Limit search depth. 359 return false; 360 } 361 362 uint64_t KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; 363 switch (Op.getOpcode()) { 364 case ISD::Constant: 365 // We know all of the bits for a constant! 366 KnownOne = cast<ConstantSDNode>(Op)->getValue() & DemandedMask; 367 KnownZero = ~KnownOne & DemandedMask; 368 return false; // Don't fall through, will infinitely loop. 369 case ISD::AND: 370 // If the RHS is a constant, check to see if the LHS would be zero without 371 // using the bits from the RHS. Below, we use knowledge about the RHS to 372 // simplify the LHS, here we're using information from the LHS to simplify 373 // the RHS. 374 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 375 uint64_t LHSZero, LHSOne; 376 ComputeMaskedBits(Op.getOperand(0), DemandedMask, 377 LHSZero, LHSOne, Depth+1); 378 // If the LHS already has zeros where RHSC does, this and is dead. 379 if ((LHSZero & DemandedMask) == (~RHSC->getValue() & DemandedMask)) 380 return TLO.CombineTo(Op, Op.getOperand(0)); 381 // If any of the set bits in the RHS are known zero on the LHS, shrink 382 // the constant. 383 if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & DemandedMask)) 384 return true; 385 } 386 387 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 388 KnownOne, TLO, Depth+1)) 389 return true; 390 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 391 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownZero, 392 KnownZero2, KnownOne2, TLO, Depth+1)) 393 return true; 394 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 395 396 // If all of the demanded bits are known one on one side, return the other. 397 // These bits cannot contribute to the result of the 'and'. 398 if ((DemandedMask & ~KnownZero2 & KnownOne)==(DemandedMask & ~KnownZero2)) 399 return TLO.CombineTo(Op, Op.getOperand(0)); 400 if ((DemandedMask & ~KnownZero & KnownOne2)==(DemandedMask & ~KnownZero)) 401 return TLO.CombineTo(Op, Op.getOperand(1)); 402 // If all of the demanded bits in the inputs are known zeros, return zero. 403 if ((DemandedMask & (KnownZero|KnownZero2)) == DemandedMask) 404 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); 405 // If the RHS is a constant, see if we can simplify it. 406 if (TLO.ShrinkDemandedConstant(Op, DemandedMask & ~KnownZero2)) 407 return true; 408 409 // Output known-1 bits are only known if set in both the LHS & RHS. 410 KnownOne &= KnownOne2; 411 // Output known-0 are known to be clear if zero in either the LHS | RHS. 412 KnownZero |= KnownZero2; 413 break; 414 case ISD::OR: 415 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 416 KnownOne, TLO, Depth+1)) 417 return true; 418 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 419 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & ~KnownOne, 420 KnownZero2, KnownOne2, TLO, Depth+1)) 421 return true; 422 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 423 424 // If all of the demanded bits are known zero on one side, return the other. 425 // These bits cannot contribute to the result of the 'or'. 426 if ((DemandedMask & ~KnownOne2 & KnownZero) == (DemandedMask & ~KnownOne2)) 427 return TLO.CombineTo(Op, Op.getOperand(0)); 428 if ((DemandedMask & ~KnownOne & KnownZero2) == (DemandedMask & ~KnownOne)) 429 return TLO.CombineTo(Op, Op.getOperand(1)); 430 // If all of the potentially set bits on one side are known to be set on 431 // the other side, just use the 'other' side. 432 if ((DemandedMask & (~KnownZero) & KnownOne2) == 433 (DemandedMask & (~KnownZero))) 434 return TLO.CombineTo(Op, Op.getOperand(0)); 435 if ((DemandedMask & (~KnownZero2) & KnownOne) == 436 (DemandedMask & (~KnownZero2))) 437 return TLO.CombineTo(Op, Op.getOperand(1)); 438 // If the RHS is a constant, see if we can simplify it. 439 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 440 return true; 441 442 // Output known-0 bits are only known if clear in both the LHS & RHS. 443 KnownZero &= KnownZero2; 444 // Output known-1 are known to be set if set in either the LHS | RHS. 445 KnownOne |= KnownOne2; 446 break; 447 case ISD::XOR: 448 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero, 449 KnownOne, TLO, Depth+1)) 450 return true; 451 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 452 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, KnownZero2, 453 KnownOne2, TLO, Depth+1)) 454 return true; 455 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 456 457 // If all of the demanded bits are known zero on one side, return the other. 458 // These bits cannot contribute to the result of the 'xor'. 459 if ((DemandedMask & KnownZero) == DemandedMask) 460 return TLO.CombineTo(Op, Op.getOperand(0)); 461 if ((DemandedMask & KnownZero2) == DemandedMask) 462 return TLO.CombineTo(Op, Op.getOperand(1)); 463 464 // If all of the unknown bits are known to be zero on one side or the other 465 // (but not both) turn this into an *inclusive* or. 466 // e.g. (A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 467 if ((DemandedMask & ~KnownZero & ~KnownZero2) == 0) 468 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(), 469 Op.getOperand(0), 470 Op.getOperand(1))); 471 472 // Output known-0 bits are known if clear or set in both the LHS & RHS. 473 KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 474 // Output known-1 are known to be set if set in only one of the LHS, RHS. 475 KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 476 477 // If all of the demanded bits on one side are known, and all of the set 478 // bits on that side are also known to be set on the other side, turn this 479 // into an AND, as we know the bits will be cleared. 480 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 481 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) { // all known 482 if ((KnownOne & KnownOne2) == KnownOne) { 483 MVT::ValueType VT = Op.getValueType(); 484 SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & DemandedMask, VT); 485 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0), 486 ANDC)); 487 } 488 } 489 490 // If the RHS is a constant, see if we can simplify it. 491 // FIXME: for XOR, we prefer to force bits to 1 if they will make a -1. 492 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 493 return true; 494 495 KnownZero = KnownZeroOut; 496 KnownOne = KnownOneOut; 497 break; 498 case ISD::SETCC: 499 // If we know the result of a setcc has the top bits zero, use this info. 500 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 501 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 502 break; 503 case ISD::SELECT: 504 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero, 505 KnownOne, TLO, Depth+1)) 506 return true; 507 if (SimplifyDemandedBits(Op.getOperand(1), DemandedMask, KnownZero2, 508 KnownOne2, TLO, Depth+1)) 509 return true; 510 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 511 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 512 513 // If the operands are constants, see if we can simplify them. 514 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 515 return true; 516 517 // Only known if known in both the LHS and RHS. 518 KnownOne &= KnownOne2; 519 KnownZero &= KnownZero2; 520 break; 521 case ISD::SELECT_CC: 522 if (SimplifyDemandedBits(Op.getOperand(3), DemandedMask, KnownZero, 523 KnownOne, TLO, Depth+1)) 524 return true; 525 if (SimplifyDemandedBits(Op.getOperand(2), DemandedMask, KnownZero2, 526 KnownOne2, TLO, Depth+1)) 527 return true; 528 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 529 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 530 531 // If the operands are constants, see if we can simplify them. 532 if (TLO.ShrinkDemandedConstant(Op, DemandedMask)) 533 return true; 534 535 // Only known if known in both the LHS and RHS. 536 KnownOne &= KnownOne2; 537 KnownZero &= KnownZero2; 538 break; 539 case ISD::SHL: 540 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 541 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask >> SA->getValue(), 542 KnownZero, KnownOne, TLO, Depth+1)) 543 return true; 544 KnownZero <<= SA->getValue(); 545 KnownOne <<= SA->getValue(); 546 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 547 } 548 break; 549 case ISD::SRL: 550 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 551 MVT::ValueType VT = Op.getValueType(); 552 unsigned ShAmt = SA->getValue(); 553 554 // Compute the new bits that are at the top now. 555 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 556 if (SimplifyDemandedBits(Op.getOperand(0), 557 (DemandedMask << ShAmt) & TypeMask, 558 KnownZero, KnownOne, TLO, Depth+1)) 559 return true; 560 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 561 KnownZero &= TypeMask; 562 KnownOne &= TypeMask; 563 KnownZero >>= ShAmt; 564 KnownOne >>= ShAmt; 565 566 uint64_t HighBits = (1ULL << ShAmt)-1; 567 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 568 KnownZero |= HighBits; // High bits known zero. 569 } 570 break; 571 case ISD::SRA: 572 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 573 MVT::ValueType VT = Op.getValueType(); 574 unsigned ShAmt = SA->getValue(); 575 576 // Compute the new bits that are at the top now. 577 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 578 579 uint64_t InDemandedMask = (DemandedMask << ShAmt) & TypeMask; 580 581 // If any of the demanded bits are produced by the sign extension, we also 582 // demand the input sign bit. 583 uint64_t HighBits = (1ULL << ShAmt)-1; 584 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 585 if (HighBits & DemandedMask) 586 InDemandedMask |= MVT::getIntVTSignBit(VT); 587 588 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, 589 KnownZero, KnownOne, TLO, Depth+1)) 590 return true; 591 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 592 KnownZero &= TypeMask; 593 KnownOne &= TypeMask; 594 KnownZero >>= ShAmt; 595 KnownOne >>= ShAmt; 596 597 // Handle the sign bits. 598 uint64_t SignBit = MVT::getIntVTSignBit(VT); 599 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 600 601 // If the input sign bit is known to be zero, or if none of the top bits 602 // are demanded, turn this into an unsigned shift right. 603 if ((KnownZero & SignBit) || (HighBits & ~DemandedMask) == HighBits) { 604 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0), 605 Op.getOperand(1))); 606 } else if (KnownOne & SignBit) { // New bits are known one. 607 KnownOne |= HighBits; 608 } 609 } 610 break; 611 case ISD::SIGN_EXTEND_INREG: { 612 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 613 614 // Sign extension. Compute the demanded bits in the result that are not 615 // present in the input. 616 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & DemandedMask; 617 618 // If none of the extended bits are demanded, eliminate the sextinreg. 619 if (NewBits == 0) 620 return TLO.CombineTo(Op, Op.getOperand(0)); 621 622 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 623 int64_t InputDemandedBits = DemandedMask & MVT::getIntVTBitMask(EVT); 624 625 // Since the sign extended bits are demanded, we know that the sign 626 // bit is demanded. 627 InputDemandedBits |= InSignBit; 628 629 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, 630 KnownZero, KnownOne, TLO, Depth+1)) 631 return true; 632 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 633 634 // If the sign bit of the input is known set or clear, then we know the 635 // top bits of the result. 636 637 // If the input sign bit is known zero, convert this into a zero extension. 638 if (KnownZero & InSignBit) 639 return TLO.CombineTo(Op, 640 TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT)); 641 642 if (KnownOne & InSignBit) { // Input sign bit known set 643 KnownOne |= NewBits; 644 KnownZero &= ~NewBits; 645 } else { // Input sign bit unknown 646 KnownZero &= ~NewBits; 647 KnownOne &= ~NewBits; 648 } 649 break; 650 } 651 case ISD::CTTZ: 652 case ISD::CTLZ: 653 case ISD::CTPOP: { 654 MVT::ValueType VT = Op.getValueType(); 655 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 656 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 657 KnownOne = 0; 658 break; 659 } 660 case ISD::LOAD: { 661 if (ISD::isZEXTLoad(Op.Val)) { 662 LoadSDNode *LD = cast<LoadSDNode>(Op); 663 MVT::ValueType VT = LD->getLoadedVT(); 664 KnownZero |= ~MVT::getIntVTBitMask(VT) & DemandedMask; 665 } 666 break; 667 } 668 case ISD::ZERO_EXTEND: { 669 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 670 671 // If none of the top bits are demanded, convert this into an any_extend. 672 uint64_t NewBits = (~InMask) & DemandedMask; 673 if (NewBits == 0) 674 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 675 Op.getValueType(), 676 Op.getOperand(0))); 677 678 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 679 KnownZero, KnownOne, TLO, Depth+1)) 680 return true; 681 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 682 KnownZero |= NewBits; 683 break; 684 } 685 case ISD::SIGN_EXTEND: { 686 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 687 uint64_t InMask = MVT::getIntVTBitMask(InVT); 688 uint64_t InSignBit = MVT::getIntVTSignBit(InVT); 689 uint64_t NewBits = (~InMask) & DemandedMask; 690 691 // If none of the top bits are demanded, convert this into an any_extend. 692 if (NewBits == 0) 693 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(), 694 Op.getOperand(0))); 695 696 // Since some of the sign extended bits are demanded, we know that the sign 697 // bit is demanded. 698 uint64_t InDemandedBits = DemandedMask & InMask; 699 InDemandedBits |= InSignBit; 700 701 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 702 KnownOne, TLO, Depth+1)) 703 return true; 704 705 // If the sign bit is known zero, convert this to a zero extend. 706 if (KnownZero & InSignBit) 707 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 708 Op.getValueType(), 709 Op.getOperand(0))); 710 711 // If the sign bit is known one, the top bits match. 712 if (KnownOne & InSignBit) { 713 KnownOne |= NewBits; 714 KnownZero &= ~NewBits; 715 } else { // Otherwise, top bits aren't known. 716 KnownOne &= ~NewBits; 717 KnownZero &= ~NewBits; 718 } 719 break; 720 } 721 case ISD::ANY_EXTEND: { 722 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 723 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 724 KnownZero, KnownOne, TLO, Depth+1)) 725 return true; 726 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 727 break; 728 } 729 case ISD::TRUNCATE: { 730 // Simplify the input, using demanded bit information, and compute the known 731 // zero/one bits live out. 732 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, 733 KnownZero, KnownOne, TLO, Depth+1)) 734 return true; 735 736 // If the input is only used by this truncate, see if we can shrink it based 737 // on the known demanded bits. 738 if (Op.getOperand(0).Val->hasOneUse()) { 739 SDOperand In = Op.getOperand(0); 740 switch (In.getOpcode()) { 741 default: break; 742 case ISD::SRL: 743 // Shrink SRL by a constant if none of the high bits shifted in are 744 // demanded. 745 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ 746 uint64_t HighBits = MVT::getIntVTBitMask(In.getValueType()); 747 HighBits &= ~MVT::getIntVTBitMask(Op.getValueType()); 748 HighBits >>= ShAmt->getValue(); 749 750 if (ShAmt->getValue() < MVT::getSizeInBits(Op.getValueType()) && 751 (DemandedMask & HighBits) == 0) { 752 // None of the shifted in bits are needed. Add a truncate of the 753 // shift input, then shift it. 754 SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 755 Op.getValueType(), 756 In.getOperand(0)); 757 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(), 758 NewTrunc, In.getOperand(1))); 759 } 760 } 761 break; 762 } 763 } 764 765 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 766 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 767 KnownZero &= OutMask; 768 KnownOne &= OutMask; 769 break; 770 } 771 case ISD::AssertZext: { 772 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 773 uint64_t InMask = MVT::getIntVTBitMask(VT); 774 if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask & InMask, 775 KnownZero, KnownOne, TLO, Depth+1)) 776 return true; 777 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 778 KnownZero |= ~InMask & DemandedMask; 779 break; 780 } 781 case ISD::ADD: 782 case ISD::SUB: 783 case ISD::INTRINSIC_WO_CHAIN: 784 case ISD::INTRINSIC_W_CHAIN: 785 case ISD::INTRINSIC_VOID: 786 // Just use ComputeMaskedBits to compute output bits. 787 ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 788 break; 789 } 790 791 // If we know the value of all of the demanded bits, return this as a 792 // constant. 793 if ((DemandedMask & (KnownZero|KnownOne)) == DemandedMask) 794 return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType())); 795 796 return false; 797} 798 799/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use 800/// this predicate to simplify operations downstream. Mask is known to be zero 801/// for bits that V cannot have. 802bool TargetLowering::MaskedValueIsZero(SDOperand Op, uint64_t Mask, 803 unsigned Depth) const { 804 uint64_t KnownZero, KnownOne; 805 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 806 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 807 return (KnownZero & Mask) == Mask; 808} 809 810/// ComputeMaskedBits - Determine which of the bits specified in Mask are 811/// known to be either zero or one and return them in the KnownZero/KnownOne 812/// bitsets. This code only analyzes bits in Mask, in order to short-circuit 813/// processing. 814void TargetLowering::ComputeMaskedBits(SDOperand Op, uint64_t Mask, 815 uint64_t &KnownZero, uint64_t &KnownOne, 816 unsigned Depth) const { 817 KnownZero = KnownOne = 0; // Don't know anything. 818 if (Depth == 6 || Mask == 0) 819 return; // Limit search depth. 820 821 uint64_t KnownZero2, KnownOne2; 822 823 switch (Op.getOpcode()) { 824 case ISD::Constant: 825 // We know all of the bits for a constant! 826 KnownOne = cast<ConstantSDNode>(Op)->getValue() & Mask; 827 KnownZero = ~KnownOne & Mask; 828 return; 829 case ISD::AND: 830 // If either the LHS or the RHS are Zero, the result is zero. 831 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 832 Mask &= ~KnownZero; 833 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 834 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 835 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 836 837 // Output known-1 bits are only known if set in both the LHS & RHS. 838 KnownOne &= KnownOne2; 839 // Output known-0 are known to be clear if zero in either the LHS | RHS. 840 KnownZero |= KnownZero2; 841 return; 842 case ISD::OR: 843 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 844 Mask &= ~KnownOne; 845 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 846 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 847 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 848 849 // Output known-0 bits are only known if clear in both the LHS & RHS. 850 KnownZero &= KnownZero2; 851 // Output known-1 are known to be set if set in either the LHS | RHS. 852 KnownOne |= KnownOne2; 853 return; 854 case ISD::XOR: { 855 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 856 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 857 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 858 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 859 860 // Output known-0 bits are known if clear or set in both the LHS & RHS. 861 uint64_t KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 862 // Output known-1 are known to be set if set in only one of the LHS, RHS. 863 KnownOne = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 864 KnownZero = KnownZeroOut; 865 return; 866 } 867 case ISD::SELECT: 868 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); 869 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); 870 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 871 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 872 873 // Only known if known in both the LHS and RHS. 874 KnownOne &= KnownOne2; 875 KnownZero &= KnownZero2; 876 return; 877 case ISD::SELECT_CC: 878 ComputeMaskedBits(Op.getOperand(3), Mask, KnownZero, KnownOne, Depth+1); 879 ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero2, KnownOne2, Depth+1); 880 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 881 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 882 883 // Only known if known in both the LHS and RHS. 884 KnownOne &= KnownOne2; 885 KnownZero &= KnownZero2; 886 return; 887 case ISD::SETCC: 888 // If we know the result of a setcc has the top bits zero, use this info. 889 if (getSetCCResultContents() == TargetLowering::ZeroOrOneSetCCResult) 890 KnownZero |= (MVT::getIntVTBitMask(Op.getValueType()) ^ 1ULL); 891 return; 892 case ISD::SHL: 893 // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0 894 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 895 ComputeMaskedBits(Op.getOperand(0), Mask >> SA->getValue(), 896 KnownZero, KnownOne, Depth+1); 897 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 898 KnownZero <<= SA->getValue(); 899 KnownOne <<= SA->getValue(); 900 KnownZero |= (1ULL << SA->getValue())-1; // low bits known zero. 901 } 902 return; 903 case ISD::SRL: 904 // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0 905 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 906 MVT::ValueType VT = Op.getValueType(); 907 unsigned ShAmt = SA->getValue(); 908 909 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 910 ComputeMaskedBits(Op.getOperand(0), (Mask << ShAmt) & TypeMask, 911 KnownZero, KnownOne, Depth+1); 912 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 913 KnownZero &= TypeMask; 914 KnownOne &= TypeMask; 915 KnownZero >>= ShAmt; 916 KnownOne >>= ShAmt; 917 918 uint64_t HighBits = (1ULL << ShAmt)-1; 919 HighBits <<= MVT::getSizeInBits(VT)-ShAmt; 920 KnownZero |= HighBits; // High bits known zero. 921 } 922 return; 923 case ISD::SRA: 924 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 925 MVT::ValueType VT = Op.getValueType(); 926 unsigned ShAmt = SA->getValue(); 927 928 // Compute the new bits that are at the top now. 929 uint64_t TypeMask = MVT::getIntVTBitMask(VT); 930 931 uint64_t InDemandedMask = (Mask << ShAmt) & TypeMask; 932 // If any of the demanded bits are produced by the sign extension, we also 933 // demand the input sign bit. 934 uint64_t HighBits = (1ULL << ShAmt)-1; 935 HighBits <<= MVT::getSizeInBits(VT) - ShAmt; 936 if (HighBits & Mask) 937 InDemandedMask |= MVT::getIntVTSignBit(VT); 938 939 ComputeMaskedBits(Op.getOperand(0), InDemandedMask, KnownZero, KnownOne, 940 Depth+1); 941 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 942 KnownZero &= TypeMask; 943 KnownOne &= TypeMask; 944 KnownZero >>= ShAmt; 945 KnownOne >>= ShAmt; 946 947 // Handle the sign bits. 948 uint64_t SignBit = MVT::getIntVTSignBit(VT); 949 SignBit >>= ShAmt; // Adjust to where it is now in the mask. 950 951 if (KnownZero & SignBit) { 952 KnownZero |= HighBits; // New bits are known zero. 953 } else if (KnownOne & SignBit) { 954 KnownOne |= HighBits; // New bits are known one. 955 } 956 } 957 return; 958 case ISD::SIGN_EXTEND_INREG: { 959 MVT::ValueType EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 960 961 // Sign extension. Compute the demanded bits in the result that are not 962 // present in the input. 963 uint64_t NewBits = ~MVT::getIntVTBitMask(EVT) & Mask; 964 965 uint64_t InSignBit = MVT::getIntVTSignBit(EVT); 966 int64_t InputDemandedBits = Mask & MVT::getIntVTBitMask(EVT); 967 968 // If the sign extended bits are demanded, we know that the sign 969 // bit is demanded. 970 if (NewBits) 971 InputDemandedBits |= InSignBit; 972 973 ComputeMaskedBits(Op.getOperand(0), InputDemandedBits, 974 KnownZero, KnownOne, Depth+1); 975 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 976 977 // If the sign bit of the input is known set or clear, then we know the 978 // top bits of the result. 979 if (KnownZero & InSignBit) { // Input sign bit known clear 980 KnownZero |= NewBits; 981 KnownOne &= ~NewBits; 982 } else if (KnownOne & InSignBit) { // Input sign bit known set 983 KnownOne |= NewBits; 984 KnownZero &= ~NewBits; 985 } else { // Input sign bit unknown 986 KnownZero &= ~NewBits; 987 KnownOne &= ~NewBits; 988 } 989 return; 990 } 991 case ISD::CTTZ: 992 case ISD::CTLZ: 993 case ISD::CTPOP: { 994 MVT::ValueType VT = Op.getValueType(); 995 unsigned LowBits = Log2_32(MVT::getSizeInBits(VT))+1; 996 KnownZero = ~((1ULL << LowBits)-1) & MVT::getIntVTBitMask(VT); 997 KnownOne = 0; 998 return; 999 } 1000 case ISD::LOAD: { 1001 if (ISD::isZEXTLoad(Op.Val)) { 1002 LoadSDNode *LD = cast<LoadSDNode>(Op); 1003 MVT::ValueType VT = LD->getLoadedVT(); 1004 KnownZero |= ~MVT::getIntVTBitMask(VT) & Mask; 1005 } 1006 return; 1007 } 1008 case ISD::ZERO_EXTEND: { 1009 uint64_t InMask = MVT::getIntVTBitMask(Op.getOperand(0).getValueType()); 1010 uint64_t NewBits = (~InMask) & Mask; 1011 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 1012 KnownOne, Depth+1); 1013 KnownZero |= NewBits & Mask; 1014 KnownOne &= ~NewBits; 1015 return; 1016 } 1017 case ISD::SIGN_EXTEND: { 1018 MVT::ValueType InVT = Op.getOperand(0).getValueType(); 1019 unsigned InBits = MVT::getSizeInBits(InVT); 1020 uint64_t InMask = MVT::getIntVTBitMask(InVT); 1021 uint64_t InSignBit = 1ULL << (InBits-1); 1022 uint64_t NewBits = (~InMask) & Mask; 1023 uint64_t InDemandedBits = Mask & InMask; 1024 1025 // If any of the sign extended bits are demanded, we know that the sign 1026 // bit is demanded. 1027 if (NewBits & Mask) 1028 InDemandedBits |= InSignBit; 1029 1030 ComputeMaskedBits(Op.getOperand(0), InDemandedBits, KnownZero, 1031 KnownOne, Depth+1); 1032 // If the sign bit is known zero or one, the top bits match. 1033 if (KnownZero & InSignBit) { 1034 KnownZero |= NewBits; 1035 KnownOne &= ~NewBits; 1036 } else if (KnownOne & InSignBit) { 1037 KnownOne |= NewBits; 1038 KnownZero &= ~NewBits; 1039 } else { // Otherwise, top bits aren't known. 1040 KnownOne &= ~NewBits; 1041 KnownZero &= ~NewBits; 1042 } 1043 return; 1044 } 1045 case ISD::ANY_EXTEND: { 1046 MVT::ValueType VT = Op.getOperand(0).getValueType(); 1047 ComputeMaskedBits(Op.getOperand(0), Mask & MVT::getIntVTBitMask(VT), 1048 KnownZero, KnownOne, Depth+1); 1049 return; 1050 } 1051 case ISD::TRUNCATE: { 1052 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 1053 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 1054 uint64_t OutMask = MVT::getIntVTBitMask(Op.getValueType()); 1055 KnownZero &= OutMask; 1056 KnownOne &= OutMask; 1057 break; 1058 } 1059 case ISD::AssertZext: { 1060 MVT::ValueType VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 1061 uint64_t InMask = MVT::getIntVTBitMask(VT); 1062 ComputeMaskedBits(Op.getOperand(0), Mask & InMask, KnownZero, 1063 KnownOne, Depth+1); 1064 KnownZero |= (~InMask) & Mask; 1065 return; 1066 } 1067 case ISD::ADD: { 1068 // If either the LHS or the RHS are Zero, the result is zero. 1069 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 1070 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); 1071 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 1072 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 1073 1074 // Output known-0 bits are known if clear or set in both the low clear bits 1075 // common to both LHS & RHS. For example, 8+(X<<3) is known to have the 1076 // low 3 bits clear. 1077 uint64_t KnownZeroOut = std::min(CountTrailingZeros_64(~KnownZero), 1078 CountTrailingZeros_64(~KnownZero2)); 1079 1080 KnownZero = (1ULL << KnownZeroOut) - 1; 1081 KnownOne = 0; 1082 return; 1083 } 1084 case ISD::SUB: { 1085 ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); 1086 if (!CLHS) return; 1087 1088 // We know that the top bits of C-X are clear if X contains less bits 1089 // than C (i.e. no wrap-around can happen). For example, 20-X is 1090 // positive if we can prove that X is >= 0 and < 16. 1091 MVT::ValueType VT = CLHS->getValueType(0); 1092 if ((CLHS->getValue() & MVT::getIntVTSignBit(VT)) == 0) { // sign bit clear 1093 unsigned NLZ = CountLeadingZeros_64(CLHS->getValue()+1); 1094 uint64_t MaskV = (1ULL << (63-NLZ))-1; // NLZ can't be 64 with no sign bit 1095 MaskV = ~MaskV & MVT::getIntVTBitMask(VT); 1096 ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); 1097 1098 // If all of the MaskV bits are known to be zero, then we know the output 1099 // top bits are zero, because we now know that the output is from [0-C]. 1100 if ((KnownZero & MaskV) == MaskV) { 1101 unsigned NLZ2 = CountLeadingZeros_64(CLHS->getValue()); 1102 KnownZero = ~((1ULL << (64-NLZ2))-1) & Mask; // Top bits known zero. 1103 KnownOne = 0; // No one bits known. 1104 } else { 1105 KnownZero = KnownOne = 0; // Otherwise, nothing known. 1106 } 1107 } 1108 return; 1109 } 1110 default: 1111 // Allow the target to implement this method for its nodes. 1112 if (Op.getOpcode() >= ISD::BUILTIN_OP_END) { 1113 case ISD::INTRINSIC_WO_CHAIN: 1114 case ISD::INTRINSIC_W_CHAIN: 1115 case ISD::INTRINSIC_VOID: 1116 computeMaskedBitsForTargetNode(Op, Mask, KnownZero, KnownOne); 1117 } 1118 return; 1119 } 1120} 1121 1122/// computeMaskedBitsForTargetNode - Determine which of the bits specified 1123/// in Mask are known to be either zero or one and return them in the 1124/// KnownZero/KnownOne bitsets. 1125void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op, 1126 uint64_t Mask, 1127 uint64_t &KnownZero, 1128 uint64_t &KnownOne, 1129 unsigned Depth) const { 1130 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1131 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1132 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1133 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1134 "Should use MaskedValueIsZero if you don't know whether Op" 1135 " is a target node!"); 1136 KnownZero = 0; 1137 KnownOne = 0; 1138} 1139 1140/// ComputeNumSignBits - Return the number of times the sign bit of the 1141/// register is replicated into the other bits. We know that at least 1 bit 1142/// is always equal to the sign bit (itself), but other cases can give us 1143/// information. For example, immediately after an "SRA X, 2", we know that 1144/// the top 3 bits are all equal to each other, so we return 3. 1145unsigned TargetLowering::ComputeNumSignBits(SDOperand Op, unsigned Depth) const{ 1146 MVT::ValueType VT = Op.getValueType(); 1147 assert(MVT::isInteger(VT) && "Invalid VT!"); 1148 unsigned VTBits = MVT::getSizeInBits(VT); 1149 unsigned Tmp, Tmp2; 1150 1151 if (Depth == 6) 1152 return 1; // Limit search depth. 1153 1154 switch (Op.getOpcode()) { 1155 default: break; 1156 case ISD::AssertSext: 1157 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1158 return VTBits-Tmp+1; 1159 case ISD::AssertZext: 1160 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1161 return VTBits-Tmp; 1162 1163 case ISD::Constant: { 1164 uint64_t Val = cast<ConstantSDNode>(Op)->getValue(); 1165 // If negative, invert the bits, then look at it. 1166 if (Val & MVT::getIntVTSignBit(VT)) 1167 Val = ~Val; 1168 1169 // Shift the bits so they are the leading bits in the int64_t. 1170 Val <<= 64-VTBits; 1171 1172 // Return # leading zeros. We use 'min' here in case Val was zero before 1173 // shifting. We don't want to return '64' as for an i32 "0". 1174 return std::min(VTBits, CountLeadingZeros_64(Val)); 1175 } 1176 1177 case ISD::SIGN_EXTEND: 1178 Tmp = VTBits-MVT::getSizeInBits(Op.getOperand(0).getValueType()); 1179 return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; 1180 1181 case ISD::SIGN_EXTEND_INREG: 1182 // Max of the input and what this extends. 1183 Tmp = MVT::getSizeInBits(cast<VTSDNode>(Op.getOperand(1))->getVT()); 1184 Tmp = VTBits-Tmp+1; 1185 1186 Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1187 return std::max(Tmp, Tmp2); 1188 1189 case ISD::SRA: 1190 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1191 // SRA X, C -> adds C sign bits. 1192 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1193 Tmp += C->getValue(); 1194 if (Tmp > VTBits) Tmp = VTBits; 1195 } 1196 return Tmp; 1197 case ISD::SHL: 1198 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1199 // shl destroys sign bits. 1200 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1201 if (C->getValue() >= VTBits || // Bad shift. 1202 C->getValue() >= Tmp) break; // Shifted all sign bits out. 1203 return Tmp - C->getValue(); 1204 } 1205 break; 1206 case ISD::AND: 1207 case ISD::OR: 1208 case ISD::XOR: // NOT is handled here. 1209 // Logical binary ops preserve the number of sign bits. 1210 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1211 if (Tmp == 1) return 1; // Early out. 1212 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1213 return std::min(Tmp, Tmp2); 1214 1215 case ISD::SELECT: 1216 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1217 if (Tmp == 1) return 1; // Early out. 1218 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1219 return std::min(Tmp, Tmp2); 1220 1221 case ISD::SETCC: 1222 // If setcc returns 0/-1, all bits are sign bits. 1223 if (getSetCCResultContents() == ZeroOrNegativeOneSetCCResult) 1224 return VTBits; 1225 break; 1226 case ISD::ROTL: 1227 case ISD::ROTR: 1228 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 1229 unsigned RotAmt = C->getValue() & (VTBits-1); 1230 1231 // Handle rotate right by N like a rotate left by 32-N. 1232 if (Op.getOpcode() == ISD::ROTR) 1233 RotAmt = (VTBits-RotAmt) & (VTBits-1); 1234 1235 // If we aren't rotating out all of the known-in sign bits, return the 1236 // number that are left. This handles rotl(sext(x), 1) for example. 1237 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1238 if (Tmp > RotAmt+1) return Tmp-RotAmt; 1239 } 1240 break; 1241 case ISD::ADD: 1242 // Add can have at most one carry bit. Thus we know that the output 1243 // is, at worst, one more bit than the inputs. 1244 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1245 if (Tmp == 1) return 1; // Early out. 1246 1247 // Special case decrementing a value (ADD X, -1): 1248 if (ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1249 if (CRHS->isAllOnesValue()) { 1250 uint64_t KnownZero, KnownOne; 1251 uint64_t Mask = MVT::getIntVTBitMask(VT); 1252 ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero, KnownOne, Depth+1); 1253 1254 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1255 // sign bits set. 1256 if ((KnownZero|1) == Mask) 1257 return VTBits; 1258 1259 // If we are subtracting one from a positive number, there is no carry 1260 // out of the result. 1261 if (KnownZero & MVT::getIntVTSignBit(VT)) 1262 return Tmp; 1263 } 1264 1265 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1266 if (Tmp2 == 1) return 1; 1267 return std::min(Tmp, Tmp2)-1; 1268 break; 1269 1270 case ISD::SUB: 1271 Tmp2 = ComputeNumSignBits(Op.getOperand(1), Depth+1); 1272 if (Tmp2 == 1) return 1; 1273 1274 // Handle NEG. 1275 if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) 1276 if (CLHS->getValue() == 0) { 1277 uint64_t KnownZero, KnownOne; 1278 uint64_t Mask = MVT::getIntVTBitMask(VT); 1279 ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); 1280 // If the input is known to be 0 or 1, the output is 0/-1, which is all 1281 // sign bits set. 1282 if ((KnownZero|1) == Mask) 1283 return VTBits; 1284 1285 // If the input is known to be positive (the sign bit is known clear), 1286 // the output of the NEG has the same number of sign bits as the input. 1287 if (KnownZero & MVT::getIntVTSignBit(VT)) 1288 return Tmp2; 1289 1290 // Otherwise, we treat this like a SUB. 1291 } 1292 1293 // Sub can have at most one carry bit. Thus we know that the output 1294 // is, at worst, one more bit than the inputs. 1295 Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1); 1296 if (Tmp == 1) return 1; // Early out. 1297 return std::min(Tmp, Tmp2)-1; 1298 break; 1299 case ISD::TRUNCATE: 1300 // FIXME: it's tricky to do anything useful for this, but it is an important 1301 // case for targets like X86. 1302 break; 1303 } 1304 1305 // Handle LOADX separately here. EXTLOAD case will fallthrough. 1306 if (Op.getOpcode() == ISD::LOAD) { 1307 LoadSDNode *LD = cast<LoadSDNode>(Op); 1308 unsigned ExtType = LD->getExtensionType(); 1309 switch (ExtType) { 1310 default: break; 1311 case ISD::SEXTLOAD: // '17' bits known 1312 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1313 return VTBits-Tmp+1; 1314 case ISD::ZEXTLOAD: // '16' bits known 1315 Tmp = MVT::getSizeInBits(LD->getLoadedVT()); 1316 return VTBits-Tmp; 1317 } 1318 } 1319 1320 // Allow the target to implement this method for its nodes. 1321 if (Op.getOpcode() >= ISD::BUILTIN_OP_END || 1322 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1323 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1324 Op.getOpcode() == ISD::INTRINSIC_VOID) { 1325 unsigned NumBits = ComputeNumSignBitsForTargetNode(Op, Depth); 1326 if (NumBits > 1) return NumBits; 1327 } 1328 1329 // Finally, if we can prove that the top bits of the result are 0's or 1's, 1330 // use this information. 1331 uint64_t KnownZero, KnownOne; 1332 uint64_t Mask = MVT::getIntVTBitMask(VT); 1333 ComputeMaskedBits(Op, Mask, KnownZero, KnownOne, Depth); 1334 1335 uint64_t SignBit = MVT::getIntVTSignBit(VT); 1336 if (KnownZero & SignBit) { // SignBit is 0 1337 Mask = KnownZero; 1338 } else if (KnownOne & SignBit) { // SignBit is 1; 1339 Mask = KnownOne; 1340 } else { 1341 // Nothing known. 1342 return 1; 1343 } 1344 1345 // Okay, we know that the sign bit in Mask is set. Use CLZ to determine 1346 // the number of identical bits in the top of the input value. 1347 Mask ^= ~0ULL; 1348 Mask <<= 64-VTBits; 1349 // Return # leading zeros. We use 'min' here in case Val was zero before 1350 // shifting. We don't want to return '64' as for an i32 "0". 1351 return std::min(VTBits, CountLeadingZeros_64(Mask)); 1352} 1353 1354 1355 1356/// ComputeNumSignBitsForTargetNode - This method can be implemented by 1357/// targets that want to expose additional information about sign bits to the 1358/// DAG Combiner. 1359unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op, 1360 unsigned Depth) const { 1361 assert((Op.getOpcode() >= ISD::BUILTIN_OP_END || 1362 Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || 1363 Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || 1364 Op.getOpcode() == ISD::INTRINSIC_VOID) && 1365 "Should use ComputeNumSignBits if you don't know whether Op" 1366 " is a target node!"); 1367 return 1; 1368} 1369 1370 1371SDOperand TargetLowering:: 1372PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { 1373 // Default implementation: no optimization. 1374 return SDOperand(); 1375} 1376 1377//===----------------------------------------------------------------------===// 1378// Inline Assembler Implementation Methods 1379//===----------------------------------------------------------------------===// 1380 1381TargetLowering::ConstraintType 1382TargetLowering::getConstraintType(char ConstraintLetter) const { 1383 // FIXME: lots more standard ones to handle. 1384 switch (ConstraintLetter) { 1385 default: return C_Unknown; 1386 case 'r': return C_RegisterClass; 1387 case 'm': // memory 1388 case 'o': // offsetable 1389 case 'V': // not offsetable 1390 return C_Memory; 1391 case 'i': // Simple Integer or Relocatable Constant 1392 case 'n': // Simple Integer 1393 case 's': // Relocatable Constant 1394 case 'I': // Target registers. 1395 case 'J': 1396 case 'K': 1397 case 'L': 1398 case 'M': 1399 case 'N': 1400 case 'O': 1401 case 'P': 1402 return C_Other; 1403 } 1404} 1405 1406/// isOperandValidForConstraint - Return the specified operand (possibly 1407/// modified) if the specified SDOperand is valid for the specified target 1408/// constraint letter, otherwise return null. 1409SDOperand TargetLowering::isOperandValidForConstraint(SDOperand Op, 1410 char ConstraintLetter, 1411 SelectionDAG &DAG) { 1412 switch (ConstraintLetter) { 1413 default: return SDOperand(0,0); 1414 case 'i': // Simple Integer or Relocatable Constant 1415 case 'n': // Simple Integer 1416 case 's': // Relocatable Constant 1417 return Op; // FIXME: not right. 1418 } 1419} 1420 1421std::vector<unsigned> TargetLowering:: 1422getRegClassForInlineAsmConstraint(const std::string &Constraint, 1423 MVT::ValueType VT) const { 1424 return std::vector<unsigned>(); 1425} 1426 1427 1428std::pair<unsigned, const TargetRegisterClass*> TargetLowering:: 1429getRegForInlineAsmConstraint(const std::string &Constraint, 1430 MVT::ValueType VT) const { 1431 if (Constraint[0] != '{') 1432 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1433 assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?"); 1434 1435 // Remove the braces from around the name. 1436 std::string RegName(Constraint.begin()+1, Constraint.end()-1); 1437 1438 // Figure out which register class contains this reg. 1439 const MRegisterInfo *RI = TM.getRegisterInfo(); 1440 for (MRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), 1441 E = RI->regclass_end(); RCI != E; ++RCI) { 1442 const TargetRegisterClass *RC = *RCI; 1443 1444 // If none of the the value types for this register class are valid, we 1445 // can't use it. For example, 64-bit reg classes on 32-bit targets. 1446 bool isLegal = false; 1447 for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); 1448 I != E; ++I) { 1449 if (isTypeLegal(*I)) { 1450 isLegal = true; 1451 break; 1452 } 1453 } 1454 1455 if (!isLegal) continue; 1456 1457 for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); 1458 I != E; ++I) { 1459 if (StringsEqualNoCase(RegName, RI->get(*I).Name)) 1460 return std::make_pair(*I, RC); 1461 } 1462 } 1463 1464 return std::pair<unsigned, const TargetRegisterClass*>(0, 0); 1465} 1466 1467//===----------------------------------------------------------------------===// 1468// Loop Strength Reduction hooks 1469//===----------------------------------------------------------------------===// 1470 1471/// isLegalAddressImmediate - Return true if the integer value or 1472/// GlobalValue can be used as the offset of the target addressing mode. 1473bool TargetLowering::isLegalAddressImmediate(int64_t V) const { 1474 return false; 1475} 1476bool TargetLowering::isLegalAddressImmediate(GlobalValue *GV) const { 1477 return false; 1478} 1479 1480 1481// Magic for divide replacement 1482 1483struct ms { 1484 int64_t m; // magic number 1485 int64_t s; // shift amount 1486}; 1487 1488struct mu { 1489 uint64_t m; // magic number 1490 int64_t a; // add indicator 1491 int64_t s; // shift amount 1492}; 1493 1494/// magic - calculate the magic numbers required to codegen an integer sdiv as 1495/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1496/// or -1. 1497static ms magic32(int32_t d) { 1498 int32_t p; 1499 uint32_t ad, anc, delta, q1, r1, q2, r2, t; 1500 const uint32_t two31 = 0x80000000U; 1501 struct ms mag; 1502 1503 ad = abs(d); 1504 t = two31 + ((uint32_t)d >> 31); 1505 anc = t - 1 - t%ad; // absolute value of nc 1506 p = 31; // initialize p 1507 q1 = two31/anc; // initialize q1 = 2p/abs(nc) 1508 r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1509 q2 = two31/ad; // initialize q2 = 2p/abs(d) 1510 r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1511 do { 1512 p = p + 1; 1513 q1 = 2*q1; // update q1 = 2p/abs(nc) 1514 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1515 if (r1 >= anc) { // must be unsigned comparison 1516 q1 = q1 + 1; 1517 r1 = r1 - anc; 1518 } 1519 q2 = 2*q2; // update q2 = 2p/abs(d) 1520 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1521 if (r2 >= ad) { // must be unsigned comparison 1522 q2 = q2 + 1; 1523 r2 = r2 - ad; 1524 } 1525 delta = ad - r2; 1526 } while (q1 < delta || (q1 == delta && r1 == 0)); 1527 1528 mag.m = (int32_t)(q2 + 1); // make sure to sign extend 1529 if (d < 0) mag.m = -mag.m; // resulting magic number 1530 mag.s = p - 32; // resulting shift 1531 return mag; 1532} 1533 1534/// magicu - calculate the magic numbers required to codegen an integer udiv as 1535/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 1536static mu magicu32(uint32_t d) { 1537 int32_t p; 1538 uint32_t nc, delta, q1, r1, q2, r2; 1539 struct mu magu; 1540 magu.a = 0; // initialize "add" indicator 1541 nc = - 1 - (-d)%d; 1542 p = 31; // initialize p 1543 q1 = 0x80000000/nc; // initialize q1 = 2p/nc 1544 r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) 1545 q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d 1546 r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) 1547 do { 1548 p = p + 1; 1549 if (r1 >= nc - r1 ) { 1550 q1 = 2*q1 + 1; // update q1 1551 r1 = 2*r1 - nc; // update r1 1552 } 1553 else { 1554 q1 = 2*q1; // update q1 1555 r1 = 2*r1; // update r1 1556 } 1557 if (r2 + 1 >= d - r2) { 1558 if (q2 >= 0x7FFFFFFF) magu.a = 1; 1559 q2 = 2*q2 + 1; // update q2 1560 r2 = 2*r2 + 1 - d; // update r2 1561 } 1562 else { 1563 if (q2 >= 0x80000000) magu.a = 1; 1564 q2 = 2*q2; // update q2 1565 r2 = 2*r2 + 1; // update r2 1566 } 1567 delta = d - 1 - r2; 1568 } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); 1569 magu.m = q2 + 1; // resulting magic number 1570 magu.s = p - 32; // resulting shift 1571 return magu; 1572} 1573 1574/// magic - calculate the magic numbers required to codegen an integer sdiv as 1575/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1576/// or -1. 1577static ms magic64(int64_t d) { 1578 int64_t p; 1579 uint64_t ad, anc, delta, q1, r1, q2, r2, t; 1580 const uint64_t two63 = 9223372036854775808ULL; // 2^63 1581 struct ms mag; 1582 1583 ad = d >= 0 ? d : -d; 1584 t = two63 + ((uint64_t)d >> 63); 1585 anc = t - 1 - t%ad; // absolute value of nc 1586 p = 63; // initialize p 1587 q1 = two63/anc; // initialize q1 = 2p/abs(nc) 1588 r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1589 q2 = two63/ad; // initialize q2 = 2p/abs(d) 1590 r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1591 do { 1592 p = p + 1; 1593 q1 = 2*q1; // update q1 = 2p/abs(nc) 1594 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1595 if (r1 >= anc) { // must be unsigned comparison 1596 q1 = q1 + 1; 1597 r1 = r1 - anc; 1598 } 1599 q2 = 2*q2; // update q2 = 2p/abs(d) 1600 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1601 if (r2 >= ad) { // must be unsigned comparison 1602 q2 = q2 + 1; 1603 r2 = r2 - ad; 1604 } 1605 delta = ad - r2; 1606 } while (q1 < delta || (q1 == delta && r1 == 0)); 1607 1608 mag.m = q2 + 1; 1609 if (d < 0) mag.m = -mag.m; // resulting magic number 1610 mag.s = p - 64; // resulting shift 1611 return mag; 1612} 1613 1614/// magicu - calculate the magic numbers required to codegen an integer udiv as 1615/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 1616static mu magicu64(uint64_t d) 1617{ 1618 int64_t p; 1619 uint64_t nc, delta, q1, r1, q2, r2; 1620 struct mu magu; 1621 magu.a = 0; // initialize "add" indicator 1622 nc = - 1 - (-d)%d; 1623 p = 63; // initialize p 1624 q1 = 0x8000000000000000ull/nc; // initialize q1 = 2p/nc 1625 r1 = 0x8000000000000000ull - q1*nc; // initialize r1 = rem(2p,nc) 1626 q2 = 0x7FFFFFFFFFFFFFFFull/d; // initialize q2 = (2p-1)/d 1627 r2 = 0x7FFFFFFFFFFFFFFFull - q2*d; // initialize r2 = rem((2p-1),d) 1628 do { 1629 p = p + 1; 1630 if (r1 >= nc - r1 ) { 1631 q1 = 2*q1 + 1; // update q1 1632 r1 = 2*r1 - nc; // update r1 1633 } 1634 else { 1635 q1 = 2*q1; // update q1 1636 r1 = 2*r1; // update r1 1637 } 1638 if (r2 + 1 >= d - r2) { 1639 if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1; 1640 q2 = 2*q2 + 1; // update q2 1641 r2 = 2*r2 + 1 - d; // update r2 1642 } 1643 else { 1644 if (q2 >= 0x8000000000000000ull) magu.a = 1; 1645 q2 = 2*q2; // update q2 1646 r2 = 2*r2 + 1; // update r2 1647 } 1648 delta = d - 1 - r2; 1649 } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0))); 1650 magu.m = q2 + 1; // resulting magic number 1651 magu.s = p - 64; // resulting shift 1652 return magu; 1653} 1654 1655/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 1656/// return a DAG expression to select that will generate the same value by 1657/// multiplying by a magic number. See: 1658/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 1659SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, 1660 std::vector<SDNode*>* Created) const { 1661 MVT::ValueType VT = N->getValueType(0); 1662 1663 // Check to see if we can do this. 1664 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) 1665 return SDOperand(); // BuildSDIV only operates on i32 or i64 1666 if (!isOperationLegal(ISD::MULHS, VT)) 1667 return SDOperand(); // Make sure the target supports MULHS. 1668 1669 int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended(); 1670 ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d); 1671 1672 // Multiply the numerator (operand 0) by the magic value 1673 SDOperand Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0), 1674 DAG.getConstant(magics.m, VT)); 1675 // If d > 0 and m < 0, add the numerator 1676 if (d > 0 && magics.m < 0) { 1677 Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0)); 1678 if (Created) 1679 Created->push_back(Q.Val); 1680 } 1681 // If d < 0 and m > 0, subtract the numerator. 1682 if (d < 0 && magics.m > 0) { 1683 Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0)); 1684 if (Created) 1685 Created->push_back(Q.Val); 1686 } 1687 // Shift right algebraic if shift value is nonzero 1688 if (magics.s > 0) { 1689 Q = DAG.getNode(ISD::SRA, VT, Q, 1690 DAG.getConstant(magics.s, getShiftAmountTy())); 1691 if (Created) 1692 Created->push_back(Q.Val); 1693 } 1694 // Extract the sign bit and add it to the quotient 1695 SDOperand T = 1696 DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(MVT::getSizeInBits(VT)-1, 1697 getShiftAmountTy())); 1698 if (Created) 1699 Created->push_back(T.Val); 1700 return DAG.getNode(ISD::ADD, VT, Q, T); 1701} 1702 1703/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, 1704/// return a DAG expression to select that will generate the same value by 1705/// multiplying by a magic number. See: 1706/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 1707SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, 1708 std::vector<SDNode*>* Created) const { 1709 MVT::ValueType VT = N->getValueType(0); 1710 1711 // Check to see if we can do this. 1712 if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64)) 1713 return SDOperand(); // BuildUDIV only operates on i32 or i64 1714 if (!isOperationLegal(ISD::MULHU, VT)) 1715 return SDOperand(); // Make sure the target supports MULHU. 1716 1717 uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue(); 1718 mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d); 1719 1720 // Multiply the numerator (operand 0) by the magic value 1721 SDOperand Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0), 1722 DAG.getConstant(magics.m, VT)); 1723 if (Created) 1724 Created->push_back(Q.Val); 1725 1726 if (magics.a == 0) { 1727 return DAG.getNode(ISD::SRL, VT, Q, 1728 DAG.getConstant(magics.s, getShiftAmountTy())); 1729 } else { 1730 SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q); 1731 if (Created) 1732 Created->push_back(NPQ.Val); 1733 NPQ = DAG.getNode(ISD::SRL, VT, NPQ, 1734 DAG.getConstant(1, getShiftAmountTy())); 1735 if (Created) 1736 Created->push_back(NPQ.Val); 1737 NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q); 1738 if (Created) 1739 Created->push_back(NPQ.Val); 1740 return DAG.getNode(ISD::SRL, VT, NPQ, 1741 DAG.getConstant(magics.s-1, getShiftAmountTy())); 1742 } 1743} 1744