TargetLowering.cpp revision be1ad4de2900451626c8d4ace07b9ea16099ea1d
1//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This implements the TargetLowering class. 11// 12//===----------------------------------------------------------------------===// 13 14#include "llvm/Target/TargetAsmInfo.h" 15#include "llvm/Target/TargetLowering.h" 16#include "llvm/Target/TargetSubtarget.h" 17#include "llvm/Target/TargetData.h" 18#include "llvm/Target/TargetMachine.h" 19#include "llvm/Target/TargetRegisterInfo.h" 20#include "llvm/GlobalVariable.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/CodeGen/MachineFrameInfo.h" 23#include "llvm/CodeGen/SelectionDAG.h" 24#include "llvm/ADT/StringExtras.h" 25#include "llvm/ADT/STLExtras.h" 26#include "llvm/Support/MathExtras.h" 27using namespace llvm; 28 29/// InitLibcallNames - Set default libcall names. 
30/// 31static void InitLibcallNames(const char **Names) { 32 Names[RTLIB::SHL_I32] = "__ashlsi3"; 33 Names[RTLIB::SHL_I64] = "__ashldi3"; 34 Names[RTLIB::SRL_I32] = "__lshrsi3"; 35 Names[RTLIB::SRL_I64] = "__lshrdi3"; 36 Names[RTLIB::SRA_I32] = "__ashrsi3"; 37 Names[RTLIB::SRA_I64] = "__ashrdi3"; 38 Names[RTLIB::MUL_I32] = "__mulsi3"; 39 Names[RTLIB::MUL_I64] = "__muldi3"; 40 Names[RTLIB::SDIV_I32] = "__divsi3"; 41 Names[RTLIB::SDIV_I64] = "__divdi3"; 42 Names[RTLIB::UDIV_I32] = "__udivsi3"; 43 Names[RTLIB::UDIV_I64] = "__udivdi3"; 44 Names[RTLIB::SREM_I32] = "__modsi3"; 45 Names[RTLIB::SREM_I64] = "__moddi3"; 46 Names[RTLIB::UREM_I32] = "__umodsi3"; 47 Names[RTLIB::UREM_I64] = "__umoddi3"; 48 Names[RTLIB::NEG_I32] = "__negsi2"; 49 Names[RTLIB::NEG_I64] = "__negdi2"; 50 Names[RTLIB::ADD_F32] = "__addsf3"; 51 Names[RTLIB::ADD_F64] = "__adddf3"; 52 Names[RTLIB::ADD_F80] = "__addxf3"; 53 Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; 54 Names[RTLIB::SUB_F32] = "__subsf3"; 55 Names[RTLIB::SUB_F64] = "__subdf3"; 56 Names[RTLIB::SUB_F80] = "__subxf3"; 57 Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; 58 Names[RTLIB::MUL_F32] = "__mulsf3"; 59 Names[RTLIB::MUL_F64] = "__muldf3"; 60 Names[RTLIB::MUL_F80] = "__mulxf3"; 61 Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; 62 Names[RTLIB::DIV_F32] = "__divsf3"; 63 Names[RTLIB::DIV_F64] = "__divdf3"; 64 Names[RTLIB::DIV_F80] = "__divxf3"; 65 Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; 66 Names[RTLIB::REM_F32] = "fmodf"; 67 Names[RTLIB::REM_F64] = "fmod"; 68 Names[RTLIB::REM_F80] = "fmodl"; 69 Names[RTLIB::REM_PPCF128] = "fmodl"; 70 Names[RTLIB::POWI_F32] = "__powisf2"; 71 Names[RTLIB::POWI_F64] = "__powidf2"; 72 Names[RTLIB::POWI_F80] = "__powixf2"; 73 Names[RTLIB::POWI_PPCF128] = "__powitf2"; 74 Names[RTLIB::SQRT_F32] = "sqrtf"; 75 Names[RTLIB::SQRT_F64] = "sqrt"; 76 Names[RTLIB::SQRT_F80] = "sqrtl"; 77 Names[RTLIB::SQRT_PPCF128] = "sqrtl"; 78 Names[RTLIB::SIN_F32] = "sinf"; 79 Names[RTLIB::SIN_F64] = "sin"; 80 Names[RTLIB::SIN_F80] = "sinl"; 81 
Names[RTLIB::SIN_PPCF128] = "sinl"; 82 Names[RTLIB::COS_F32] = "cosf"; 83 Names[RTLIB::COS_F64] = "cos"; 84 Names[RTLIB::COS_F80] = "cosl"; 85 Names[RTLIB::COS_PPCF128] = "cosl"; 86 Names[RTLIB::POW_F32] = "powf"; 87 Names[RTLIB::POW_F64] = "pow"; 88 Names[RTLIB::POW_F80] = "powl"; 89 Names[RTLIB::POW_PPCF128] = "powl"; 90 Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; 91 Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; 92 Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; 93 Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; 94 Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; 95 Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; 96 Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; 97 Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; 98 Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; 99 Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; 100 Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; 101 Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; 102 Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; 103 Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; 104 Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; 105 Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; 106 Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; 107 Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; 108 Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; 109 Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; 110 Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; 111 Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; 112 Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; 113 Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; 114 Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; 115 Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; 116 Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; 117 Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; 118 Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; 119 Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; 120 Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; 121 Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; 
122 Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; 123 Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; 124 Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; 125 Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; 126 Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; 127 Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; 128 Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; 129 Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; 130 Names[RTLIB::OEQ_F32] = "__eqsf2"; 131 Names[RTLIB::OEQ_F64] = "__eqdf2"; 132 Names[RTLIB::UNE_F32] = "__nesf2"; 133 Names[RTLIB::UNE_F64] = "__nedf2"; 134 Names[RTLIB::OGE_F32] = "__gesf2"; 135 Names[RTLIB::OGE_F64] = "__gedf2"; 136 Names[RTLIB::OLT_F32] = "__ltsf2"; 137 Names[RTLIB::OLT_F64] = "__ltdf2"; 138 Names[RTLIB::OLE_F32] = "__lesf2"; 139 Names[RTLIB::OLE_F64] = "__ledf2"; 140 Names[RTLIB::OGT_F32] = "__gtsf2"; 141 Names[RTLIB::OGT_F64] = "__gtdf2"; 142 Names[RTLIB::UO_F32] = "__unordsf2"; 143 Names[RTLIB::UO_F64] = "__unorddf2"; 144 Names[RTLIB::O_F32] = "__unordsf2"; 145 Names[RTLIB::O_F64] = "__unorddf2"; 146} 147 148/// InitCmpLibcallCCs - Set default comparison libcall CC. 
149/// 150static void InitCmpLibcallCCs(ISD::CondCode *CCs) { 151 memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); 152 CCs[RTLIB::OEQ_F32] = ISD::SETEQ; 153 CCs[RTLIB::OEQ_F64] = ISD::SETEQ; 154 CCs[RTLIB::UNE_F32] = ISD::SETNE; 155 CCs[RTLIB::UNE_F64] = ISD::SETNE; 156 CCs[RTLIB::OGE_F32] = ISD::SETGE; 157 CCs[RTLIB::OGE_F64] = ISD::SETGE; 158 CCs[RTLIB::OLT_F32] = ISD::SETLT; 159 CCs[RTLIB::OLT_F64] = ISD::SETLT; 160 CCs[RTLIB::OLE_F32] = ISD::SETLE; 161 CCs[RTLIB::OLE_F64] = ISD::SETLE; 162 CCs[RTLIB::OGT_F32] = ISD::SETGT; 163 CCs[RTLIB::OGT_F64] = ISD::SETGT; 164 CCs[RTLIB::UO_F32] = ISD::SETNE; 165 CCs[RTLIB::UO_F64] = ISD::SETNE; 166 CCs[RTLIB::O_F32] = ISD::SETEQ; 167 CCs[RTLIB::O_F64] = ISD::SETEQ; 168} 169 170TargetLowering::TargetLowering(TargetMachine &tm) 171 : TM(tm), TD(TM.getTargetData()) { 172 assert(ISD::BUILTIN_OP_END <= OpActionsCapacity && 173 "Fixed size array in TargetLowering is not large enough!"); 174 // All operations default to being supported. 175 memset(OpActions, 0, sizeof(OpActions)); 176 memset(LoadXActions, 0, sizeof(LoadXActions)); 177 memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); 178 memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); 179 memset(ConvertActions, 0, sizeof(ConvertActions)); 180 181 // Set default actions for various operations. 182 for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { 183 // Default all indexed load / store to expand. 184 for (unsigned IM = (unsigned)ISD::PRE_INC; 185 IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { 186 setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); 187 setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); 188 } 189 190 // These operations default to expand. 191 setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); 192 } 193 194 // Most targets ignore the @llvm.prefetch intrinsic. 195 setOperationAction(ISD::PREFETCH, MVT::Other, Expand); 196 197 // ConstantFP nodes default to expand. 
Targets can either change this to 198 // Legal, in which case all fp constants are legal, or use addLegalFPImmediate 199 // to optimize expansions for certain constants. 200 setOperationAction(ISD::ConstantFP, MVT::f32, Expand); 201 setOperationAction(ISD::ConstantFP, MVT::f64, Expand); 202 setOperationAction(ISD::ConstantFP, MVT::f80, Expand); 203 204 // Default ISD::TRAP to expand (which turns it into abort). 205 setOperationAction(ISD::TRAP, MVT::Other, Expand); 206 207 IsLittleEndian = TD->isLittleEndian(); 208 UsesGlobalOffsetTable = false; 209 ShiftAmountTy = PointerTy = getValueType(TD->getIntPtrType()); 210 ShiftAmtHandling = Undefined; 211 memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); 212 memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); 213 maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; 214 allowUnalignedMemoryAccesses = false; 215 UseUnderscoreSetJmp = false; 216 UseUnderscoreLongJmp = false; 217 SelectIsExpensive = false; 218 IntDivIsCheap = false; 219 Pow2DivIsCheap = false; 220 StackPointerRegisterToSaveRestore = 0; 221 ExceptionPointerRegister = 0; 222 ExceptionSelectorRegister = 0; 223 SetCCResultContents = UndefinedSetCCResult; 224 SchedPreferenceInfo = SchedulingForLatency; 225 JumpBufSize = 0; 226 JumpBufAlignment = 0; 227 IfCvtBlockSizeLimit = 2; 228 IfCvtDupBlockSizeLimit = 0; 229 PrefLoopAlignment = 0; 230 231 InitLibcallNames(LibcallRoutineNames); 232 InitCmpLibcallCCs(CmpLibcallCCs); 233 234 // Tell Legalize whether the assembler supports DEBUG_LOC. 235 if (!TM.getTargetAsmInfo()->hasDotLocAndDotFile()) 236 setOperationAction(ISD::DEBUG_LOC, MVT::Other, Expand); 237} 238 239TargetLowering::~TargetLowering() {} 240 241/// computeRegisterProperties - Once all of the register classes are added, 242/// this allows us to compute derived properties we expose. 
243void TargetLowering::computeRegisterProperties() { 244 assert(MVT::LAST_VALUETYPE <= 32 && 245 "Too many value types for ValueTypeActions to hold!"); 246 247 // Everything defaults to needing one register. 248 for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { 249 NumRegistersForVT[i] = 1; 250 RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; 251 } 252 // ...except isVoid, which doesn't need any registers. 253 NumRegistersForVT[MVT::isVoid] = 0; 254 255 // Find the largest integer register class. 256 unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; 257 for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) 258 assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); 259 260 // Every integer value type larger than this largest register takes twice as 261 // many registers to represent as the previous ValueType. 262 for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { 263 MVT EVT = (MVT::SimpleValueType)ExpandedReg; 264 if (!EVT.isInteger()) 265 break; 266 NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; 267 RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; 268 TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); 269 ValueTypeActions.setTypeAction(EVT, Expand); 270 } 271 272 // Inspect all of the ValueType's smaller than the largest integer 273 // register to see which ones need promotion. 274 unsigned LegalIntReg = LargestIntReg; 275 for (unsigned IntReg = LargestIntReg - 1; 276 IntReg >= (unsigned)MVT::i1; --IntReg) { 277 MVT IVT = (MVT::SimpleValueType)IntReg; 278 if (isTypeLegal(IVT)) { 279 LegalIntReg = IntReg; 280 } else { 281 RegisterTypeForVT[IntReg] = TransformToType[IntReg] = 282 (MVT::SimpleValueType)LegalIntReg; 283 ValueTypeActions.setTypeAction(IVT, Promote); 284 } 285 } 286 287 // ppcf128 type is really two f64's. 
288 if (!isTypeLegal(MVT::ppcf128)) { 289 NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; 290 RegisterTypeForVT[MVT::ppcf128] = MVT::f64; 291 TransformToType[MVT::ppcf128] = MVT::f64; 292 ValueTypeActions.setTypeAction(MVT::ppcf128, Expand); 293 } 294 295 // Decide how to handle f64. If the target does not have native f64 support, 296 // expand it to i64 and we will be generating soft float library calls. 297 if (!isTypeLegal(MVT::f64)) { 298 NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; 299 RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; 300 TransformToType[MVT::f64] = MVT::i64; 301 ValueTypeActions.setTypeAction(MVT::f64, Expand); 302 } 303 304 // Decide how to handle f32. If the target does not have native support for 305 // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. 306 if (!isTypeLegal(MVT::f32)) { 307 if (isTypeLegal(MVT::f64)) { 308 NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; 309 RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; 310 TransformToType[MVT::f32] = MVT::f64; 311 ValueTypeActions.setTypeAction(MVT::f32, Promote); 312 } else { 313 NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; 314 RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; 315 TransformToType[MVT::f32] = MVT::i32; 316 ValueTypeActions.setTypeAction(MVT::f32, Expand); 317 } 318 } 319 320 // Loop over all of the vector value types to see which need transformations. 
321 for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; 322 i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { 323 MVT VT = (MVT::SimpleValueType)i; 324 if (!isTypeLegal(VT)) { 325 MVT IntermediateVT, RegisterVT; 326 unsigned NumIntermediates; 327 NumRegistersForVT[i] = 328 getVectorTypeBreakdown(VT, 329 IntermediateVT, NumIntermediates, 330 RegisterVT); 331 RegisterTypeForVT[i] = RegisterVT; 332 TransformToType[i] = MVT::Other; // this isn't actually used 333 ValueTypeActions.setTypeAction(VT, Expand); 334 } 335 } 336} 337 338const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { 339 return NULL; 340} 341 342 343MVT TargetLowering::getSetCCResultType(const SDOperand &) const { 344 return getValueType(TD->getIntPtrType()); 345} 346 347 348/// getVectorTypeBreakdown - Vector types are broken down into some number of 349/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 350/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. 351/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. 352/// 353/// This method returns the number of registers needed, and the VT for each 354/// register. It also returns the VT and quantity of the intermediate values 355/// before they are promoted/expanded. 356/// 357unsigned TargetLowering::getVectorTypeBreakdown(MVT VT, 358 MVT &IntermediateVT, 359 unsigned &NumIntermediates, 360 MVT &RegisterVT) const { 361 // Figure out the right, legal destination reg to copy into. 362 unsigned NumElts = VT.getVectorNumElements(); 363 MVT EltTy = VT.getVectorElementType(); 364 365 unsigned NumVectorRegs = 1; 366 367 // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we 368 // could break down into LHS/RHS like LegalizeDAG does. 369 if (!isPowerOf2_32(NumElts)) { 370 NumVectorRegs = NumElts; 371 NumElts = 1; 372 } 373 374 // Divide the input until we get to a supported size. 
This will always 375 // end with a scalar if the target doesn't support vectors. 376 while (NumElts > 1 && !isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { 377 NumElts >>= 1; 378 NumVectorRegs <<= 1; 379 } 380 381 NumIntermediates = NumVectorRegs; 382 383 MVT NewVT = MVT::getVectorVT(EltTy, NumElts); 384 if (!isTypeLegal(NewVT)) 385 NewVT = EltTy; 386 IntermediateVT = NewVT; 387 388 MVT DestVT = getTypeToTransformTo(NewVT); 389 RegisterVT = DestVT; 390 if (DestVT.bitsLT(NewVT)) { 391 // Value is expanded, e.g. i64 -> i16. 392 return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); 393 } else { 394 // Otherwise, promotion or legal types use the same number of registers as 395 // the vector decimated to the appropriate level. 396 return NumVectorRegs; 397 } 398 399 return 1; 400} 401 402/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate 403/// function arguments in the caller parameter area. This is the actual 404/// alignment, not its logarithm. 405unsigned TargetLowering::getByValTypeAlignment(const Type *Ty) const { 406 return TD->getCallFrameTypeAlignment(Ty); 407} 408 409SDOperand TargetLowering::getPICJumpTableRelocBase(SDOperand Table, 410 SelectionDAG &DAG) const { 411 if (usesGlobalOffsetTable()) 412 return DAG.getNode(ISD::GLOBAL_OFFSET_TABLE, getPointerTy()); 413 return Table; 414} 415 416//===----------------------------------------------------------------------===// 417// Optimization Methods 418//===----------------------------------------------------------------------===// 419 420/// ShrinkDemandedConstant - Check to see if the specified operand of the 421/// specified instruction is a constant integer. If so, check to see if there 422/// are any bits set in the constant that are not demanded. If so, shrink the 423/// constant and return true. 
424bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDOperand Op, 425 const APInt &Demanded) { 426 // FIXME: ISD::SELECT, ISD::SELECT_CC 427 switch(Op.getOpcode()) { 428 default: break; 429 case ISD::AND: 430 case ISD::OR: 431 case ISD::XOR: 432 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) 433 if (C->getAPIntValue().intersects(~Demanded)) { 434 MVT VT = Op.getValueType(); 435 SDOperand New = DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), 436 DAG.getConstant(Demanded & 437 C->getAPIntValue(), 438 VT)); 439 return CombineTo(Op, New); 440 } 441 break; 442 } 443 return false; 444} 445 446/// SimplifyDemandedBits - Look at Op. At this point, we know that only the 447/// DemandedMask bits of the result of Op are ever used downstream. If we can 448/// use this information to simplify Op, create a new simplified DAG node and 449/// return true, returning the original and new nodes in Old and New. Otherwise, 450/// analyze the expression and return a mask of KnownOne and KnownZero bits for 451/// the expression (used to simplify the caller). The KnownZero/One bits may 452/// only be accurate for those bits in the DemandedMask. 453bool TargetLowering::SimplifyDemandedBits(SDOperand Op, 454 const APInt &DemandedMask, 455 APInt &KnownZero, 456 APInt &KnownOne, 457 TargetLoweringOpt &TLO, 458 unsigned Depth) const { 459 unsigned BitWidth = DemandedMask.getBitWidth(); 460 assert(Op.getValueSizeInBits() == BitWidth && 461 "Mask size mismatches value type size!"); 462 APInt NewMask = DemandedMask; 463 464 // Don't know anything. 465 KnownZero = KnownOne = APInt(BitWidth, 0); 466 467 // Other users may use these bits. 468 if (!Op.Val->hasOneUse()) { 469 if (Depth != 0) { 470 // If not at the root, Just compute the KnownZero/KnownOne bits to 471 // simplify things downstream. 
472 TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth); 473 return false; 474 } 475 // If this is the root being simplified, allow it to have multiple uses, 476 // just set the NewMask to all bits. 477 NewMask = APInt::getAllOnesValue(BitWidth); 478 } else if (DemandedMask == 0) { 479 // Not demanding any bits from Op. 480 if (Op.getOpcode() != ISD::UNDEF) 481 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::UNDEF, Op.getValueType())); 482 return false; 483 } else if (Depth == 6) { // Limit search depth. 484 return false; 485 } 486 487 APInt KnownZero2, KnownOne2, KnownZeroOut, KnownOneOut; 488 switch (Op.getOpcode()) { 489 case ISD::Constant: 490 // We know all of the bits for a constant! 491 KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue() & NewMask; 492 KnownZero = ~KnownOne & NewMask; 493 return false; // Don't fall through, will infinitely loop. 494 case ISD::AND: 495 // If the RHS is a constant, check to see if the LHS would be zero without 496 // using the bits from the RHS. Below, we use knowledge about the RHS to 497 // simplify the LHS, here we're using information from the LHS to simplify 498 // the RHS. 499 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 500 APInt LHSZero, LHSOne; 501 TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask, 502 LHSZero, LHSOne, Depth+1); 503 // If the LHS already has zeros where RHSC does, this and is dead. 504 if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask)) 505 return TLO.CombineTo(Op, Op.getOperand(0)); 506 // If any of the set bits in the RHS are known zero on the LHS, shrink 507 // the constant. 
508 if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask)) 509 return true; 510 } 511 512 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 513 KnownOne, TLO, Depth+1)) 514 return true; 515 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 516 if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask, 517 KnownZero2, KnownOne2, TLO, Depth+1)) 518 return true; 519 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 520 521 // If all of the demanded bits are known one on one side, return the other. 522 // These bits cannot contribute to the result of the 'and'. 523 if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) 524 return TLO.CombineTo(Op, Op.getOperand(0)); 525 if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) 526 return TLO.CombineTo(Op, Op.getOperand(1)); 527 // If all of the demanded bits in the inputs are known zeros, return zero. 528 if ((NewMask & (KnownZero|KnownZero2)) == NewMask) 529 return TLO.CombineTo(Op, TLO.DAG.getConstant(0, Op.getValueType())); 530 // If the RHS is a constant, see if we can simplify it. 531 if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) 532 return true; 533 534 // Output known-1 bits are only known if set in both the LHS & RHS. 535 KnownOne &= KnownOne2; 536 // Output known-0 are known to be clear if zero in either the LHS | RHS. 537 KnownZero |= KnownZero2; 538 break; 539 case ISD::OR: 540 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 541 KnownOne, TLO, Depth+1)) 542 return true; 543 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 544 if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask, 545 KnownZero2, KnownOne2, TLO, Depth+1)) 546 return true; 547 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 548 549 // If all of the demanded bits are known zero on one side, return the other. 550 // These bits cannot contribute to the result of the 'or'. 
551 if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask)) 552 return TLO.CombineTo(Op, Op.getOperand(0)); 553 if ((NewMask & ~KnownOne & KnownZero2) == (~KnownOne & NewMask)) 554 return TLO.CombineTo(Op, Op.getOperand(1)); 555 // If all of the potentially set bits on one side are known to be set on 556 // the other side, just use the 'other' side. 557 if ((NewMask & ~KnownZero & KnownOne2) == (~KnownZero & NewMask)) 558 return TLO.CombineTo(Op, Op.getOperand(0)); 559 if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask)) 560 return TLO.CombineTo(Op, Op.getOperand(1)); 561 // If the RHS is a constant, see if we can simplify it. 562 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 563 return true; 564 565 // Output known-0 bits are only known if clear in both the LHS & RHS. 566 KnownZero &= KnownZero2; 567 // Output known-1 are known to be set if set in either the LHS | RHS. 568 KnownOne |= KnownOne2; 569 break; 570 case ISD::XOR: 571 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero, 572 KnownOne, TLO, Depth+1)) 573 return true; 574 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 575 if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2, 576 KnownOne2, TLO, Depth+1)) 577 return true; 578 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 579 580 // If all of the demanded bits are known zero on one side, return the other. 581 // These bits cannot contribute to the result of the 'xor'. 582 if ((KnownZero & NewMask) == NewMask) 583 return TLO.CombineTo(Op, Op.getOperand(0)); 584 if ((KnownZero2 & NewMask) == NewMask) 585 return TLO.CombineTo(Op, Op.getOperand(1)); 586 587 // If all of the unknown bits are known to be zero on one side or the other 588 // (but not both) turn this into an *inclusive* or. 589 // e.g. 
(A & C1)^(B & C2) -> (A & C1)|(B & C2) iff C1&C2 == 0 590 if ((NewMask & ~KnownZero & ~KnownZero2) == 0) 591 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, Op.getValueType(), 592 Op.getOperand(0), 593 Op.getOperand(1))); 594 595 // Output known-0 bits are known if clear or set in both the LHS & RHS. 596 KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); 597 // Output known-1 are known to be set if set in only one of the LHS, RHS. 598 KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2); 599 600 // If all of the demanded bits on one side are known, and all of the set 601 // bits on that side are also known to be set on the other side, turn this 602 // into an AND, as we know the bits will be cleared. 603 // e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2 604 if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known 605 if ((KnownOne & KnownOne2) == KnownOne) { 606 MVT VT = Op.getValueType(); 607 SDOperand ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT); 608 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, VT, Op.getOperand(0), 609 ANDC)); 610 } 611 } 612 613 // If the RHS is a constant, see if we can simplify it. 614 // for XOR, we prefer to force bits to 1 if they will make a -1. 615 // if we can't force bits, try to shrink constant 616 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 617 APInt Expanded = C->getAPIntValue() | (~NewMask); 618 // if we can expand it to have all bits set, do it 619 if (Expanded.isAllOnesValue()) { 620 if (Expanded != C->getAPIntValue()) { 621 MVT VT = Op.getValueType(); 622 SDOperand New = TLO.DAG.getNode(Op.getOpcode(), VT, Op.getOperand(0), 623 TLO.DAG.getConstant(Expanded, VT)); 624 return TLO.CombineTo(Op, New); 625 } 626 // if it already has all the bits set, nothing to change 627 // but don't shrink either! 
628 } else if (TLO.ShrinkDemandedConstant(Op, NewMask)) { 629 return true; 630 } 631 } 632 633 KnownZero = KnownZeroOut; 634 KnownOne = KnownOneOut; 635 break; 636 case ISD::SELECT: 637 if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero, 638 KnownOne, TLO, Depth+1)) 639 return true; 640 if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2, 641 KnownOne2, TLO, Depth+1)) 642 return true; 643 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 644 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 645 646 // If the operands are constants, see if we can simplify them. 647 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 648 return true; 649 650 // Only known if known in both the LHS and RHS. 651 KnownOne &= KnownOne2; 652 KnownZero &= KnownZero2; 653 break; 654 case ISD::SELECT_CC: 655 if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero, 656 KnownOne, TLO, Depth+1)) 657 return true; 658 if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2, 659 KnownOne2, TLO, Depth+1)) 660 return true; 661 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 662 assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); 663 664 // If the operands are constants, see if we can simplify them. 665 if (TLO.ShrinkDemandedConstant(Op, NewMask)) 666 return true; 667 668 // Only known if known in both the LHS and RHS. 669 KnownOne &= KnownOne2; 670 KnownZero &= KnownZero2; 671 break; 672 case ISD::SHL: 673 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 674 unsigned ShAmt = SA->getValue(); 675 SDOperand InOp = Op.getOperand(0); 676 677 // If the shift count is an invalid immediate, don't do anything. 678 if (ShAmt >= BitWidth) 679 break; 680 681 // If this is ((X >>u C1) << ShAmt), see if we can simplify this into a 682 // single shift. We can do this if the bottom bits (which are shifted 683 // out) are never demanded. 
684 if (InOp.getOpcode() == ISD::SRL && 685 isa<ConstantSDNode>(InOp.getOperand(1))) { 686 if (ShAmt && (NewMask & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) { 687 unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue(); 688 unsigned Opc = ISD::SHL; 689 int Diff = ShAmt-C1; 690 if (Diff < 0) { 691 Diff = -Diff; 692 Opc = ISD::SRL; 693 } 694 695 SDOperand NewSA = 696 TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); 697 MVT VT = Op.getValueType(); 698 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT, 699 InOp.getOperand(0), NewSA)); 700 } 701 } 702 703 if (SimplifyDemandedBits(Op.getOperand(0), NewMask.lshr(ShAmt), 704 KnownZero, KnownOne, TLO, Depth+1)) 705 return true; 706 KnownZero <<= SA->getValue(); 707 KnownOne <<= SA->getValue(); 708 // low bits known zero. 709 KnownZero |= APInt::getLowBitsSet(BitWidth, SA->getValue()); 710 } 711 break; 712 case ISD::SRL: 713 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 714 MVT VT = Op.getValueType(); 715 unsigned ShAmt = SA->getValue(); 716 unsigned VTSize = VT.getSizeInBits(); 717 SDOperand InOp = Op.getOperand(0); 718 719 // If the shift count is an invalid immediate, don't do anything. 720 if (ShAmt >= BitWidth) 721 break; 722 723 // If this is ((X << C1) >>u ShAmt), see if we can simplify this into a 724 // single shift. We can do this if the top bits (which are shifted out) 725 // are never demanded. 
726 if (InOp.getOpcode() == ISD::SHL && 727 isa<ConstantSDNode>(InOp.getOperand(1))) { 728 if (ShAmt && (NewMask & APInt::getHighBitsSet(VTSize, ShAmt)) == 0) { 729 unsigned C1 = cast<ConstantSDNode>(InOp.getOperand(1))->getValue(); 730 unsigned Opc = ISD::SRL; 731 int Diff = ShAmt-C1; 732 if (Diff < 0) { 733 Diff = -Diff; 734 Opc = ISD::SHL; 735 } 736 737 SDOperand NewSA = 738 TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType()); 739 return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, VT, 740 InOp.getOperand(0), NewSA)); 741 } 742 } 743 744 // Compute the new bits that are at the top now. 745 if (SimplifyDemandedBits(InOp, (NewMask << ShAmt), 746 KnownZero, KnownOne, TLO, Depth+1)) 747 return true; 748 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 749 KnownZero = KnownZero.lshr(ShAmt); 750 KnownOne = KnownOne.lshr(ShAmt); 751 752 APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); 753 KnownZero |= HighBits; // High bits known zero. 754 } 755 break; 756 case ISD::SRA: 757 if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { 758 MVT VT = Op.getValueType(); 759 unsigned ShAmt = SA->getValue(); 760 761 // If the shift count is an invalid immediate, don't do anything. 762 if (ShAmt >= BitWidth) 763 break; 764 765 APInt InDemandedMask = (NewMask << ShAmt); 766 767 // If any of the demanded bits are produced by the sign extension, we also 768 // demand the input sign bit. 769 APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt); 770 if (HighBits.intersects(NewMask)) 771 InDemandedMask |= APInt::getSignBit(VT.getSizeInBits()); 772 773 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask, 774 KnownZero, KnownOne, TLO, Depth+1)) 775 return true; 776 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 777 KnownZero = KnownZero.lshr(ShAmt); 778 KnownOne = KnownOne.lshr(ShAmt); 779 780 // Handle the sign bit, adjusted to where it is now in the mask. 
781 APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt); 782 783 // If the input sign bit is known to be zero, or if none of the top bits 784 // are demanded, turn this into an unsigned shift right. 785 if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) { 786 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, VT, Op.getOperand(0), 787 Op.getOperand(1))); 788 } else if (KnownOne.intersects(SignBit)) { // New bits are known one. 789 KnownOne |= HighBits; 790 } 791 } 792 break; 793 case ISD::SIGN_EXTEND_INREG: { 794 MVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 795 796 // Sign extension. Compute the demanded bits in the result that are not 797 // present in the input. 798 APInt NewBits = APInt::getHighBitsSet(BitWidth, 799 BitWidth - EVT.getSizeInBits()) & 800 NewMask; 801 802 // If none of the extended bits are demanded, eliminate the sextinreg. 803 if (NewBits == 0) 804 return TLO.CombineTo(Op, Op.getOperand(0)); 805 806 APInt InSignBit = APInt::getSignBit(EVT.getSizeInBits()); 807 InSignBit.zext(BitWidth); 808 APInt InputDemandedBits = APInt::getLowBitsSet(BitWidth, 809 EVT.getSizeInBits()) & 810 NewMask; 811 812 // Since the sign extended bits are demanded, we know that the sign 813 // bit is demanded. 814 InputDemandedBits |= InSignBit; 815 816 if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits, 817 KnownZero, KnownOne, TLO, Depth+1)) 818 return true; 819 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 820 821 // If the sign bit of the input is known set or clear, then we know the 822 // top bits of the result. 823 824 // If the input sign bit is known zero, convert this into a zero extension. 
825 if (KnownZero.intersects(InSignBit)) 826 return TLO.CombineTo(Op, 827 TLO.DAG.getZeroExtendInReg(Op.getOperand(0), EVT)); 828 829 if (KnownOne.intersects(InSignBit)) { // Input sign bit known set 830 KnownOne |= NewBits; 831 KnownZero &= ~NewBits; 832 } else { // Input sign bit unknown 833 KnownZero &= ~NewBits; 834 KnownOne &= ~NewBits; 835 } 836 break; 837 } 838 case ISD::ZERO_EXTEND: { 839 unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); 840 APInt InMask = NewMask; 841 InMask.trunc(OperandBitWidth); 842 843 // If none of the top bits are demanded, convert this into an any_extend. 844 APInt NewBits = 845 APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask; 846 if (!NewBits.intersects(NewMask)) 847 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, 848 Op.getValueType(), 849 Op.getOperand(0))); 850 851 if (SimplifyDemandedBits(Op.getOperand(0), InMask, 852 KnownZero, KnownOne, TLO, Depth+1)) 853 return true; 854 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 855 KnownZero.zext(BitWidth); 856 KnownOne.zext(BitWidth); 857 KnownZero |= NewBits; 858 break; 859 } 860 case ISD::SIGN_EXTEND: { 861 MVT InVT = Op.getOperand(0).getValueType(); 862 unsigned InBits = InVT.getSizeInBits(); 863 APInt InMask = APInt::getLowBitsSet(BitWidth, InBits); 864 APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits); 865 APInt NewBits = ~InMask & NewMask; 866 867 // If none of the top bits are demanded, convert this into an any_extend. 868 if (NewBits == 0) 869 return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND,Op.getValueType(), 870 Op.getOperand(0))); 871 872 // Since some of the sign extended bits are demanded, we know that the sign 873 // bit is demanded. 
874 APInt InDemandedBits = InMask & NewMask; 875 InDemandedBits |= InSignBit; 876 InDemandedBits.trunc(InBits); 877 878 if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero, 879 KnownOne, TLO, Depth+1)) 880 return true; 881 KnownZero.zext(BitWidth); 882 KnownOne.zext(BitWidth); 883 884 // If the sign bit is known zero, convert this to a zero extend. 885 if (KnownZero.intersects(InSignBit)) 886 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, 887 Op.getValueType(), 888 Op.getOperand(0))); 889 890 // If the sign bit is known one, the top bits match. 891 if (KnownOne.intersects(InSignBit)) { 892 KnownOne |= NewBits; 893 KnownZero &= ~NewBits; 894 } else { // Otherwise, top bits aren't known. 895 KnownOne &= ~NewBits; 896 KnownZero &= ~NewBits; 897 } 898 break; 899 } 900 case ISD::ANY_EXTEND: { 901 unsigned OperandBitWidth = Op.getOperand(0).getValueSizeInBits(); 902 APInt InMask = NewMask; 903 InMask.trunc(OperandBitWidth); 904 if (SimplifyDemandedBits(Op.getOperand(0), InMask, 905 KnownZero, KnownOne, TLO, Depth+1)) 906 return true; 907 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 908 KnownZero.zext(BitWidth); 909 KnownOne.zext(BitWidth); 910 break; 911 } 912 case ISD::TRUNCATE: { 913 // Simplify the input, using demanded bit information, and compute the known 914 // zero/one bits live out. 915 APInt TruncMask = NewMask; 916 TruncMask.zext(Op.getOperand(0).getValueSizeInBits()); 917 if (SimplifyDemandedBits(Op.getOperand(0), TruncMask, 918 KnownZero, KnownOne, TLO, Depth+1)) 919 return true; 920 KnownZero.trunc(BitWidth); 921 KnownOne.trunc(BitWidth); 922 923 // If the input is only used by this truncate, see if we can shrink it based 924 // on the known demanded bits. 
925 if (Op.getOperand(0).Val->hasOneUse()) { 926 SDOperand In = Op.getOperand(0); 927 unsigned InBitWidth = In.getValueSizeInBits(); 928 switch (In.getOpcode()) { 929 default: break; 930 case ISD::SRL: 931 // Shrink SRL by a constant if none of the high bits shifted in are 932 // demanded. 933 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){ 934 APInt HighBits = APInt::getHighBitsSet(InBitWidth, 935 InBitWidth - BitWidth); 936 HighBits = HighBits.lshr(ShAmt->getValue()); 937 HighBits.trunc(BitWidth); 938 939 if (ShAmt->getValue() < BitWidth && !(HighBits & NewMask)) { 940 // None of the shifted in bits are needed. Add a truncate of the 941 // shift input, then shift it. 942 SDOperand NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, 943 Op.getValueType(), 944 In.getOperand(0)); 945 return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL,Op.getValueType(), 946 NewTrunc, In.getOperand(1))); 947 } 948 } 949 break; 950 } 951 } 952 953 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 954 break; 955 } 956 case ISD::AssertZext: { 957 MVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 958 APInt InMask = APInt::getLowBitsSet(BitWidth, 959 VT.getSizeInBits()); 960 if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, 961 KnownZero, KnownOne, TLO, Depth+1)) 962 return true; 963 assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); 964 KnownZero |= ~InMask & NewMask; 965 break; 966 } 967 case ISD::BIT_CONVERT: 968#if 0 969 // If this is an FP->Int bitcast and if the sign bit is the only thing that 970 // is demanded, turn this into a FGETSIGN. 971 if (NewMask == MVT::getIntegerVTSignBit(Op.getValueType()) && 972 MVT::isFloatingPoint(Op.getOperand(0).getValueType()) && 973 !MVT::isVector(Op.getOperand(0).getValueType())) { 974 // Only do this xform if FGETSIGN is valid or if before legalize. 
        // (Disabled code, see #if 0 above): only form FGETSIGN if it is
        // legal for this target, or if we have not legalized yet.
        if (!TLO.AfterLegalize ||
            isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
          // Make a FGETSIGN + SHL to move the sign bit into the appropriate
          // place.  We expect the SHL to be eliminated by other optimizations.
          SDOperand Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
                                           Op.getOperand(0));
          unsigned ShVal = Op.getValueType().getSizeInBits()-1;
          SDOperand ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, Op.getValueType(),
                                                   Sign, ShAmt));
        }
      }
#endif
    break;
  default:
    // Just use ComputeMaskedBits to compute output bits.
    TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
    break;
  }

  // If we know the value of all of the demanded bits, return this as a
  // constant.
  if ((NewMask & (KnownZero|KnownOne)) == NewMask)
    return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));

  // No simplification was possible.
  return false;
}

/// computeMaskedBitsForTargetNode - Determine which of the bits specified
/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
///
/// This is the default implementation: it knows nothing about any
/// target-specific node, so it conservatively reports that no bits are
/// known.  Targets with custom nodes override this to expose their bit
/// semantics to the DAG combiner.
void TargetLowering::computeMaskedBitsForTargetNode(const SDOperand Op,
                                                    const APInt &Mask,
                                                    APInt &KnownZero,
                                                    APInt &KnownOne,
                                                    const SelectionDAG &DAG,
                                                    unsigned Depth) const {
  // This hook must only be reached for target-specific nodes (opcodes at or
  // beyond BUILTIN_OP_END) or target intrinsics; generic nodes are handled
  // by SelectionDAG::ComputeMaskedBits itself.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");
  // Conservative answer: nothing is known about any bit.
  KnownZero = KnownOne = APInt(Mask.getBitWidth(), 0);
}

/// ComputeNumSignBitsForTargetNode - This method can be implemented by
/// targets that want to expose additional information about sign bits to the
/// DAG Combiner.
unsigned TargetLowering::ComputeNumSignBitsForTargetNode(SDOperand Op,
                                                         unsigned Depth) const {
  // This hook must only be reached for target-specific nodes or target
  // intrinsics; generic nodes are handled by ComputeNumSignBits itself.
  assert((Op.getOpcode() >= ISD::BUILTIN_OP_END ||
          Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_VOID) &&
         "Should use ComputeNumSignBits if you don't know whether Op"
         " is a target node!");
  // Conservative default: only the sign bit itself is known to be a sign bit.
  return 1;
}


/// SimplifySetCC - Try to simplify a setcc built with the specified operands 
/// and cc. If it is unable to simplify it, return a null SDOperand.
///
/// VT is the result type of the setcc, N0/N1 are its operands, Cond is the
/// condition code, and DCI provides access to the DAG and the combiner
/// worklist.  When foldBooleans is set, an i1-typed setcc is lowered all the
/// way to XOR/AND/OR logic on the operands.
SDOperand
TargetLowering::SimplifySetCC(MVT VT, SDOperand N0, SDOperand N1,
                              ISD::CondCode Cond, bool foldBooleans,
                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  // These setcc operations always fold.
  switch (Cond) {
  default: break;
  case ISD::SETFALSE:
  case ISD::SETFALSE2: return DAG.getConstant(0, VT);
  case ISD::SETTRUE:
  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
  }

  // Comparisons against an integer constant RHS.
  if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.Val)) {
    const APInt &C1 = N1C->getAPIntValue();
    if (isa<ConstantSDNode>(N0.Val)) {
      // Both sides constant: constant-fold the whole setcc.
      return DAG.FoldSetCC(VT, N0, N1, Cond);
    } else {
      // If the LHS is '(srl (ctlz x), 5)', the RHS is 0/1, and this is an
      // equality comparison, then we're just comparing whether X itself is
      // zero.
      if (N0.getOpcode() == ISD::SRL && (C1 == 0 || C1 == 1) &&
          N0.getOperand(0).getOpcode() == ISD::CTLZ &&
          N0.getOperand(1).getOpcode() == ISD::Constant) {
        unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getValue();
        // The shift amount must move exactly the log2(bitwidth) bit (the
        // "value was zero" bit of ctlz) into bit 0.
        if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
            ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
          if ((C1 == 0) == (Cond == ISD::SETEQ)) {
            // (srl (ctlz x), 5) == 0  -> X != 0
            // (srl (ctlz x), 5) != 1  -> X != 0
            Cond = ISD::SETNE;
          } else {
            // (srl (ctlz x), 5) != 0  -> X == 0
            // (srl (ctlz x), 5) == 1  -> X == 0
            Cond = ISD::SETEQ;
          }
          SDOperand Zero = DAG.getConstant(0, N0.getValueType());
          return DAG.getSetCC(VT, N0.getOperand(0).getOperand(0),
                              Zero, Cond);
        }
      }

      // If the LHS is a ZERO_EXTEND, perform the comparison on the input.
      if (N0.getOpcode() == ISD::ZERO_EXTEND) {
        unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();

        // If the comparison constant has bits in the upper part, the
        // zero-extended value could never match.
        if (C1.intersects(APInt::getHighBitsSet(C1.getBitWidth(),
                                                C1.getBitWidth() - InSize))) {
          switch (Cond) {
          case ISD::SETUGT:
          case ISD::SETUGE:
          case ISD::SETEQ: return DAG.getConstant(0, VT);
          case ISD::SETULT:
          case ISD::SETULE:
          case ISD::SETNE: return DAG.getConstant(1, VT);
          case ISD::SETGT:
          case ISD::SETGE:
            // True if the sign bit of C1 is set.
            return DAG.getConstant(C1.isNegative(), VT);
          case ISD::SETLT:
          case ISD::SETLE:
            // True if the sign bit of C1 isn't set.
            return DAG.getConstant(C1.isNonNegative(), VT);
          default:
            break;
          }
        }

        // Otherwise, we can perform the comparison with the low bits.
        switch (Cond) {
        case ISD::SETEQ:
        case ISD::SETNE:
        case ISD::SETUGT:
        case ISD::SETUGE:
        case ISD::SETULT:
        case ISD::SETULE:
          return DAG.getSetCC(VT, N0.getOperand(0),
                          DAG.getConstant(APInt(C1).trunc(InSize),
                                          N0.getOperand(0).getValueType()),
                          Cond);
        default:
          break;   // todo, be more careful with signed comparisons
        }
      } else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
        MVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
        unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
        MVT ExtDstTy = N0.getValueType();
        unsigned ExtDstTyBits = ExtDstTy.getSizeInBits();

        // If the extended part has any inconsistent bits, it cannot ever
        // compare equal.  In other words, they have to be all ones or all
        // zeros.
        APInt ExtBits =
          APInt::getHighBitsSet(ExtDstTyBits, ExtDstTyBits - ExtSrcTyBits);
        if ((C1 & ExtBits) != 0 && (C1 & ExtBits) != ExtBits)
          return DAG.getConstant(Cond == ISD::SETNE, VT);

        // Build a value with the extended bits cleared so the comparison can
        // be done on the low (source-width) bits only.
        SDOperand ZextOp;
        MVT Op0Ty = N0.getOperand(0).getValueType();
        if (Op0Ty == ExtSrcTy) {
          ZextOp = N0.getOperand(0);
        } else {
          APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
          ZextOp = DAG.getNode(ISD::AND, Op0Ty, N0.getOperand(0),
                               DAG.getConstant(Imm, Op0Ty));
        }
        if (!DCI.isCalledByLegalizer())
          DCI.AddToWorklist(ZextOp.Val);
        // Otherwise, make this a use of a zext.
        return DAG.getSetCC(VT, ZextOp,
                            DAG.getConstant(C1 & APInt::getLowBitsSet(
                                                               ExtDstTyBits,
                                                               ExtSrcTyBits),
                                            ExtDstTy),
                            Cond);
      } else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
                 (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {

        // SETCC (SETCC), [0|1], [EQ|NE]  -> SETCC
        if (N0.getOpcode() == ISD::SETCC) {
          bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getValue() != 1);
          if (TrueWhenTrue)
            return N0;

          // Invert the condition.
          ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
          CC = ISD::getSetCCInverse(CC,
                                   N0.getOperand(0).getValueType().isInteger());
          return DAG.getSetCC(VT, N0.getOperand(0), N0.getOperand(1), CC);
        }

        if ((N0.getOpcode() == ISD::XOR ||
             (N0.getOpcode() == ISD::AND &&
              N0.getOperand(0).getOpcode() == ISD::XOR &&
              N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
            isa<ConstantSDNode>(N0.getOperand(1)) &&
            cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue() == 1) {
          // If this is (X^1) == 0/1, swap the RHS and eliminate the xor.  We
          // can only do this if the top bits are known zero.
          unsigned BitWidth = N0.getValueSizeInBits();
          if (DAG.MaskedValueIsZero(N0,
                                    APInt::getHighBitsSet(BitWidth,
                                                          BitWidth-1))) {
            // Okay, get the un-inverted input value.
            SDOperand Val;
            if (N0.getOpcode() == ISD::XOR)
              Val = N0.getOperand(0);
            else {
              assert(N0.getOpcode() == ISD::AND &&
                     N0.getOperand(0).getOpcode() == ISD::XOR);
              // ((X^1)&1)^1 -> X & 1
              Val = DAG.getNode(ISD::AND, N0.getValueType(),
                                N0.getOperand(0).getOperand(0),
                                N0.getOperand(1));
            }
            // Compare the un-inverted value with the inverted condition.
            return DAG.getSetCC(VT, Val, N1,
                                Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
          }
        }
      }

      // Compute the min/max value of the comparison's domain so range-based
      // folds below can detect always-true/always-false comparisons.
      APInt MinVal, MaxVal;
      unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
      if (ISD::isSignedIntSetCC(Cond)) {
        MinVal = APInt::getSignedMinValue(OperandBitSize);
        MaxVal = APInt::getSignedMaxValue(OperandBitSize);
      } else {
        MinVal = APInt::getMinValue(OperandBitSize);
        MaxVal = APInt::getMaxValue(OperandBitSize);
      }

      // Canonicalize GE/LE comparisons to use GT/LT comparisons.
      if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
        if (C1 == MinVal) return DAG.getConstant(1, VT);   // X >= MIN --> true
        // X >= C0 --> X > (C0-1)
        return DAG.getSetCC(VT, N0, DAG.getConstant(C1-1, N1.getValueType()),
                        (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
      }

      if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
        if (C1 == MaxVal) return DAG.getConstant(1, VT);   // X <= MAX --> true
        // X <= C0 --> X < (C0+1)
        return DAG.getSetCC(VT, N0, DAG.getConstant(C1+1, N1.getValueType()),
                        (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
      }

      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
        return DAG.getConstant(0, VT);      // X < MIN --> false
      if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
        return DAG.getConstant(1, VT);      // X >= MIN --> true
      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
        return DAG.getConstant(0, VT);      // X > MAX --> false
      if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
        return DAG.getConstant(1, VT);      // X <= MAX --> true

      // Canonicalize setgt X, Min --> setne X, Min
      if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);
      // Canonicalize setlt X, Max --> setne X, Max
      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MaxVal)
        return DAG.getSetCC(VT, N0, N1, ISD::SETNE);

      // If we have setult X, 1, turn it into seteq X, 0
      if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
        return DAG.getSetCC(VT, N0, DAG.getConstant(MinVal, N0.getValueType()),
                        ISD::SETEQ);
      // If we have setugt X, Max-1, turn it into seteq X, Max
      else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
        return DAG.getSetCC(VT, N0, DAG.getConstant(MaxVal, N0.getValueType()),
                        ISD::SETEQ);

      // If we have "setcc X, C0", check to see if we can shrink the immediate
      // by changing cc.

      // SETUGT X, SINTMAX  -> SETLT X, 0
      if (Cond == ISD::SETUGT && OperandBitSize != 1 &&
          C1 == (~0ULL >> (65-OperandBitSize)))
        return DAG.getSetCC(VT, N0, DAG.getConstant(0, N1.getValueType()),
                            ISD::SETLT);

      // FIXME: Implement the rest of these.

      // Fold bit comparisons when we can.
      if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
          VT == N0.getValueType() && N0.getOpcode() == ISD::AND)
        if (ConstantSDNode *AndRHS =
                    dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0  -->  (X & 8) >> 3
            // Perform the xform if the AND RHS is a single bit.
            if (isPowerOf2_64(AndRHS->getValue())) {
              return DAG.getNode(ISD::SRL, VT, N0,
                             DAG.getConstant(Log2_64(AndRHS->getValue()),
                                             getShiftAmountTy()));
            }
          } else if (Cond == ISD::SETEQ && C1 == AndRHS->getValue()) {
            // (X & 8) == 8  -->  (X & 8) >> 3
            // Perform the xform if C1 is a single bit.
            if (C1.isPowerOf2()) {
              return DAG.getNode(ISD::SRL, VT, N0,
                          DAG.getConstant(C1.logBase2(), getShiftAmountTy()));
            }
          }
        }
    }
  } else if (isa<ConstantSDNode>(N0.Val)) {
      // Ensure that the constant occurs on the RHS.
    return DAG.getSetCC(VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
  }

  if (isa<ConstantFPSDNode>(N0.Val)) {
    // Constant fold or commute setcc.
    SDOperand O = DAG.FoldSetCC(VT, N0, N1, Cond);
    if (O.Val) return O;
  } else if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1.Val)) {
    // If the RHS of an FP comparison is a constant, simplify it away in
    // some cases.
    if (CFP->getValueAPF().isNaN()) {
      // If an operand is known to be a nan, we can fold it.
      switch (ISD::getUnorderedFlavor(Cond)) {
      default: assert(0 && "Unknown flavor!");
      case 0:  // Known false.
        return DAG.getConstant(0, VT);
      case 1:  // Known true.
        return DAG.getConstant(1, VT);
      case 2:  // Undefined.
        return DAG.getNode(ISD::UNDEF, VT);
      }
    }

    // Otherwise, we know the RHS is not a NaN.  Simplify the node to drop the
    // constant if knowing that the operand is non-nan is enough.  We prefer to
    // have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
    // materialize 0.0.
    if (Cond == ISD::SETO || Cond == ISD::SETUO)
      return DAG.getSetCC(VT, N0, N0, Cond);
  }

  if (N0 == N1) {
    // We can always fold X == X for integer setcc's.
    if (N0.getValueType().isInteger())
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    unsigned UOF = ISD::getUnorderedFlavor(Cond);
    if (UOF == 2)   // FP operators that are undefined on NaNs.
      return DAG.getConstant(ISD::isTrueWhenEqual(Cond), VT);
    if (UOF == unsigned(ISD::isTrueWhenEqual(Cond)))
      return DAG.getConstant(UOF, VT);
    // Otherwise, we can't fold it.  However, we can simplify it to SETUO/SETO
    // if it is not already.
    ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
    if (NewCond != Cond)
      return DAG.getSetCC(VT, N0, N1, NewCond);
  }

  // Equality comparisons involving ADD/SUB/XOR on either side.
  if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
      N0.getValueType().isInteger()) {
    if (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::SUB ||
        N0.getOpcode() == ISD::XOR) {
      // Simplify (X+Y) == (X+Z) -->  Y == Z
      if (N0.getOpcode() == N1.getOpcode()) {
        if (N0.getOperand(0) == N1.getOperand(0))
          return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(1), Cond);
        if (N0.getOperand(1) == N1.getOperand(1))
          return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(0), Cond);
        if (DAG.isCommutativeBinOp(N0.getOpcode())) {
          // If X op Y == Y op X, try other combinations.
          if (N0.getOperand(0) == N1.getOperand(1))
            return DAG.getSetCC(VT, N0.getOperand(1), N1.getOperand(0), Cond);
          if (N0.getOperand(1) == N1.getOperand(0))
            return DAG.getSetCC(VT, N0.getOperand(0), N1.getOperand(1), Cond);
        }
      }

      if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
        if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
          // Turn (X+C1) == C2 --> X == C2-C1
          if (N0.getOpcode() == ISD::ADD && N0.Val->hasOneUse()) {
            return DAG.getSetCC(VT, N0.getOperand(0),
                              DAG.getConstant(RHSC->getValue()-LHSR->getValue(),
                                N0.getValueType()), Cond);
          }

          // Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
          if (N0.getOpcode() == ISD::XOR)
            // If we know that all of the inverted bits are zero, don't bother
            // performing the inversion.
            if (DAG.MaskedValueIsZero(N0.getOperand(0), ~LHSR->getAPIntValue()))
              return
                DAG.getSetCC(VT, N0.getOperand(0),
                             DAG.getConstant(LHSR->getAPIntValue() ^
                                               RHSC->getAPIntValue(),
                                             N0.getValueType()),
                             Cond);
        }

        // Turn (C1-X) == C2 --> X == C1-C2
        if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
          if (N0.getOpcode() == ISD::SUB && N0.Val->hasOneUse()) {
            return
              DAG.getSetCC(VT, N0.getOperand(1),
                           DAG.getConstant(SUBC->getAPIntValue() -
                                             RHSC->getAPIntValue(),
                                           N0.getValueType()),
                           Cond);
          }
        }
      }

      // Simplify (X+Z) == X -->  Z == 0
      if (N0.getOperand(0) == N1)
        return DAG.getSetCC(VT, N0.getOperand(1),
                        DAG.getConstant(0, N0.getValueType()), Cond);
      if (N0.getOperand(1) == N1) {
        if (DAG.isCommutativeBinOp(N0.getOpcode()))
          return DAG.getSetCC(VT, N0.getOperand(0),
                          DAG.getConstant(0, N0.getValueType()), Cond);
        else if (N0.Val->hasOneUse()) {
          assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
          // (Z-X) == X  --> Z == X<<1
          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(),
                                     N1,
                                     DAG.getConstant(1, getShiftAmountTy()));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.Val);
          return DAG.getSetCC(VT, N0.getOperand(0), SH, Cond);
        }
      }
    }

    if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
        N1.getOpcode() == ISD::XOR) {
      // Simplify  X == (X+Z) -->  Z == 0
      if (N1.getOperand(0) == N0) {
        return DAG.getSetCC(VT, N1.getOperand(1),
                        DAG.getConstant(0, N1.getValueType()), Cond);
      } else if (N1.getOperand(1) == N0) {
        if (DAG.isCommutativeBinOp(N1.getOpcode())) {
          return DAG.getSetCC(VT, N1.getOperand(0),
                          DAG.getConstant(0, N1.getValueType()), Cond);
        } else if (N1.Val->hasOneUse()) {
          assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
          // X == (Z-X)  --> X<<1 == Z
          SDOperand SH = DAG.getNode(ISD::SHL, N1.getValueType(), N0,
                                     DAG.getConstant(1, getShiftAmountTy()));
          if (!DCI.isCalledByLegalizer())
            DCI.AddToWorklist(SH.Val);
          return DAG.getSetCC(VT, SH, N1.getOperand(0), Cond);
        }
      }
    }
  }

  // Fold away ALL boolean setcc's.
  SDOperand Temp;
  if (N0.getValueType() == MVT::i1 && foldBooleans) {
    switch (Cond) {
    default: assert(0 && "Unknown integer setcc!");
    case ISD::SETEQ:  // X == Y  -> (X^Y)^1
      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
      N0 = DAG.getNode(ISD::XOR, MVT::i1, Temp, DAG.getConstant(1, MVT::i1));
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.Val);
      break;
    case ISD::SETNE:  // X != Y   -->  (X^Y)
      N0 = DAG.getNode(ISD::XOR, MVT::i1, N0, N1);
      break;
    case ISD::SETGT:  // X >s Y   -->  X == 0 & Y == 1  -->  X^1 & Y
    case ISD::SETULT: // X <u Y   -->  X == 0 & Y == 1  -->  X^1 & Y
      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
      N0 = DAG.getNode(ISD::AND, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.Val);
      break;
    case ISD::SETLT:  // X <s Y   --> X == 1 & Y == 0  -->  Y^1 & X
    case ISD::SETUGT: // X >u Y   --> X == 1 & Y == 0  -->  Y^1 & X
      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
      N0 = DAG.getNode(ISD::AND, MVT::i1, N0, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.Val);
      break;
    case ISD::SETULE: // X <=u Y  --> X == 0 | Y == 1  -->  X^1 | Y
    case ISD::SETGE:  // X >=s Y  --> X == 0 | Y == 1  -->  X^1 | Y
      Temp = DAG.getNode(ISD::XOR, MVT::i1, N0, DAG.getConstant(1, MVT::i1));
      N0 = DAG.getNode(ISD::OR, MVT::i1, N1, Temp);
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(Temp.Val);
      break;
    case ISD::SETUGE: // X >=u Y  --> X == 1 | Y == 0  -->  Y^1 | X
    case ISD::SETLE:  // X <=s Y  --> X == 1 | Y == 0  -->  Y^1 | X
      Temp = DAG.getNode(ISD::XOR, MVT::i1, N1, DAG.getConstant(1, MVT::i1));
      N0 = DAG.getNode(ISD::OR, MVT::i1, N0, Temp);
      break;
    }
    if (VT != MVT::i1) {
      if (!DCI.isCalledByLegalizer())
        DCI.AddToWorklist(N0.Val);
      // FIXME: If running after legalize, we probably can't do this.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, VT, N0);
    }
    return N0;
  }

  // Could not fold it.
  return SDOperand();
}

/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
///
/// Note: Offset is accumulated into (not assigned), so callers should
/// initialize it before the first call; the function also recurses through
/// ISD::ADD nodes, adding each constant operand it finds along the way.
bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
                                    int64_t &Offset) const {
  if (isa<GlobalAddressSDNode>(N)) {
    GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
    GA = GASD->getGlobal();
    Offset += GASD->getOffset();
    return true;
  }

  if (N->getOpcode() == ISD::ADD) {
    SDOperand N1 = N->getOperand(0);
    SDOperand N2 = N->getOperand(1);
    // Try (GA + C) with the global on either side of the add.
    if (isGAPlusOffset(N1.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N2);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    } else if (isGAPlusOffset(N2.Val, GA, Offset)) {
      ConstantSDNode *V = dyn_cast<ConstantSDNode>(N1);
      if (V) {
        Offset += V->getSignExtended();
        return true;
      }
    }
  }
  return false;
}


/// isConsecutiveLoad - Return true if LD (which must be a LoadSDNode) is
/// loading 'Bytes' bytes from a location that is 'Dist' units away from the
/// location that the 'Base' load is loading from.
bool TargetLowering::isConsecutiveLoad(SDNode *LD, SDNode *Base,
                                       unsigned Bytes, int Dist,
                                       const MachineFrameInfo *MFI) const {
  // Both loads must hang off the same chain.
  if (LD->getOperand(0).Val != Base->getOperand(0).Val)
    return false;
  MVT VT = LD->getValueType(0);
  if (VT.getSizeInBits() / 8 != Bytes)
    return false;

  SDOperand Loc = LD->getOperand(1);
  SDOperand BaseLoc = Base->getOperand(1);
  if (Loc.getOpcode() == ISD::FrameIndex) {
    // Both addresses must be frame indices of the same size; compare their
    // static frame offsets.
    if (BaseLoc.getOpcode() != ISD::FrameIndex)
      return false;
    int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
    int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
    int FS = MFI->getObjectSize(FI);
    int BFS = MFI->getObjectSize(BFI);
    if (FS != BFS || FS != (int)Bytes) return false;
    return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
  }

  // Otherwise, try to decompose both addresses as GlobalValue + offset and
  // compare the offsets when the globals match.
  GlobalValue *GV1 = NULL;
  GlobalValue *GV2 = NULL;
  int64_t Offset1 = 0;
  int64_t Offset2 = 0;
  bool isGA1 = isGAPlusOffset(Loc.Val, GV1, Offset1);
  bool isGA2 = isGAPlusOffset(BaseLoc.Val, GV2, Offset2);
  if (isGA1 && isGA2 && GV1 == GV2)
    return Offset1 == (Offset2 + Dist*Bytes);
  return false;
}


SDOperand TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
  // Default implementation: no optimization.
  return SDOperand();
}

//===----------------------------------------------------------------------===//
//  Inline Assembler Implementation Methods
//===----------------------------------------------------------------------===//


/// getConstraintType - Classify the single-letter (or {reg}-style) inline asm
/// constraint string into one of the ConstraintType buckets.
TargetLowering::ConstraintType
TargetLowering::getConstraintType(const std::string &Constraint) const {
  // FIXME: lots more standard ones to handle.
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    case 'r': return C_RegisterClass;
    case 'm':    // memory
    case 'o':    // offsetable
    case 'V':    // not offsetable
      return C_Memory;
    case 'i':    // Simple Integer or Relocatable Constant
    case 'n':    // Simple Integer
    case 's':    // Relocatable Constant
    case 'X':    // Allow ANY value.
    case 'I':    // Target registers.
    case 'J':
    case 'K':
    case 'L':
    case 'M':
    case 'N':
    case 'O':
    case 'P':
      return C_Other;
    }
  }

  // "{registername}" selects one specific register.
  if (Constraint.size() > 1 && Constraint[0] == '{' &&
      Constraint[Constraint.size()-1] == '}')
    return C_Register;
  return C_Unknown;
}

/// LowerXConstraint - try to replace an X constraint, which matches anything,
/// with another that has more specific requirements based on the type of the
/// corresponding operand.  Returns null when no better constraint applies.
const char *TargetLowering::LowerXConstraint(MVT ConstraintVT) const{
  if (ConstraintVT.isInteger())
    return "r";
  if (ConstraintVT.isFloatingPoint())
    return "f";      // works for many targets
  return 0;
}

/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector.  If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDOperand Op,
                                                  char ConstraintLetter,
                                                  std::vector<SDOperand> &Ops,
                                                  SelectionDAG &DAG) const {
  switch (ConstraintLetter) {
  default: break;
  case 'X':     // Allows any operand; labels (basic block) use this.
    if (Op.getOpcode() == ISD::BasicBlock) {
      Ops.push_back(Op);
      return;
    }
    // fall through
  case 'i':    // Simple Integer or Relocatable Constant
  case 'n':    // Simple Integer
  case 's': {  // Relocatable Constant
    // These operands are interested in values of the form (GV+C), where C may
    // be folded in as an offset of GV, or it may be explicitly added.  Also, it
    // is possible and fine if either GV or C are missing.
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
    GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);

    // If we have "(add GV, C)", pull out GV/C
    if (Op.getOpcode() == ISD::ADD) {
      C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
      GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
      if (C == 0 || GA == 0) {
        // Try the operands the other way around.
        C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
        GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
      }
      if (C == 0 || GA == 0)
        C = 0, GA = 0;
    }

    // If we find a valid operand, map to the TargetXXX version so that the
    // value itself doesn't get selected.
    if (GA) {   // Either &GV or &GV+C
      if (ConstraintLetter != 'n') {
        int64_t Offs = GA->getOffset();
        if (C) Offs += C->getValue();
        Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
                                                 Op.getValueType(), Offs));
        return;
      }
    }
    if (C) {   // just C, no GV.
      // Simple constants are not allowed for 's'.
      if (ConstraintLetter != 's') {
        Ops.push_back(DAG.getTargetConstant(C->getValue(), Op.getValueType()));
        return;
      }
    }
    break;
  }
  }
}

/// getRegClassForInlineAsmConstraint - Default implementation: no candidate
/// registers for any constraint; targets override this to supply them.
std::vector<unsigned> TargetLowering::
getRegClassForInlineAsmConstraint(const std::string &Constraint,
                                  MVT VT) const {
  return std::vector<unsigned>();
}


/// getRegForInlineAsmConstraint - Resolve a "{regname}" constraint to a
/// (physical register, register class) pair, or (0, 0) when no legal
/// register class contains a register with that asm name.
std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
getRegForInlineAsmConstraint(const std::string &Constraint,
                             MVT VT) const {
  if (Constraint[0] != '{')
    return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
  assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");

  // Remove the braces from around the name.
  std::string RegName(Constraint.begin()+1, Constraint.end()-1);

  // Figure out which register class contains this reg.
  const TargetRegisterInfo *RI = TM.getRegisterInfo();
  for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
       E = RI->regclass_end(); RCI != E; ++RCI) {
    const TargetRegisterClass *RC = *RCI;

    // If none of the value types for this register class are valid, we
    // can't use it.  For example, 64-bit reg classes on 32-bit targets.
    bool isLegal = false;
    for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
         I != E; ++I) {
      if (isTypeLegal(*I)) {
        isLegal = true;
        break;
      }
    }

    if (!isLegal) continue;

    // Scan the class for a register whose asm name matches (case-insensitive).
    for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
         I != E; ++I) {
      if (StringsEqualNoCase(RegName, RI->get(*I).AsmName))
        return std::make_pair(*I, RC);
    }
  }

  return std::pair<unsigned, const TargetRegisterClass*>(0, 0);
}

//===----------------------------------------------------------------------===//
// Constraint Selection.

/// getConstraintGenerality - Return an integer indicating how general CT
/// is.  Higher values are more general (and therefore less preferred when
/// choosing among multiple constraint options).
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
  switch (CT) {
  default: assert(0 && "Unknown constraint type!");
  case TargetLowering::C_Other:
  case TargetLowering::C_Unknown:
    return 0;
  case TargetLowering::C_Register:
    return 1;
  case TargetLowering::C_RegisterClass:
    return 2;
  case TargetLowering::C_Memory:
    return 3;
  }
}

/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
1738/// This is somewhat tricky: constraints fall into four classes: 1739/// Other -> immediates and magic values 1740/// Register -> one specific register 1741/// RegisterClass -> a group of regs 1742/// Memory -> memory 1743/// Ideally, we would pick the most specific constraint possible: if we have 1744/// something that fits into a register, we would pick it. The problem here 1745/// is that if we have something that could either be in a register or in 1746/// memory that use of the register could cause selection of *other* 1747/// operands to fail: they might only succeed if we pick memory. Because of 1748/// this the heuristic we use is: 1749/// 1750/// 1) If there is an 'other' constraint, and if the operand is valid for 1751/// that constraint, use it. This makes us take advantage of 'i' 1752/// constraints when available. 1753/// 2) Otherwise, pick the most general constraint present. This prefers 1754/// 'm' over 'r', for example. 1755/// 1756static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, 1757 const TargetLowering &TLI, 1758 SDOperand Op, SelectionDAG *DAG) { 1759 assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); 1760 unsigned BestIdx = 0; 1761 TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; 1762 int BestGenerality = -1; 1763 1764 // Loop over the options, keeping track of the most general one. 1765 for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { 1766 TargetLowering::ConstraintType CType = 1767 TLI.getConstraintType(OpInfo.Codes[i]); 1768 1769 // If this is an 'other' constraint, see if the operand is valid for it. 1770 // For example, on X86 we might have an 'rI' constraint. If the operand 1771 // is an integer in the range [0..31] we want to use I (saving a load 1772 // of a register), otherwise we must use 'r'. 
1773 if (CType == TargetLowering::C_Other && Op.Val) { 1774 assert(OpInfo.Codes[i].size() == 1 && 1775 "Unhandled multi-letter 'other' constraint"); 1776 std::vector<SDOperand> ResultOps; 1777 TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], 1778 ResultOps, *DAG); 1779 if (!ResultOps.empty()) { 1780 BestType = CType; 1781 BestIdx = i; 1782 break; 1783 } 1784 } 1785 1786 // This constraint letter is more general than the previous one, use it. 1787 int Generality = getConstraintGenerality(CType); 1788 if (Generality > BestGenerality) { 1789 BestType = CType; 1790 BestIdx = i; 1791 BestGenerality = Generality; 1792 } 1793 } 1794 1795 OpInfo.ConstraintCode = OpInfo.Codes[BestIdx]; 1796 OpInfo.ConstraintType = BestType; 1797} 1798 1799/// ComputeConstraintToUse - Determines the constraint code and constraint 1800/// type to use for the specific AsmOperandInfo, setting 1801/// OpInfo.ConstraintCode and OpInfo.ConstraintType. 1802void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, 1803 SDOperand Op, 1804 SelectionDAG *DAG) const { 1805 assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); 1806 1807 // Single-letter constraints ('r') are very common. 1808 if (OpInfo.Codes.size() == 1) { 1809 OpInfo.ConstraintCode = OpInfo.Codes[0]; 1810 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); 1811 } else { 1812 ChooseConstraint(OpInfo, *this, Op, DAG); 1813 } 1814 1815 // 'X' matches anything. 1816 if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) { 1817 // Labels and constants are handled elsewhere ('X' is the only thing 1818 // that matches labels). 1819 if (isa<BasicBlock>(OpInfo.CallOperandVal) || 1820 isa<ConstantInt>(OpInfo.CallOperandVal)) 1821 return; 1822 1823 // Otherwise, try to resolve it to something we know about by looking at 1824 // the actual operand type. 
1825 if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) { 1826 OpInfo.ConstraintCode = Repl; 1827 OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); 1828 } 1829 } 1830} 1831 1832//===----------------------------------------------------------------------===// 1833// Loop Strength Reduction hooks 1834//===----------------------------------------------------------------------===// 1835 1836/// isLegalAddressingMode - Return true if the addressing mode represented 1837/// by AM is legal for this target, for a load/store of the specified type. 1838bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, 1839 const Type *Ty) const { 1840 // The default implementation of this implements a conservative RISCy, r+r and 1841 // r+i addr mode. 1842 1843 // Allows a sign-extended 16-bit immediate field. 1844 if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) 1845 return false; 1846 1847 // No global is ever allowed as a base. 1848 if (AM.BaseGV) 1849 return false; 1850 1851 // Only support r+r, 1852 switch (AM.Scale) { 1853 case 0: // "r+i" or just "i", depending on HasBaseReg. 1854 break; 1855 case 1: 1856 if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. 1857 return false; 1858 // Otherwise we have r+r or r+i. 1859 break; 1860 case 2: 1861 if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. 1862 return false; 1863 // Allow 2*r as r+r. 1864 break; 1865 } 1866 1867 return true; 1868} 1869 1870// Magic for divide replacement 1871 1872struct ms { 1873 int64_t m; // magic number 1874 int64_t s; // shift amount 1875}; 1876 1877struct mu { 1878 uint64_t m; // magic number 1879 int64_t a; // add indicator 1880 int64_t s; // shift amount 1881}; 1882 1883/// magic - calculate the magic numbers required to codegen an integer sdiv as 1884/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1885/// or -1. 
1886static ms magic32(int32_t d) { 1887 int32_t p; 1888 uint32_t ad, anc, delta, q1, r1, q2, r2, t; 1889 const uint32_t two31 = 0x80000000U; 1890 struct ms mag; 1891 1892 ad = abs(d); 1893 t = two31 + ((uint32_t)d >> 31); 1894 anc = t - 1 - t%ad; // absolute value of nc 1895 p = 31; // initialize p 1896 q1 = two31/anc; // initialize q1 = 2p/abs(nc) 1897 r1 = two31 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1898 q2 = two31/ad; // initialize q2 = 2p/abs(d) 1899 r2 = two31 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1900 do { 1901 p = p + 1; 1902 q1 = 2*q1; // update q1 = 2p/abs(nc) 1903 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1904 if (r1 >= anc) { // must be unsigned comparison 1905 q1 = q1 + 1; 1906 r1 = r1 - anc; 1907 } 1908 q2 = 2*q2; // update q2 = 2p/abs(d) 1909 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1910 if (r2 >= ad) { // must be unsigned comparison 1911 q2 = q2 + 1; 1912 r2 = r2 - ad; 1913 } 1914 delta = ad - r2; 1915 } while (q1 < delta || (q1 == delta && r1 == 0)); 1916 1917 mag.m = (int32_t)(q2 + 1); // make sure to sign extend 1918 if (d < 0) mag.m = -mag.m; // resulting magic number 1919 mag.s = p - 32; // resulting shift 1920 return mag; 1921} 1922 1923/// magicu - calculate the magic numbers required to codegen an integer udiv as 1924/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
1925static mu magicu32(uint32_t d) { 1926 int32_t p; 1927 uint32_t nc, delta, q1, r1, q2, r2; 1928 struct mu magu; 1929 magu.a = 0; // initialize "add" indicator 1930 nc = - 1 - (-d)%d; 1931 p = 31; // initialize p 1932 q1 = 0x80000000/nc; // initialize q1 = 2p/nc 1933 r1 = 0x80000000 - q1*nc; // initialize r1 = rem(2p,nc) 1934 q2 = 0x7FFFFFFF/d; // initialize q2 = (2p-1)/d 1935 r2 = 0x7FFFFFFF - q2*d; // initialize r2 = rem((2p-1),d) 1936 do { 1937 p = p + 1; 1938 if (r1 >= nc - r1 ) { 1939 q1 = 2*q1 + 1; // update q1 1940 r1 = 2*r1 - nc; // update r1 1941 } 1942 else { 1943 q1 = 2*q1; // update q1 1944 r1 = 2*r1; // update r1 1945 } 1946 if (r2 + 1 >= d - r2) { 1947 if (q2 >= 0x7FFFFFFF) magu.a = 1; 1948 q2 = 2*q2 + 1; // update q2 1949 r2 = 2*r2 + 1 - d; // update r2 1950 } 1951 else { 1952 if (q2 >= 0x80000000) magu.a = 1; 1953 q2 = 2*q2; // update q2 1954 r2 = 2*r2 + 1; // update r2 1955 } 1956 delta = d - 1 - r2; 1957 } while (p < 64 && (q1 < delta || (q1 == delta && r1 == 0))); 1958 magu.m = q2 + 1; // resulting magic number 1959 magu.s = p - 32; // resulting shift 1960 return magu; 1961} 1962 1963/// magic - calculate the magic numbers required to codegen an integer sdiv as 1964/// a sequence of multiply and shifts. Requires that the divisor not be 0, 1, 1965/// or -1. 1966static ms magic64(int64_t d) { 1967 int64_t p; 1968 uint64_t ad, anc, delta, q1, r1, q2, r2, t; 1969 const uint64_t two63 = 9223372036854775808ULL; // 2^63 1970 struct ms mag; 1971 1972 ad = d >= 0 ? 
d : -d; 1973 t = two63 + ((uint64_t)d >> 63); 1974 anc = t - 1 - t%ad; // absolute value of nc 1975 p = 63; // initialize p 1976 q1 = two63/anc; // initialize q1 = 2p/abs(nc) 1977 r1 = two63 - q1*anc; // initialize r1 = rem(2p,abs(nc)) 1978 q2 = two63/ad; // initialize q2 = 2p/abs(d) 1979 r2 = two63 - q2*ad; // initialize r2 = rem(2p,abs(d)) 1980 do { 1981 p = p + 1; 1982 q1 = 2*q1; // update q1 = 2p/abs(nc) 1983 r1 = 2*r1; // update r1 = rem(2p/abs(nc)) 1984 if (r1 >= anc) { // must be unsigned comparison 1985 q1 = q1 + 1; 1986 r1 = r1 - anc; 1987 } 1988 q2 = 2*q2; // update q2 = 2p/abs(d) 1989 r2 = 2*r2; // update r2 = rem(2p/abs(d)) 1990 if (r2 >= ad) { // must be unsigned comparison 1991 q2 = q2 + 1; 1992 r2 = r2 - ad; 1993 } 1994 delta = ad - r2; 1995 } while (q1 < delta || (q1 == delta && r1 == 0)); 1996 1997 mag.m = q2 + 1; 1998 if (d < 0) mag.m = -mag.m; // resulting magic number 1999 mag.s = p - 64; // resulting shift 2000 return mag; 2001} 2002 2003/// magicu - calculate the magic numbers required to codegen an integer udiv as 2004/// a sequence of multiply, add and shifts. Requires that the divisor not be 0. 
// magicu64 - 64-bit counterpart of magicu32 above: computes the magic
// multiplier, "add" indicator and shift for an unsigned divide by the
// constant d.  Requires d != 0.
static mu magicu64(uint64_t d)
{
  int64_t p;
  uint64_t nc, delta, q1, r1, q2, r2;
  struct mu magu;
  magu.a = 0;               // initialize "add" indicator
  nc = - 1 - (-d)%d;        // unsigned arithmetic; well defined for any d != 0
  p = 63;                   // initialize p
  q1 = 0x8000000000000000ull/nc;       // initialize q1 = 2p/nc
  r1 = 0x8000000000000000ull - q1*nc;  // initialize r1 = rem(2p,nc)
  q2 = 0x7FFFFFFFFFFFFFFFull/d;        // initialize q2 = (2p-1)/d
  r2 = 0x7FFFFFFFFFFFFFFFull - q2*d;   // initialize r2 = rem((2p-1),d)
  do {
    p = p + 1;
    if (r1 >= nc - r1 ) {
      q1 = 2*q1 + 1;  // update q1
      r1 = 2*r1 - nc; // update r1
    }
    else {
      q1 = 2*q1; // update q1
      r1 = 2*r1; // update r1
    }
    if (r2 + 1 >= d - r2) {
      if (q2 >= 0x7FFFFFFFFFFFFFFFull) magu.a = 1;
      q2 = 2*q2 + 1;     // update q2
      r2 = 2*r2 + 1 - d; // update r2
    }
    else {
      if (q2 >= 0x8000000000000000ull) magu.a = 1;
      q2 = 2*q2;     // update q2
      r2 = 2*r2 + 1; // update r2
    }
    delta = d - 1 - r2;
  } while (p < 128 && (q1 < delta || (q1 == delta && r1 == 0)));
  magu.m = q2 + 1; // resulting magic number
  magu.s = p - 64; // resulting shift
  return magu;
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// If Created is non-null, every intermediate node built here (other than the
/// returned node) is appended to it so the caller can track new nodes.
/// Returns a null SDOperand if the type or target does not permit the
/// transformation.
SDOperand TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
                                    std::vector<SDNode*>* Created) const {
  MVT VT = N->getValueType(0);

  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildSDIV only operates on i32 or i64

  int64_t d = cast<ConstantSDNode>(N->getOperand(1))->getSignExtended();
  ms magics = (VT == MVT::i32) ? magic32(d) : magic64(d);

  // Multiply the numerator (operand 0) by the magic value, keeping only the
  // high half of the product.
  SDOperand Q;
  if (isOperationLegal(ISD::MULHS, VT))
    Q = DAG.getNode(ISD::MULHS, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, VT));
  else if (isOperationLegal(ISD::SMUL_LOHI, VT))
    Q = SDOperand(DAG.getNode(ISD::SMUL_LOHI, DAG.getVTList(VT, VT),
                              N->getOperand(0),
                              DAG.getConstant(magics.m, VT)).Val, 1);
  else
    return SDOperand();       // No mulhs or equivalent
  // If d > 0 and m < 0, add the numerator
  if (d > 0 && magics.m < 0) {
    Q = DAG.getNode(ISD::ADD, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // If d < 0 and m > 0, subtract the numerator.
  if (d < 0 && magics.m > 0) {
    Q = DAG.getNode(ISD::SUB, VT, Q, N->getOperand(0));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Shift right algebraic if shift value is nonzero
  if (magics.s > 0) {
    Q = DAG.getNode(ISD::SRA, VT, Q,
                    DAG.getConstant(magics.s, getShiftAmountTy()));
    if (Created)
      Created->push_back(Q.Val);
  }
  // Extract the sign bit and add it to the quotient (rounds toward zero for
  // negative dividends).
  SDOperand T =
    DAG.getNode(ISD::SRL, VT, Q, DAG.getConstant(VT.getSizeInBits()-1,
                                                 getShiftAmountTy()));
  if (Created)
    Created->push_back(T.Val);
  return DAG.getNode(ISD::ADD, VT, Q, T);
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
/// If Created is non-null, every intermediate node built here (other than the
/// returned node) is appended to it so the caller can track new nodes.
SDOperand TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
                                    std::vector<SDNode*>* Created) const {
  MVT VT = N->getValueType(0);

  // Check to see if we can do this.
  if (!isTypeLegal(VT) || (VT != MVT::i32 && VT != MVT::i64))
    return SDOperand();       // BuildUDIV only operates on i32 or i64

  uint64_t d = cast<ConstantSDNode>(N->getOperand(1))->getValue();
  mu magics = (VT == MVT::i32) ? magicu32(d) : magicu64(d);

  // Multiply the numerator (operand 0) by the magic value, keeping only the
  // high half of the product.
  SDOperand Q;
  if (isOperationLegal(ISD::MULHU, VT))
    Q = DAG.getNode(ISD::MULHU, VT, N->getOperand(0),
                    DAG.getConstant(magics.m, VT));
  else if (isOperationLegal(ISD::UMUL_LOHI, VT))
    Q = SDOperand(DAG.getNode(ISD::UMUL_LOHI, DAG.getVTList(VT, VT),
                              N->getOperand(0),
                              DAG.getConstant(magics.m, VT)).Val, 1);
  else
    return SDOperand();       // No mulhu or equivalent
  if (Created)
    Created->push_back(Q.Val);

  if (magics.a == 0) {
    // No overflow adjustment needed: quotient is just the high product
    // shifted down.
    return DAG.getNode(ISD::SRL, VT, Q,
                       DAG.getConstant(magics.s, getShiftAmountTy()));
  } else {
    // Overflow case: compute (((n - q) >> 1) + q) >> (s-1).
    // NOTE(review): the "magics.s-1" shift assumes magics.s >= 1 whenever
    // magics.a is set — TODO confirm magicu32/magicu64 guarantee this for
    // all divisors reaching here.
    SDOperand NPQ = DAG.getNode(ISD::SUB, VT, N->getOperand(0), Q);
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::SRL, VT, NPQ,
                      DAG.getConstant(1, getShiftAmountTy()));
    if (Created)
      Created->push_back(NPQ.Val);
    NPQ = DAG.getNode(ISD::ADD, VT, NPQ, Q);
    if (Created)
      Created->push_back(NPQ.Val);
    return DAG.getNode(ISD::SRL, VT, NPQ,
                       DAG.getConstant(magics.s-1, getShiftAmountTy()));
  }
}