// DAGCombiner.cpp revision fac14ab1795cfce21e4e5e51c711a11f6181074b
1//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11// both before and after the DAG is legalized. 12// 13// This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14// primarily intended to handle simplification opportunities that are implicit 15// in the LLVM IR and exposed by the various codegen lowering phases. 16// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "dagcombine" 20#include "llvm/CodeGen/SelectionDAG.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/LLVMContext.h" 23#include "llvm/CodeGen/MachineFunction.h" 24#include "llvm/CodeGen/MachineFrameInfo.h" 25#include "llvm/Analysis/AliasAnalysis.h" 26#include "llvm/DataLayout.h" 27#include "llvm/Target/TargetLowering.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/Target/TargetOptions.h" 30#include "llvm/ADT/SmallPtrSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/Support/CommandLine.h" 33#include "llvm/Support/Debug.h" 34#include "llvm/Support/ErrorHandling.h" 35#include "llvm/Support/MathExtras.h" 36#include "llvm/Support/raw_ostream.h" 37#include <algorithm> 38using namespace llvm; 39 40STATISTIC(NodesCombined , "Number of dag nodes combined"); 41STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 42STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 43STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 44STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 45 46namespace { 47 static cl::opt<bool> 48 CombinerAA("combiner-alias-analysis", cl::Hidden, 49 cl::desc("Turn on 
alias analysis during testing")); 50 51 static cl::opt<bool> 52 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 53 cl::desc("Include global information in alias analysis")); 54 55//------------------------------ DAGCombiner ---------------------------------// 56 57 class DAGCombiner { 58 SelectionDAG &DAG; 59 const TargetLowering &TLI; 60 CombineLevel Level; 61 CodeGenOpt::Level OptLevel; 62 bool LegalOperations; 63 bool LegalTypes; 64 65 // Worklist of all of the nodes that need to be simplified. 66 // 67 // This has the semantics that when adding to the worklist, 68 // the item added must be next to be processed. It should 69 // also only appear once. The naive approach to this takes 70 // linear time. 71 // 72 // To reduce the insert/remove time to logarithmic, we use 73 // a set and a vector to maintain our worklist. 74 // 75 // The set contains the items on the worklist, but does not 76 // maintain the order they should be visited. 77 // 78 // The vector maintains the order nodes should be visited, but may 79 // contain duplicate or removed nodes. When choosing a node to 80 // visit, we pop off the order stack until we find an item that is 81 // also in the contents set. All operations are O(log N). 82 SmallPtrSet<SDNode*, 64> WorkListContents; 83 SmallVector<SDNode*, 64> WorkListOrder; 84 85 // AA - Used for DAG load/store alias analysis. 86 AliasAnalysis &AA; 87 88 /// AddUsersToWorkList - When an instruction is simplified, add all users of 89 /// the instruction to the work lists because they might get more simplified 90 /// now. 91 /// 92 void AddUsersToWorkList(SDNode *N) { 93 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 94 UI != UE; ++UI) 95 AddToWorkList(*UI); 96 } 97 98 /// visit - call the node-specific routine that knows how to fold each 99 /// particular type of node. 
100 SDValue visit(SDNode *N); 101 102 public: 103 /// AddToWorkList - Add to the work list making sure its instance is at the 104 /// back (next to be processed.) 105 void AddToWorkList(SDNode *N) { 106 WorkListContents.insert(N); 107 WorkListOrder.push_back(N); 108 } 109 110 /// removeFromWorkList - remove all instances of N from the worklist. 111 /// 112 void removeFromWorkList(SDNode *N) { 113 WorkListContents.erase(N); 114 } 115 116 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 117 bool AddTo = true); 118 119 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 120 return CombineTo(N, &Res, 1, AddTo); 121 } 122 123 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 124 bool AddTo = true) { 125 SDValue To[] = { Res0, Res1 }; 126 return CombineTo(N, To, 2, AddTo); 127 } 128 129 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 130 131 private: 132 133 /// SimplifyDemandedBits - Check the specified integer node value to see if 134 /// it can be simplified or if things it uses can be simplified by bit 135 /// propagation. If so, return true. 
136 bool SimplifyDemandedBits(SDValue Op) { 137 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 138 APInt Demanded = APInt::getAllOnesValue(BitWidth); 139 return SimplifyDemandedBits(Op, Demanded); 140 } 141 142 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 143 144 bool CombineToPreIndexedLoadStore(SDNode *N); 145 bool CombineToPostIndexedLoadStore(SDNode *N); 146 147 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 148 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 149 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 150 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 151 SDValue PromoteIntBinOp(SDValue Op); 152 SDValue PromoteIntShiftOp(SDValue Op); 153 SDValue PromoteExtend(SDValue Op); 154 bool PromoteLoad(SDValue Op); 155 156 void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 157 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 158 ISD::NodeType ExtType); 159 160 /// combine - call the node-specific routine that knows how to fold each 161 /// particular type of node. If that doesn't do anything, try the 162 /// target-specific DAG combines. 163 SDValue combine(SDNode *N); 164 165 // Visitation implementation - Implement dag node combining for different 166 // node types. The semantics are as follows: 167 // Return Value: 168 // SDValue.getNode() == 0 - No change was made 169 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 170 // otherwise - N should be replaced by the returned Operand. 
171 // 172 SDValue visitTokenFactor(SDNode *N); 173 SDValue visitMERGE_VALUES(SDNode *N); 174 SDValue visitADD(SDNode *N); 175 SDValue visitSUB(SDNode *N); 176 SDValue visitADDC(SDNode *N); 177 SDValue visitSUBC(SDNode *N); 178 SDValue visitADDE(SDNode *N); 179 SDValue visitSUBE(SDNode *N); 180 SDValue visitMUL(SDNode *N); 181 SDValue visitSDIV(SDNode *N); 182 SDValue visitUDIV(SDNode *N); 183 SDValue visitSREM(SDNode *N); 184 SDValue visitUREM(SDNode *N); 185 SDValue visitMULHU(SDNode *N); 186 SDValue visitMULHS(SDNode *N); 187 SDValue visitSMUL_LOHI(SDNode *N); 188 SDValue visitUMUL_LOHI(SDNode *N); 189 SDValue visitSMULO(SDNode *N); 190 SDValue visitUMULO(SDNode *N); 191 SDValue visitSDIVREM(SDNode *N); 192 SDValue visitUDIVREM(SDNode *N); 193 SDValue visitAND(SDNode *N); 194 SDValue visitOR(SDNode *N); 195 SDValue visitXOR(SDNode *N); 196 SDValue SimplifyVBinOp(SDNode *N); 197 SDValue SimplifyVUnaryOp(SDNode *N); 198 SDValue visitSHL(SDNode *N); 199 SDValue visitSRA(SDNode *N); 200 SDValue visitSRL(SDNode *N); 201 SDValue visitCTLZ(SDNode *N); 202 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 203 SDValue visitCTTZ(SDNode *N); 204 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 205 SDValue visitCTPOP(SDNode *N); 206 SDValue visitSELECT(SDNode *N); 207 SDValue visitSELECT_CC(SDNode *N); 208 SDValue visitSETCC(SDNode *N); 209 SDValue visitSIGN_EXTEND(SDNode *N); 210 SDValue visitZERO_EXTEND(SDNode *N); 211 SDValue visitANY_EXTEND(SDNode *N); 212 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 213 SDValue visitTRUNCATE(SDNode *N); 214 SDValue visitBITCAST(SDNode *N); 215 SDValue visitBUILD_PAIR(SDNode *N); 216 SDValue visitFADD(SDNode *N); 217 SDValue visitFSUB(SDNode *N); 218 SDValue visitFMUL(SDNode *N); 219 SDValue visitFMA(SDNode *N); 220 SDValue visitFDIV(SDNode *N); 221 SDValue visitFREM(SDNode *N); 222 SDValue visitFCOPYSIGN(SDNode *N); 223 SDValue visitSINT_TO_FP(SDNode *N); 224 SDValue visitUINT_TO_FP(SDNode *N); 225 SDValue visitFP_TO_SINT(SDNode *N); 226 SDValue 
visitFP_TO_UINT(SDNode *N); 227 SDValue visitFP_ROUND(SDNode *N); 228 SDValue visitFP_ROUND_INREG(SDNode *N); 229 SDValue visitFP_EXTEND(SDNode *N); 230 SDValue visitFNEG(SDNode *N); 231 SDValue visitFABS(SDNode *N); 232 SDValue visitFCEIL(SDNode *N); 233 SDValue visitFTRUNC(SDNode *N); 234 SDValue visitFFLOOR(SDNode *N); 235 SDValue visitBRCOND(SDNode *N); 236 SDValue visitBR_CC(SDNode *N); 237 SDValue visitLOAD(SDNode *N); 238 SDValue visitSTORE(SDNode *N); 239 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 240 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 241 SDValue visitBUILD_VECTOR(SDNode *N); 242 SDValue visitCONCAT_VECTORS(SDNode *N); 243 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 244 SDValue visitVECTOR_SHUFFLE(SDNode *N); 245 SDValue visitMEMBARRIER(SDNode *N); 246 247 SDValue XformToShuffleWithZero(SDNode *N); 248 SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); 249 250 SDValue visitShiftByConstant(SDNode *N, unsigned Amt); 251 252 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 253 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 254 SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); 255 SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, 256 SDValue N3, ISD::CondCode CC, 257 bool NotExtCompare = false); 258 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 259 DebugLoc DL, bool foldBooleans = true); 260 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 261 unsigned HiOp); 262 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 263 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 264 SDValue BuildSDIV(SDNode *N); 265 SDValue BuildUDIV(SDNode *N); 266 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 267 bool DemandHighBits = true); 268 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 269 SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); 270 SDValue ReduceLoadWidth(SDNode *N); 271 SDValue 
ReduceLoadOpStoreWidth(SDNode *N); 272 SDValue TransformFPLoadStorePair(SDNode *N); 273 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 274 275 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 276 277 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 278 /// looking for aliasing nodes and adding them to the Aliases vector. 279 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 280 SmallVector<SDValue, 8> &Aliases); 281 282 /// isAlias - Return true if there is any possibility that the two addresses 283 /// overlap. 284 bool isAlias(SDValue Ptr1, int64_t Size1, 285 const Value *SrcValue1, int SrcValueOffset1, 286 unsigned SrcValueAlign1, 287 const MDNode *TBAAInfo1, 288 SDValue Ptr2, int64_t Size2, 289 const Value *SrcValue2, int SrcValueOffset2, 290 unsigned SrcValueAlign2, 291 const MDNode *TBAAInfo2) const; 292 293 /// FindAliasInfo - Extracts the relevant alias information from the memory 294 /// node. Returns true if the operand was a load. 295 bool FindAliasInfo(SDNode *N, 296 SDValue &Ptr, int64_t &Size, 297 const Value *&SrcValue, int &SrcValueOffset, 298 unsigned &SrcValueAlignment, 299 const MDNode *&TBAAInfo) const; 300 301 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, 302 /// looking for a better chain (aliasing node.) 303 SDValue FindBetterChain(SDNode *N, SDValue Chain); 304 305 /// Merge consecutive store operations into a wide store. 306 /// This optimization uses wide integers or vectors when possible. 307 /// \return True if some memory operations were changed. 
308 bool MergeConsecutiveStores(StoreSDNode *N); 309 310 public: 311 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 312 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 313 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} 314 315 /// Run - runs the dag combiner on all nodes in the work list 316 void Run(CombineLevel AtLevel); 317 318 SelectionDAG &getDAG() const { return DAG; } 319 320 /// getShiftAmountTy - Returns a type large enough to hold any valid 321 /// shift amount - before type legalization these can be huge. 322 EVT getShiftAmountTy(EVT LHSTy) { 323 return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); 324 } 325 326 /// isTypeLegal - This method returns true if we are running before type 327 /// legalization or if the specified VT is legal. 328 bool isTypeLegal(const EVT &VT) { 329 if (!LegalTypes) return true; 330 return TLI.isTypeLegal(VT); 331 } 332 }; 333} 334 335 336namespace { 337/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted 338/// nodes from the worklist. 
339class WorkListRemover : public SelectionDAG::DAGUpdateListener { 340 DAGCombiner &DC; 341public: 342 explicit WorkListRemover(DAGCombiner &dc) 343 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 344 345 virtual void NodeDeleted(SDNode *N, SDNode *E) { 346 DC.removeFromWorkList(N); 347 } 348}; 349} 350 351//===----------------------------------------------------------------------===// 352// TargetLowering::DAGCombinerInfo implementation 353//===----------------------------------------------------------------------===// 354 355void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 356 ((DAGCombiner*)DC)->AddToWorkList(N); 357} 358 359void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 360 ((DAGCombiner*)DC)->removeFromWorkList(N); 361} 362 363SDValue TargetLowering::DAGCombinerInfo:: 364CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 365 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 366} 367 368SDValue TargetLowering::DAGCombinerInfo:: 369CombineTo(SDNode *N, SDValue Res, bool AddTo) { 370 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 371} 372 373 374SDValue TargetLowering::DAGCombinerInfo:: 375CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 376 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 377} 378 379void TargetLowering::DAGCombinerInfo:: 380CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 381 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 382} 383 384//===----------------------------------------------------------------------===// 385// Helper Functions 386//===----------------------------------------------------------------------===// 387 388/// isNegatibleForFree - Return 1 if we can compute the negated form of the 389/// specified expression for the same cost as the expression itself, or 2 if we 390/// can compute the negated form more cheaply than the expression itself. 
391static char isNegatibleForFree(SDValue Op, bool LegalOperations, 392 const TargetLowering &TLI, 393 const TargetOptions *Options, 394 unsigned Depth = 0) { 395 // No compile time optimizations on this type. 396 if (Op.getValueType() == MVT::ppcf128) 397 return 0; 398 399 // fneg is removable even if it has multiple uses. 400 if (Op.getOpcode() == ISD::FNEG) return 2; 401 402 // Don't allow anything with multiple uses. 403 if (!Op.hasOneUse()) return 0; 404 405 // Don't recurse exponentially. 406 if (Depth > 6) return 0; 407 408 switch (Op.getOpcode()) { 409 default: return false; 410 case ISD::ConstantFP: 411 // Don't invert constant FP values after legalize. The negated constant 412 // isn't necessarily legal. 413 return LegalOperations ? 0 : 1; 414 case ISD::FADD: 415 // FIXME: determine better conditions for this xform. 416 if (!Options->UnsafeFPMath) return 0; 417 418 // After operation legalization, it might not be legal to create new FSUBs. 419 if (LegalOperations && 420 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 421 return 0; 422 423 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 424 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 425 Options, Depth + 1)) 426 return V; 427 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 428 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 429 Depth + 1); 430 case ISD::FSUB: 431 // We can't turn -(A-B) into B-A when we honor signed zeros. 
432 if (!Options->UnsafeFPMath) return 0; 433 434 // fold (fneg (fsub A, B)) -> (fsub B, A) 435 return 1; 436 437 case ISD::FMUL: 438 case ISD::FDIV: 439 if (Options->HonorSignDependentRoundingFPMath()) return 0; 440 441 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 442 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 443 Options, Depth + 1)) 444 return V; 445 446 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 447 Depth + 1); 448 449 case ISD::FP_EXTEND: 450 case ISD::FP_ROUND: 451 case ISD::FSIN: 452 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 453 Depth + 1); 454 } 455} 456 457/// GetNegatedExpression - If isNegatibleForFree returns true, this function 458/// returns the newly negated expression. 459static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 460 bool LegalOperations, unsigned Depth = 0) { 461 // fneg is removable even if it has multiple uses. 462 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 463 464 // Don't allow anything with multiple uses. 465 assert(Op.hasOneUse() && "Unknown reuse!"); 466 467 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 468 switch (Op.getOpcode()) { 469 default: llvm_unreachable("Unknown code"); 470 case ISD::ConstantFP: { 471 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 472 V.changeSign(); 473 return DAG.getConstantFP(V, Op.getValueType()); 474 } 475 case ISD::FADD: 476 // FIXME: determine better conditions for this xform. 
477 assert(DAG.getTarget().Options.UnsafeFPMath); 478 479 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 480 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 481 DAG.getTargetLoweringInfo(), 482 &DAG.getTarget().Options, Depth+1)) 483 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 484 GetNegatedExpression(Op.getOperand(0), DAG, 485 LegalOperations, Depth+1), 486 Op.getOperand(1)); 487 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 488 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 489 GetNegatedExpression(Op.getOperand(1), DAG, 490 LegalOperations, Depth+1), 491 Op.getOperand(0)); 492 case ISD::FSUB: 493 // We can't turn -(A-B) into B-A when we honor signed zeros. 494 assert(DAG.getTarget().Options.UnsafeFPMath); 495 496 // fold (fneg (fsub 0, B)) -> B 497 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 498 if (N0CFP->getValueAPF().isZero()) 499 return Op.getOperand(1); 500 501 // fold (fneg (fsub A, B)) -> (fsub B, A) 502 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 503 Op.getOperand(1), Op.getOperand(0)); 504 505 case ISD::FMUL: 506 case ISD::FDIV: 507 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 508 509 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 510 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 511 DAG.getTargetLoweringInfo(), 512 &DAG.getTarget().Options, Depth+1)) 513 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 514 GetNegatedExpression(Op.getOperand(0), DAG, 515 LegalOperations, Depth+1), 516 Op.getOperand(1)); 517 518 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 519 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 520 Op.getOperand(0), 521 GetNegatedExpression(Op.getOperand(1), DAG, 522 LegalOperations, Depth+1)); 523 524 case ISD::FP_EXTEND: 525 case ISD::FSIN: 526 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 527 
GetNegatedExpression(Op.getOperand(0), DAG, 528 LegalOperations, Depth+1)); 529 case ISD::FP_ROUND: 530 return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), 531 GetNegatedExpression(Op.getOperand(0), DAG, 532 LegalOperations, Depth+1), 533 Op.getOperand(1)); 534 } 535} 536 537 538// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 539// that selects between the values 1 and 0, making it equivalent to a setcc. 540// Also, set the incoming LHS, RHS, and CC references to the appropriate 541// nodes based on the type of node we are checking. This simplifies life a 542// bit for the callers. 543static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 544 SDValue &CC) { 545 if (N.getOpcode() == ISD::SETCC) { 546 LHS = N.getOperand(0); 547 RHS = N.getOperand(1); 548 CC = N.getOperand(2); 549 return true; 550 } 551 if (N.getOpcode() == ISD::SELECT_CC && 552 N.getOperand(2).getOpcode() == ISD::Constant && 553 N.getOperand(3).getOpcode() == ISD::Constant && 554 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 555 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 556 LHS = N.getOperand(0); 557 RHS = N.getOperand(1); 558 CC = N.getOperand(4); 559 return true; 560 } 561 return false; 562} 563 564// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 565// one use. If this is true, it allows the users to invert the operation for 566// free when it is profitable to do so. 567static bool isOneUseSetCC(SDValue N) { 568 SDValue N0, N1, N2; 569 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 570 return true; 571 return false; 572} 573 574SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, 575 SDValue N0, SDValue N1) { 576 EVT VT = N0.getValueType(); 577 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 578 if (isa<ConstantSDNode>(N1)) { 579 // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) 580 SDValue OpNode = 581 DAG.FoldConstantArithmetic(Opc, VT, 582 cast<ConstantSDNode>(N0.getOperand(1)), 583 cast<ConstantSDNode>(N1)); 584 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 585 } 586 if (N0.hasOneUse()) { 587 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 588 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 589 N0.getOperand(0), N1); 590 AddToWorkList(OpNode.getNode()); 591 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 592 } 593 } 594 595 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 596 if (isa<ConstantSDNode>(N0)) { 597 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 598 SDValue OpNode = 599 DAG.FoldConstantArithmetic(Opc, VT, 600 cast<ConstantSDNode>(N1.getOperand(1)), 601 cast<ConstantSDNode>(N0)); 602 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 603 } 604 if (N1.hasOneUse()) { 605 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 606 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 607 N1.getOperand(0), N0); 608 AddToWorkList(OpNode.getNode()); 609 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 610 } 611 } 612 613 return SDValue(); 614} 615 616SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 617 bool AddTo) { 618 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 619 ++NodesCombined; 620 DEBUG(dbgs() << "\nReplacing.1 "; 621 N->dump(&DAG); 622 dbgs() << "\nWith: "; 623 To[0].getNode()->dump(&DAG); 624 dbgs() << " and " << NumTo-1 << " other values\n"; 625 for (unsigned i = 0, e = NumTo; i != e; ++i) 626 assert((!To[i].getNode() || 627 N->getValueType(i) == To[i].getValueType()) && 628 "Cannot combine value to value of different type!")); 629 WorkListRemover DeadNodes(*this); 630 DAG.ReplaceAllUsesWith(N, To); 631 if (AddTo) { 632 // Push the new nodes and any users onto the worklist 633 for (unsigned i = 0, e = NumTo; i 
!= e; ++i) { 634 if (To[i].getNode()) { 635 AddToWorkList(To[i].getNode()); 636 AddUsersToWorkList(To[i].getNode()); 637 } 638 } 639 } 640 641 // Finally, if the node is now dead, remove it from the graph. The node 642 // may not be dead if the replacement process recursively simplified to 643 // something else needing this node. 644 if (N->use_empty()) { 645 // Nodes can be reintroduced into the worklist. Make sure we do not 646 // process a node that has been replaced. 647 removeFromWorkList(N); 648 649 // Finally, since the node is now dead, remove it from the graph. 650 DAG.DeleteNode(N); 651 } 652 return SDValue(N, 0); 653} 654 655void DAGCombiner:: 656CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 657 // Replace all uses. If any nodes become isomorphic to other nodes and 658 // are deleted, make sure to remove them from our worklist. 659 WorkListRemover DeadNodes(*this); 660 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 661 662 // Push the new node and any (possibly new) users onto the worklist. 663 AddToWorkList(TLO.New.getNode()); 664 AddUsersToWorkList(TLO.New.getNode()); 665 666 // Finally, if the node is now dead, remove it from the graph. The node 667 // may not be dead if the replacement process recursively simplified to 668 // something else needing this node. 669 if (TLO.Old.getNode()->use_empty()) { 670 removeFromWorkList(TLO.Old.getNode()); 671 672 // If the operands of this node are only used by the node, they will now 673 // be dead. Make sure to visit them first to delete dead nodes early. 
674 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 675 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 676 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 677 678 DAG.DeleteNode(TLO.Old.getNode()); 679 } 680} 681 682/// SimplifyDemandedBits - Check the specified integer node value to see if 683/// it can be simplified or if things it uses can be simplified by bit 684/// propagation. If so, return true. 685bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 686 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 687 APInt KnownZero, KnownOne; 688 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 689 return false; 690 691 // Revisit the node. 692 AddToWorkList(Op.getNode()); 693 694 // Replace the old value with the new one. 695 ++NodesCombined; 696 DEBUG(dbgs() << "\nReplacing.2 "; 697 TLO.Old.getNode()->dump(&DAG); 698 dbgs() << "\nWith: "; 699 TLO.New.getNode()->dump(&DAG); 700 dbgs() << '\n'); 701 702 CommitTargetLoweringOpt(TLO); 703 return true; 704} 705 706void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 707 DebugLoc dl = Load->getDebugLoc(); 708 EVT VT = Load->getValueType(0); 709 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 710 711 DEBUG(dbgs() << "\nReplacing.9 "; 712 Load->dump(&DAG); 713 dbgs() << "\nWith: "; 714 Trunc.getNode()->dump(&DAG); 715 dbgs() << '\n'); 716 WorkListRemover DeadNodes(*this); 717 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 718 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 719 removeFromWorkList(Load); 720 DAG.DeleteNode(Load); 721 AddToWorkList(Trunc.getNode()); 722} 723 724SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 725 Replace = false; 726 DebugLoc dl = Op.getDebugLoc(); 727 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 728 EVT MemVT = LD->getMemoryVT(); 729 ISD::LoadExtType ExtType = 
ISD::isNON_EXTLoad(LD) 730 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 731 : ISD::EXTLOAD) 732 : LD->getExtensionType(); 733 Replace = true; 734 return DAG.getExtLoad(ExtType, dl, PVT, 735 LD->getChain(), LD->getBasePtr(), 736 LD->getPointerInfo(), 737 MemVT, LD->isVolatile(), 738 LD->isNonTemporal(), LD->getAlignment()); 739 } 740 741 unsigned Opc = Op.getOpcode(); 742 switch (Opc) { 743 default: break; 744 case ISD::AssertSext: 745 return DAG.getNode(ISD::AssertSext, dl, PVT, 746 SExtPromoteOperand(Op.getOperand(0), PVT), 747 Op.getOperand(1)); 748 case ISD::AssertZext: 749 return DAG.getNode(ISD::AssertZext, dl, PVT, 750 ZExtPromoteOperand(Op.getOperand(0), PVT), 751 Op.getOperand(1)); 752 case ISD::Constant: { 753 unsigned ExtOpc = 754 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 755 return DAG.getNode(ExtOpc, dl, PVT, Op); 756 } 757 } 758 759 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 760 return SDValue(); 761 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 762} 763 764SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 765 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 766 return SDValue(); 767 EVT OldVT = Op.getValueType(); 768 DebugLoc dl = Op.getDebugLoc(); 769 bool Replace = false; 770 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 771 if (NewOp.getNode() == 0) 772 return SDValue(); 773 AddToWorkList(NewOp.getNode()); 774 775 if (Replace) 776 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 777 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 778 DAG.getValueType(OldVT)); 779} 780 781SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 782 EVT OldVT = Op.getValueType(); 783 DebugLoc dl = Op.getDebugLoc(); 784 bool Replace = false; 785 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 786 if (NewOp.getNode() == 0) 787 return SDValue(); 788 AddToWorkList(NewOp.getNode()); 789 790 if (Replace) 791 ReplaceLoadWithPromotedLoad(Op.getNode(), 
NewOp.getNode()); 792 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 793} 794 795/// PromoteIntBinOp - Promote the specified integer binary operation if the 796/// target indicates it is beneficial. e.g. On x86, it's usually better to 797/// promote i16 operations to i32 since i16 instructions are longer. 798SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 799 if (!LegalOperations) 800 return SDValue(); 801 802 EVT VT = Op.getValueType(); 803 if (VT.isVector() || !VT.isInteger()) 804 return SDValue(); 805 806 // If operation type is 'undesirable', e.g. i16 on x86, consider 807 // promoting it. 808 unsigned Opc = Op.getOpcode(); 809 if (TLI.isTypeDesirableForOp(Opc, VT)) 810 return SDValue(); 811 812 EVT PVT = VT; 813 // Consult target whether it is a good idea to promote this operation and 814 // what's the right type to promote it to. 815 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 816 assert(PVT != VT && "Don't know what type to promote to!"); 817 818 bool Replace0 = false; 819 SDValue N0 = Op.getOperand(0); 820 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 821 if (NN0.getNode() == 0) 822 return SDValue(); 823 824 bool Replace1 = false; 825 SDValue N1 = Op.getOperand(1); 826 SDValue NN1; 827 if (N0 == N1) 828 NN1 = NN0; 829 else { 830 NN1 = PromoteOperand(N1, PVT, Replace1); 831 if (NN1.getNode() == 0) 832 return SDValue(); 833 } 834 835 AddToWorkList(NN0.getNode()); 836 if (NN1.getNode()) 837 AddToWorkList(NN1.getNode()); 838 839 if (Replace0) 840 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 841 if (Replace1) 842 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 843 844 DEBUG(dbgs() << "\nPromoting "; 845 Op.getNode()->dump(&DAG)); 846 DebugLoc dl = Op.getDebugLoc(); 847 return DAG.getNode(ISD::TRUNCATE, dl, VT, 848 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 849 } 850 return SDValue(); 851} 852 853/// PromoteIntShiftOp - Promote the specified integer shift operation if the 854/// target indicates it is beneficial. e.g. 
On x86, it's usually better to 855/// promote i16 operations to i32 since i16 instructions are longer. 856SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 857 if (!LegalOperations) 858 return SDValue(); 859 860 EVT VT = Op.getValueType(); 861 if (VT.isVector() || !VT.isInteger()) 862 return SDValue(); 863 864 // If operation type is 'undesirable', e.g. i16 on x86, consider 865 // promoting it. 866 unsigned Opc = Op.getOpcode(); 867 if (TLI.isTypeDesirableForOp(Opc, VT)) 868 return SDValue(); 869 870 EVT PVT = VT; 871 // Consult target whether it is a good idea to promote this operation and 872 // what's the right type to promote it to. 873 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 874 assert(PVT != VT && "Don't know what type to promote to!"); 875 876 bool Replace = false; 877 SDValue N0 = Op.getOperand(0); 878 if (Opc == ISD::SRA) 879 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 880 else if (Opc == ISD::SRL) 881 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 882 else 883 N0 = PromoteOperand(N0, PVT, Replace); 884 if (N0.getNode() == 0) 885 return SDValue(); 886 887 AddToWorkList(N0.getNode()); 888 if (Replace) 889 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 890 891 DEBUG(dbgs() << "\nPromoting "; 892 Op.getNode()->dump(&DAG)); 893 DebugLoc dl = Op.getDebugLoc(); 894 return DAG.getNode(ISD::TRUNCATE, dl, VT, 895 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 896 } 897 return SDValue(); 898} 899 900SDValue DAGCombiner::PromoteExtend(SDValue Op) { 901 if (!LegalOperations) 902 return SDValue(); 903 904 EVT VT = Op.getValueType(); 905 if (VT.isVector() || !VT.isInteger()) 906 return SDValue(); 907 908 // If operation type is 'undesirable', e.g. i16 on x86, consider 909 // promoting it. 910 unsigned Opc = Op.getOpcode(); 911 if (TLI.isTypeDesirableForOp(Opc, VT)) 912 return SDValue(); 913 914 EVT PVT = VT; 915 // Consult target whether it is a good idea to promote this operation and 916 // what's the right type to promote it to. 
917 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 918 assert(PVT != VT && "Don't know what type to promote to!"); 919 // fold (aext (aext x)) -> (aext x) 920 // fold (aext (zext x)) -> (zext x) 921 // fold (aext (sext x)) -> (sext x) 922 DEBUG(dbgs() << "\nPromoting "; 923 Op.getNode()->dump(&DAG)); 924 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); 925 } 926 return SDValue(); 927} 928 929bool DAGCombiner::PromoteLoad(SDValue Op) { 930 if (!LegalOperations) 931 return false; 932 933 EVT VT = Op.getValueType(); 934 if (VT.isVector() || !VT.isInteger()) 935 return false; 936 937 // If operation type is 'undesirable', e.g. i16 on x86, consider 938 // promoting it. 939 unsigned Opc = Op.getOpcode(); 940 if (TLI.isTypeDesirableForOp(Opc, VT)) 941 return false; 942 943 EVT PVT = VT; 944 // Consult target whether it is a good idea to promote this operation and 945 // what's the right type to promote it to. 946 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 947 assert(PVT != VT && "Don't know what type to promote to!"); 948 949 DebugLoc dl = Op.getDebugLoc(); 950 SDNode *N = Op.getNode(); 951 LoadSDNode *LD = cast<LoadSDNode>(N); 952 EVT MemVT = LD->getMemoryVT(); 953 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 954 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD 955 : ISD::EXTLOAD) 956 : LD->getExtensionType(); 957 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 958 LD->getChain(), LD->getBasePtr(), 959 LD->getPointerInfo(), 960 MemVT, LD->isVolatile(), 961 LD->isNonTemporal(), LD->getAlignment()); 962 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 963 964 DEBUG(dbgs() << "\nPromoting "; 965 N->dump(&DAG); 966 dbgs() << "\nTo: "; 967 Result.getNode()->dump(&DAG); 968 dbgs() << '\n'); 969 WorkListRemover DeadNodes(*this); 970 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 971 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 972 removeFromWorkList(N); 973 DAG.DeleteNode(N); 974 AddToWorkList(Result.getNode()); 975 return true; 976 } 977 return false; 978} 979 980 981//===----------------------------------------------------------------------===// 982// Main DAG Combiner implementation 983//===----------------------------------------------------------------------===// 984 985void DAGCombiner::Run(CombineLevel AtLevel) { 986 // set the instance variables, so that the various visit routines may use it. 987 Level = AtLevel; 988 LegalOperations = Level >= AfterLegalizeVectorOps; 989 LegalTypes = Level >= AfterLegalizeTypes; 990 991 // Add all the dag nodes to the worklist. 992 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 993 E = DAG.allnodes_end(); I != E; ++I) 994 AddToWorkList(I); 995 996 // Create a dummy node (which is not added to allnodes), that adds a reference 997 // to the root node, preventing it from being deleted, and tracking any 998 // changes of the root. 999 HandleSDNode Dummy(DAG.getRoot()); 1000 1001 // The root of the dag may dangle to deleted nodes until the dag combiner is 1002 // done. Set it to null to avoid confusion. 1003 DAG.setRoot(SDValue()); 1004 1005 // while the worklist isn't empty, find a node and 1006 // try and combine it. 
1007 while (!WorkListContents.empty()) { 1008 SDNode *N; 1009 // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. 1010 // In order to avoid a linear scan, we use a set (O(log N)) to hold what the 1011 // worklist *should* contain, and check the node we want to visit is should 1012 // actually be visited. 1013 do { 1014 N = WorkListOrder.pop_back_val(); 1015 } while (!WorkListContents.erase(N)); 1016 1017 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1018 // N is deleted from the DAG, since they too may now be dead or may have a 1019 // reduced number of uses, allowing other xforms. 1020 if (N->use_empty() && N != &Dummy) { 1021 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1022 AddToWorkList(N->getOperand(i).getNode()); 1023 1024 DAG.DeleteNode(N); 1025 continue; 1026 } 1027 1028 SDValue RV = combine(N); 1029 1030 if (RV.getNode() == 0) 1031 continue; 1032 1033 ++NodesCombined; 1034 1035 // If we get back the same node we passed in, rather than a new node or 1036 // zero, we know that the node must have defined multiple values and 1037 // CombineTo was used. Since CombineTo takes care of the worklist 1038 // mechanics for us, we have no work to do in this case. 1039 if (RV.getNode() == N) 1040 continue; 1041 1042 assert(N->getOpcode() != ISD::DELETED_NODE && 1043 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1044 "Node was deleted but visit returned new node!"); 1045 1046 DEBUG(dbgs() << "\nReplacing.3 "; 1047 N->dump(&DAG); 1048 dbgs() << "\nWith: "; 1049 RV.getNode()->dump(&DAG); 1050 dbgs() << '\n'); 1051 1052 // Transfer debug value. 
1053 DAG.TransferDbgValues(SDValue(N, 0), RV); 1054 WorkListRemover DeadNodes(*this); 1055 if (N->getNumValues() == RV.getNode()->getNumValues()) 1056 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1057 else { 1058 assert(N->getValueType(0) == RV.getValueType() && 1059 N->getNumValues() == 1 && "Type mismatch"); 1060 SDValue OpV = RV; 1061 DAG.ReplaceAllUsesWith(N, &OpV); 1062 } 1063 1064 // Push the new node and any users onto the worklist 1065 AddToWorkList(RV.getNode()); 1066 AddUsersToWorkList(RV.getNode()); 1067 1068 // Add any uses of the old node to the worklist in case this node is the 1069 // last one that uses them. They may become dead after this node is 1070 // deleted. 1071 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1072 AddToWorkList(N->getOperand(i).getNode()); 1073 1074 // Finally, if the node is now dead, remove it from the graph. The node 1075 // may not be dead if the replacement process recursively simplified to 1076 // something else needing this node. 1077 if (N->use_empty()) { 1078 // Nodes can be reintroduced into the worklist. Make sure we do not 1079 // process a node that has been replaced. 1080 removeFromWorkList(N); 1081 1082 // Finally, since the node is now dead, remove it from the graph. 1083 DAG.DeleteNode(N); 1084 } 1085 } 1086 1087 // If the root changed (e.g. it was a dead load, update the root). 
1088 DAG.setRoot(Dummy.getValue()); 1089 DAG.RemoveDeadNodes(); 1090} 1091 1092SDValue DAGCombiner::visit(SDNode *N) { 1093 switch (N->getOpcode()) { 1094 default: break; 1095 case ISD::TokenFactor: return visitTokenFactor(N); 1096 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1097 case ISD::ADD: return visitADD(N); 1098 case ISD::SUB: return visitSUB(N); 1099 case ISD::ADDC: return visitADDC(N); 1100 case ISD::SUBC: return visitSUBC(N); 1101 case ISD::ADDE: return visitADDE(N); 1102 case ISD::SUBE: return visitSUBE(N); 1103 case ISD::MUL: return visitMUL(N); 1104 case ISD::SDIV: return visitSDIV(N); 1105 case ISD::UDIV: return visitUDIV(N); 1106 case ISD::SREM: return visitSREM(N); 1107 case ISD::UREM: return visitUREM(N); 1108 case ISD::MULHU: return visitMULHU(N); 1109 case ISD::MULHS: return visitMULHS(N); 1110 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1111 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1112 case ISD::SMULO: return visitSMULO(N); 1113 case ISD::UMULO: return visitUMULO(N); 1114 case ISD::SDIVREM: return visitSDIVREM(N); 1115 case ISD::UDIVREM: return visitUDIVREM(N); 1116 case ISD::AND: return visitAND(N); 1117 case ISD::OR: return visitOR(N); 1118 case ISD::XOR: return visitXOR(N); 1119 case ISD::SHL: return visitSHL(N); 1120 case ISD::SRA: return visitSRA(N); 1121 case ISD::SRL: return visitSRL(N); 1122 case ISD::CTLZ: return visitCTLZ(N); 1123 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1124 case ISD::CTTZ: return visitCTTZ(N); 1125 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1126 case ISD::CTPOP: return visitCTPOP(N); 1127 case ISD::SELECT: return visitSELECT(N); 1128 case ISD::SELECT_CC: return visitSELECT_CC(N); 1129 case ISD::SETCC: return visitSETCC(N); 1130 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1131 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 1132 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1133 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1134 case 
ISD::TRUNCATE: return visitTRUNCATE(N); 1135 case ISD::BITCAST: return visitBITCAST(N); 1136 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1137 case ISD::FADD: return visitFADD(N); 1138 case ISD::FSUB: return visitFSUB(N); 1139 case ISD::FMUL: return visitFMUL(N); 1140 case ISD::FMA: return visitFMA(N); 1141 case ISD::FDIV: return visitFDIV(N); 1142 case ISD::FREM: return visitFREM(N); 1143 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1144 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1145 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1146 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1147 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1148 case ISD::FP_ROUND: return visitFP_ROUND(N); 1149 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1150 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1151 case ISD::FNEG: return visitFNEG(N); 1152 case ISD::FABS: return visitFABS(N); 1153 case ISD::FFLOOR: return visitFFLOOR(N); 1154 case ISD::FCEIL: return visitFCEIL(N); 1155 case ISD::FTRUNC: return visitFTRUNC(N); 1156 case ISD::BRCOND: return visitBRCOND(N); 1157 case ISD::BR_CC: return visitBR_CC(N); 1158 case ISD::LOAD: return visitLOAD(N); 1159 case ISD::STORE: return visitSTORE(N); 1160 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1161 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1162 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1163 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1164 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1165 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1166 case ISD::MEMBARRIER: return visitMEMBARRIER(N); 1167 } 1168 return SDValue(); 1169} 1170 1171SDValue DAGCombiner::combine(SDNode *N) { 1172 SDValue RV = visit(N); 1173 1174 // If nothing happened, try a target-specific DAG combine. 
1175 if (RV.getNode() == 0) { 1176 assert(N->getOpcode() != ISD::DELETED_NODE && 1177 "Node was deleted but visit returned NULL!"); 1178 1179 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1180 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1181 1182 // Expose the DAG combiner to the target combiner impls. 1183 TargetLowering::DAGCombinerInfo 1184 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 1185 1186 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1187 } 1188 } 1189 1190 // If nothing happened still, try promoting the operation. 1191 if (RV.getNode() == 0) { 1192 switch (N->getOpcode()) { 1193 default: break; 1194 case ISD::ADD: 1195 case ISD::SUB: 1196 case ISD::MUL: 1197 case ISD::AND: 1198 case ISD::OR: 1199 case ISD::XOR: 1200 RV = PromoteIntBinOp(SDValue(N, 0)); 1201 break; 1202 case ISD::SHL: 1203 case ISD::SRA: 1204 case ISD::SRL: 1205 RV = PromoteIntShiftOp(SDValue(N, 0)); 1206 break; 1207 case ISD::SIGN_EXTEND: 1208 case ISD::ZERO_EXTEND: 1209 case ISD::ANY_EXTEND: 1210 RV = PromoteExtend(SDValue(N, 0)); 1211 break; 1212 case ISD::LOAD: 1213 if (PromoteLoad(SDValue(N, 0))) 1214 RV = SDValue(N, 0); 1215 break; 1216 } 1217 } 1218 1219 // If N is a commutative binary node, try commuting it to enable more 1220 // sdisel CSE. 1221 if (RV.getNode() == 0 && 1222 SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1223 N->getNumValues() == 1) { 1224 SDValue N0 = N->getOperand(0); 1225 SDValue N1 = N->getOperand(1); 1226 1227 // Constant operands are canonicalized to RHS. 1228 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1229 SDValue Ops[] = { N1, N0 }; 1230 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), 1231 Ops, 2); 1232 if (CSENode) 1233 return SDValue(CSENode, 0); 1234 } 1235 } 1236 1237 return RV; 1238} 1239 1240/// getInputChainForNode - Given a node, return its input chain if it has one, 1241/// otherwise return a null sd operand. 
1242static SDValue getInputChainForNode(SDNode *N) { 1243 if (unsigned NumOps = N->getNumOperands()) { 1244 if (N->getOperand(0).getValueType() == MVT::Other) 1245 return N->getOperand(0); 1246 else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1247 return N->getOperand(NumOps-1); 1248 for (unsigned i = 1; i < NumOps-1; ++i) 1249 if (N->getOperand(i).getValueType() == MVT::Other) 1250 return N->getOperand(i); 1251 } 1252 return SDValue(); 1253} 1254 1255SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1256 // If N has two operands, where one has an input chain equal to the other, 1257 // the 'other' chain is redundant. 1258 if (N->getNumOperands() == 2) { 1259 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1260 return N->getOperand(0); 1261 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1262 return N->getOperand(1); 1263 } 1264 1265 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1266 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1267 SmallPtrSet<SDNode*, 16> SeenOps; 1268 bool Changed = false; // If we should replace this token factor. 1269 1270 // Start out with this token factor. 1271 TFs.push_back(N); 1272 1273 // Iterate through token factors. The TFs grows when new token factors are 1274 // encountered. 1275 for (unsigned i = 0; i < TFs.size(); ++i) { 1276 SDNode *TF = TFs[i]; 1277 1278 // Check each of the operands. 1279 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { 1280 SDValue Op = TF->getOperand(i); 1281 1282 switch (Op.getOpcode()) { 1283 case ISD::EntryToken: 1284 // Entry tokens don't need to be added to the list. They are 1285 // rededundant. 1286 Changed = true; 1287 break; 1288 1289 case ISD::TokenFactor: 1290 if (Op.hasOneUse() && 1291 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1292 // Queue up for processing. 1293 TFs.push_back(Op.getNode()); 1294 // Clean up in case the token factor is removed. 
1295 AddToWorkList(Op.getNode()); 1296 Changed = true; 1297 break; 1298 } 1299 // Fall thru 1300 1301 default: 1302 // Only add if it isn't already in the list. 1303 if (SeenOps.insert(Op.getNode())) 1304 Ops.push_back(Op); 1305 else 1306 Changed = true; 1307 break; 1308 } 1309 } 1310 } 1311 1312 SDValue Result; 1313 1314 // If we've change things around then replace token factor. 1315 if (Changed) { 1316 if (Ops.empty()) { 1317 // The entry token is the only possible outcome. 1318 Result = DAG.getEntryNode(); 1319 } else { 1320 // New and improved token factor. 1321 Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), 1322 MVT::Other, &Ops[0], Ops.size()); 1323 } 1324 1325 // Don't add users to work list. 1326 return CombineTo(N, Result, false); 1327 } 1328 1329 return Result; 1330} 1331 1332/// MERGE_VALUES can always be eliminated. 1333SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1334 WorkListRemover DeadNodes(*this); 1335 // Replacing results may cause a different MERGE_VALUES to suddenly 1336 // be CSE'd with N, and carry its uses with it. Iterate until no 1337 // uses remain, to ensure that the node can be safely deleted. 1338 // First add the users of this node to the work list so that they 1339 // can be tried again once they have new operands. 1340 AddUsersToWorkList(N); 1341 do { 1342 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1343 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1344 } while (!N->use_empty()); 1345 removeFromWorkList(N); 1346 DAG.DeleteNode(N); 1347 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
1348} 1349 1350static 1351SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, 1352 SelectionDAG &DAG) { 1353 EVT VT = N0.getValueType(); 1354 SDValue N00 = N0.getOperand(0); 1355 SDValue N01 = N0.getOperand(1); 1356 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01); 1357 1358 if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && 1359 isa<ConstantSDNode>(N00.getOperand(1))) { 1360 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 1361 N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, 1362 DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT, 1363 N00.getOperand(0), N01), 1364 DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT, 1365 N00.getOperand(1), N01)); 1366 return DAG.getNode(ISD::ADD, DL, VT, N0, N1); 1367 } 1368 1369 return SDValue(); 1370} 1371 1372SDValue DAGCombiner::visitADD(SDNode *N) { 1373 SDValue N0 = N->getOperand(0); 1374 SDValue N1 = N->getOperand(1); 1375 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1376 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1377 EVT VT = N0.getValueType(); 1378 1379 // fold vector ops 1380 if (VT.isVector()) { 1381 SDValue FoldedVOp = SimplifyVBinOp(N); 1382 if (FoldedVOp.getNode()) return FoldedVOp; 1383 } 1384 1385 // fold (add x, undef) -> undef 1386 if (N0.getOpcode() == ISD::UNDEF) 1387 return N0; 1388 if (N1.getOpcode() == ISD::UNDEF) 1389 return N1; 1390 // fold (add c1, c2) -> c1+c2 1391 if (N0C && N1C) 1392 return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); 1393 // canonicalize constant to RHS 1394 if (N0C && !N1C) 1395 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0); 1396 // fold (add x, 0) -> x 1397 if (N1C && N1C->isNullValue()) 1398 return N0; 1399 // fold (add Sym, c) -> Sym+c 1400 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1401 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1402 GA->getOpcode() == ISD::GlobalAddress) 1403 return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, 1404 
GA->getOffset() + 1405 (uint64_t)N1C->getSExtValue()); 1406 // fold ((c1-A)+c2) -> (c1+c2)-A 1407 if (N1C && N0.getOpcode() == ISD::SUB) 1408 if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) 1409 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1410 DAG.getConstant(N1C->getAPIntValue()+ 1411 N0C->getAPIntValue(), VT), 1412 N0.getOperand(1)); 1413 // reassociate add 1414 SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); 1415 if (RADD.getNode() != 0) 1416 return RADD; 1417 // fold ((0-A) + B) -> B-A 1418 if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && 1419 cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) 1420 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); 1421 // fold (A + (0-B)) -> A-B 1422 if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && 1423 cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) 1424 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); 1425 // fold (A+(B-A)) -> B 1426 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1427 return N1.getOperand(0); 1428 // fold ((B-A)+A) -> B 1429 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1430 return N0.getOperand(0); 1431 // fold (A+(B-(A+C))) to (B-C) 1432 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1433 N0 == N1.getOperand(1).getOperand(0)) 1434 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), 1435 N1.getOperand(1).getOperand(1)); 1436 // fold (A+(B-(C+A))) to (B-C) 1437 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1438 N0 == N1.getOperand(1).getOperand(1)) 1439 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), 1440 N1.getOperand(1).getOperand(0)); 1441 // fold (A+((B-A)+or-C)) to (B+or-C) 1442 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1443 N1.getOperand(0).getOpcode() == ISD::SUB && 1444 N0 == 
N1.getOperand(0).getOperand(1)) 1445 return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, 1446 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1447 1448 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1449 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1450 SDValue N00 = N0.getOperand(0); 1451 SDValue N01 = N0.getOperand(1); 1452 SDValue N10 = N1.getOperand(0); 1453 SDValue N11 = N1.getOperand(1); 1454 1455 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1456 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1457 DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), 1458 DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); 1459 } 1460 1461 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1462 return SDValue(N, 0); 1463 1464 // fold (a+b) -> (a|b) iff a and b share no bits. 1465 if (VT.isInteger() && !VT.isVector()) { 1466 APInt LHSZero, LHSOne; 1467 APInt RHSZero, RHSOne; 1468 DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); 1469 1470 if (LHSZero.getBoolValue()) { 1471 DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); 1472 1473 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1474 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
1475 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1476 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); 1477 } 1478 } 1479 1480 // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), ) 1481 if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) { 1482 SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); 1483 if (Result.getNode()) return Result; 1484 } 1485 if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { 1486 SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); 1487 if (Result.getNode()) return Result; 1488 } 1489 1490 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1491 if (N1.getOpcode() == ISD::SHL && 1492 N1.getOperand(0).getOpcode() == ISD::SUB) 1493 if (ConstantSDNode *C = 1494 dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) 1495 if (C->getAPIntValue() == 0) 1496 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, 1497 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1498 N1.getOperand(0).getOperand(1), 1499 N1.getOperand(1))); 1500 if (N0.getOpcode() == ISD::SHL && 1501 N0.getOperand(0).getOpcode() == ISD::SUB) 1502 if (ConstantSDNode *C = 1503 dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) 1504 if (C->getAPIntValue() == 0) 1505 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, 1506 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1507 N0.getOperand(0).getOperand(1), 1508 N0.getOperand(1))); 1509 1510 if (N1.getOpcode() == ISD::AND) { 1511 SDValue AndOp0 = N1.getOperand(0); 1512 ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 1513 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1514 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1515 1516 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1517 // and similar xforms where the inner op is either ~0 or 0. 
1518 if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { 1519 DebugLoc DL = N->getDebugLoc(); 1520 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 1521 } 1522 } 1523 1524 // add (sext i1), X -> sub X, (zext i1) 1525 if (N0.getOpcode() == ISD::SIGN_EXTEND && 1526 N0.getOperand(0).getValueType() == MVT::i1 && 1527 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 1528 DebugLoc DL = N->getDebugLoc(); 1529 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 1530 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 1531 } 1532 1533 return SDValue(); 1534} 1535 1536SDValue DAGCombiner::visitADDC(SDNode *N) { 1537 SDValue N0 = N->getOperand(0); 1538 SDValue N1 = N->getOperand(1); 1539 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1540 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1541 EVT VT = N0.getValueType(); 1542 1543 // If the flag result is dead, turn this into an ADD. 1544 if (!N->hasAnyUseOfValue(1)) 1545 return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), 1546 DAG.getNode(ISD::CARRY_FALSE, 1547 N->getDebugLoc(), MVT::Glue)); 1548 1549 // canonicalize constant to RHS. 1550 if (N0C && !N1C) 1551 return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); 1552 1553 // fold (addc x, 0) -> x + no carry out 1554 if (N1C && N1C->isNullValue()) 1555 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 1556 N->getDebugLoc(), MVT::Glue)); 1557 1558 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 1559 APInt LHSZero, LHSOne; 1560 APInt RHSZero, RHSOne; 1561 DAG.ComputeMaskedBits(N0, LHSZero, LHSOne); 1562 1563 if (LHSZero.getBoolValue()) { 1564 DAG.ComputeMaskedBits(N1, RHSZero, RHSOne); 1565 1566 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1567 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 
1568 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1569 return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), 1570 DAG.getNode(ISD::CARRY_FALSE, 1571 N->getDebugLoc(), MVT::Glue)); 1572 } 1573 1574 return SDValue(); 1575} 1576 1577SDValue DAGCombiner::visitADDE(SDNode *N) { 1578 SDValue N0 = N->getOperand(0); 1579 SDValue N1 = N->getOperand(1); 1580 SDValue CarryIn = N->getOperand(2); 1581 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1582 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1583 1584 // canonicalize constant to RHS 1585 if (N0C && !N1C) 1586 return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), 1587 N1, N0, CarryIn); 1588 1589 // fold (adde x, y, false) -> (addc x, y) 1590 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1591 return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); 1592 1593 return SDValue(); 1594} 1595 1596// Since it may not be valid to emit a fold to zero for vector initializers 1597// check if we can before folding. 1598static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, 1599 SelectionDAG &DAG, bool LegalOperations) { 1600 if (!VT.isVector()) { 1601 return DAG.getConstant(0, VT); 1602 } 1603 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { 1604 // Produce a vector of zeros. 1605 SDValue El = DAG.getConstant(0, VT.getVectorElementType()); 1606 std::vector<SDValue> Ops(VT.getVectorNumElements(), El); 1607 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 1608 &Ops[0], Ops.size()); 1609 } 1610 return SDValue(); 1611} 1612 1613SDValue DAGCombiner::visitSUB(SDNode *N) { 1614 SDValue N0 = N->getOperand(0); 1615 SDValue N1 = N->getOperand(1); 1616 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1617 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1618 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
0 : 1619 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 1620 EVT VT = N0.getValueType(); 1621 1622 // fold vector ops 1623 if (VT.isVector()) { 1624 SDValue FoldedVOp = SimplifyVBinOp(N); 1625 if (FoldedVOp.getNode()) return FoldedVOp; 1626 } 1627 1628 // fold (sub x, x) -> 0 1629 // FIXME: Refactor this and xor and other similar operations together. 1630 if (N0 == N1) 1631 return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); 1632 // fold (sub c1, c2) -> c1-c2 1633 if (N0C && N1C) 1634 return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); 1635 // fold (sub x, c) -> (add x, -c) 1636 if (N1C) 1637 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, 1638 DAG.getConstant(-N1C->getAPIntValue(), VT)); 1639 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 1640 if (N0C && N0C->isAllOnesValue()) 1641 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); 1642 // fold A-(A-B) -> B 1643 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 1644 return N1.getOperand(1); 1645 // fold (A+B)-A -> B 1646 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 1647 return N0.getOperand(1); 1648 // fold (A+B)-B -> A 1649 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 1650 return N0.getOperand(0); 1651 // fold C2-(A+C1) -> (C2-C1)-A 1652 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 1653 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), 1654 VT); 1655 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, 1656 N1.getOperand(0)); 1657 } 1658 // fold ((A+(B+or-C))-B) -> A+or-C 1659 if (N0.getOpcode() == ISD::ADD && 1660 (N0.getOperand(1).getOpcode() == ISD::SUB || 1661 N0.getOperand(1).getOpcode() == ISD::ADD) && 1662 N0.getOperand(1).getOperand(0) == N1) 1663 return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, 1664 N0.getOperand(0), N0.getOperand(1).getOperand(1)); 1665 // fold ((A+(C+B))-B) -> A+C 1666 if (N0.getOpcode() == ISD::ADD && 1667 N0.getOperand(1).getOpcode() == 
ISD::ADD && 1668 N0.getOperand(1).getOperand(1) == N1) 1669 return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, 1670 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1671 // fold ((A-(B-C))-C) -> A-B 1672 if (N0.getOpcode() == ISD::SUB && 1673 N0.getOperand(1).getOpcode() == ISD::SUB && 1674 N0.getOperand(1).getOperand(1) == N1) 1675 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1676 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1677 1678 // If either operand of a sub is undef, the result is undef 1679 if (N0.getOpcode() == ISD::UNDEF) 1680 return N0; 1681 if (N1.getOpcode() == ISD::UNDEF) 1682 return N1; 1683 1684 // If the relocation model supports it, consider symbol offsets. 1685 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1686 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 1687 // fold (sub Sym, c) -> Sym-c 1688 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 1689 return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, 1690 GA->getOffset() - 1691 (uint64_t)N1C->getSExtValue()); 1692 // fold (sub Sym+c1, Sym+c2) -> c1-c2 1693 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 1694 if (GA->getGlobal() == GB->getGlobal()) 1695 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 1696 VT); 1697 } 1698 1699 return SDValue(); 1700} 1701 1702SDValue DAGCombiner::visitSUBC(SDNode *N) { 1703 SDValue N0 = N->getOperand(0); 1704 SDValue N1 = N->getOperand(1); 1705 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1706 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1707 EVT VT = N0.getValueType(); 1708 1709 // If the flag result is dead, turn this into an SUB. 
1710 if (!N->hasAnyUseOfValue(1)) 1711 return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), 1712 DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), 1713 MVT::Glue)); 1714 1715 // fold (subc x, x) -> 0 + no borrow 1716 if (N0 == N1) 1717 return CombineTo(N, DAG.getConstant(0, VT), 1718 DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), 1719 MVT::Glue)); 1720 1721 // fold (subc x, 0) -> x + no borrow 1722 if (N1C && N1C->isNullValue()) 1723 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), 1724 MVT::Glue)); 1725 1726 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 1727 if (N0C && N0C->isAllOnesValue()) 1728 return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0), 1729 DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), 1730 MVT::Glue)); 1731 1732 return SDValue(); 1733} 1734 1735SDValue DAGCombiner::visitSUBE(SDNode *N) { 1736 SDValue N0 = N->getOperand(0); 1737 SDValue N1 = N->getOperand(1); 1738 SDValue CarryIn = N->getOperand(2); 1739 1740 // fold (sube x, y, false) -> (subc x, y) 1741 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1742 return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1); 1743 1744 return SDValue(); 1745} 1746 1747SDValue DAGCombiner::visitMUL(SDNode *N) { 1748 SDValue N0 = N->getOperand(0); 1749 SDValue N1 = N->getOperand(1); 1750 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1751 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1752 EVT VT = N0.getValueType(); 1753 1754 // fold vector ops 1755 if (VT.isVector()) { 1756 SDValue FoldedVOp = SimplifyVBinOp(N); 1757 if (FoldedVOp.getNode()) return FoldedVOp; 1758 } 1759 1760 // fold (mul x, undef) -> 0 1761 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 1762 return DAG.getConstant(0, VT); 1763 // fold (mul c1, c2) -> c1*c2 1764 if (N0C && N1C) 1765 return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C); 1766 // canonicalize constant to RHS 1767 if (N0C && !N1C) 1768 return 
           DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mul x, -1) -> 0-x
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1C && N0.getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                             N1, N0.getOperand(1));
    AddToWorkList(C3.getNode());
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(0,0), Y(0,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
  if (RMUL.getNode() != 0)
    return RMUL;

  return SDValue();
}

/// visitSDIV - Combine an ISD::SDIV node.  Division is assumed expensive;
/// several folds strength-reduce it to shifts or an unsigned divide.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
                         N0, N1);
  }
  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                              DAG.getConstant(VT.getSizeInBits()-1,
                                       getShiftAmountTy(N0.getValueType())));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                              DAG.getConstant(VT.getSizeInBits() - lg2,
                                       getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorkList(SRA.getNode());
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUDIV - Combine an ISD::UDIV node.  Power-of-two divisors are
/// strength-reduced to logical shifts.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                .logBase2(),
                                                  ADDVT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitSREM - Combine an ISD::SREM node, preferring a urem or a
/// div/mul/sub expansion when the divide itself can be simplified.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    // Only profitable when combine() actually produced a different node;
    // otherwise we would just reconstruct the original srem.
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUREM - Combine an ISD::UREM node.  Power-of-two divisors become
/// AND masks; otherwise try the div/mul/sub expansion.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDValue Add =
          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitMULHS - Combine an ISD::MULHS node (high half of a signed multiply).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhs x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands to the double-width type, multiply, then
      // shift the high half down and truncate back.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// visitMULHU - Combine an ISD::MULHU node (high half of an unsigned
/// multiply).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhu x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhu x, 1) -> 0
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getConstant(0, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
2129 if (VT.isSimple() && !VT.isVector()) { 2130 MVT Simple = VT.getSimpleVT(); 2131 unsigned SimpleSize = Simple.getSizeInBits(); 2132 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2133 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2134 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2135 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2136 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2137 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2138 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2139 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2140 } 2141 } 2142 2143 return SDValue(); 2144} 2145 2146/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that 2147/// compute two values. LoOp and HiOp give the opcodes for the two computations 2148/// that are being performed. Return true if a simplification was made. 2149/// 2150SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2151 unsigned HiOp) { 2152 // If the high half is not needed, just compute the low half. 2153 bool HiExists = N->hasAnyUseOfValue(1); 2154 if (!HiExists && 2155 (!LegalOperations || 2156 TLI.isOperationLegal(LoOp, N->getValueType(0)))) { 2157 SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), 2158 N->op_begin(), N->getNumOperands()); 2159 return CombineTo(N, Res, Res); 2160 } 2161 2162 // If the low half is not needed, just compute the high half. 2163 bool LoExists = N->hasAnyUseOfValue(0); 2164 if (!LoExists && 2165 (!LegalOperations || 2166 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2167 SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), 2168 N->op_begin(), N->getNumOperands()); 2169 return CombineTo(N, Res, Res); 2170 } 2171 2172 // If both halves are used, return as it is. 2173 if (LoExists && HiExists) 2174 return SDValue(); 2175 2176 // If the two computed results can be simplified separately, separate them. 
2177 if (LoExists) { 2178 SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), 2179 N->op_begin(), N->getNumOperands()); 2180 AddToWorkList(Lo.getNode()); 2181 SDValue LoOpt = combine(Lo.getNode()); 2182 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2183 (!LegalOperations || 2184 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2185 return CombineTo(N, LoOpt, LoOpt); 2186 } 2187 2188 if (HiExists) { 2189 SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), 2190 N->op_begin(), N->getNumOperands()); 2191 AddToWorkList(Hi.getNode()); 2192 SDValue HiOpt = combine(Hi.getNode()); 2193 if (HiOpt.getNode() && HiOpt != Hi && 2194 (!LegalOperations || 2195 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2196 return CombineTo(N, HiOpt, HiOpt); 2197 } 2198 2199 return SDValue(); 2200} 2201 2202SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2203 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2204 if (Res.getNode()) return Res; 2205 2206 EVT VT = N->getValueType(0); 2207 DebugLoc DL = N->getDebugLoc(); 2208 2209 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2210 // plus a shift. 2211 if (VT.isSimple() && !VT.isVector()) { 2212 MVT Simple = VT.getSimpleVT(); 2213 unsigned SimpleSize = Simple.getSizeInBits(); 2214 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2215 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2216 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2217 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2218 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2219 // Compute the high part as N1. 2220 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2221 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2222 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2223 // Compute the low part as N0. 
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitUMUL_LOHI - Combine an ISD::UMUL_LOHI node (unsigned multiply
/// producing both halves of the double-width product).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitSMULO - Combine an ISD::SMULO node (signed multiply with overflow
/// flag).
SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitUMULO - Combine an ISD::UMULO node (unsigned multiply with overflow
/// flag).
SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitSDIVREM - Combine an ISD::SDIVREM node via the common two-result
/// simplification.
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// visitUDIVREM - Combine an ISD::UDIVREM node via the common two-result
/// simplification.
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  // N1's opcode equals N0's (asserted at entry), so N1.getOperand(0) is safe.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    DebugLoc DL = N->getDebugLoc();
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorkList(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N0.getOperand(1).getOpcode() == ISD::UNDEF &&
      N1.getOperand(1).getOpcode() == ISD::UNDEF) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    // NOTE(review): this compares N0's first input against N1's *second*
    // (undef) operand; it likely intends N1.getOperand(0) — verify.
    assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
           "Inputs to shuffles are not the same type");

    unsigned NumElts = VT.getVectorNumElements();

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    bool SameMask = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx0 = SVN0->getMaskElt(i);
      int Idx1 = SVN1->getMaskElt(i);
      if (Idx0 != Idx1) {
        SameMask = false;
        break;
      }
    }

    if (SameMask) {
      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
                               N0.getOperand(0), N1.getOperand(0));
      AddToWorkList(Op.getNode());
      return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
                                  DAG.getUNDEF(VT), &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}

/// visitAND - Combine an ISD::AND node.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();
  unsigned BitWidth = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (and c1, c2) -> c1&c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
  // fold (and x, -1) -> x
  if (N1C && N1C->isAllOnesValue())
    return N0;
  // if (and x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, VT);
  // reassociate and
  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
  if (RAND.getNode() != 0)
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2454 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2455 return N1; 2456 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2457 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2458 SDValue N0Op0 = N0.getOperand(0); 2459 APInt Mask = ~N1C->getAPIntValue(); 2460 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2461 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2462 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), 2463 N0.getValueType(), N0Op0); 2464 2465 // Replace uses of the AND with uses of the Zero extend node. 2466 CombineTo(N, Zext); 2467 2468 // We actually want to replace all uses of the any_extend with the 2469 // zero_extend, to avoid duplicating things. This will later cause this 2470 // AND to be folded. 2471 CombineTo(N0.getNode(), Zext); 2472 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2473 } 2474 } 2475 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2476 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2477 // already be zero by virtue of the width of the base type of the load. 2478 // 2479 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2480 // more cases. 2481 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2482 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2483 N0.getOpcode() == ISD::LOAD) { 2484 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2485 N0 : N0.getOperand(0) ); 2486 2487 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2488 // This can be a pure constant or a vector splat, in which case we treat the 2489 // vector as a scalar and use the splat value. 
2490 APInt Constant = APInt::getNullValue(1); 2491 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2492 Constant = C->getAPIntValue(); 2493 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2494 APInt SplatValue, SplatUndef; 2495 unsigned SplatBitSize; 2496 bool HasAnyUndefs; 2497 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2498 SplatBitSize, HasAnyUndefs); 2499 if (IsSplat) { 2500 // Undef bits can contribute to a possible optimisation if set, so 2501 // set them. 2502 SplatValue |= SplatUndef; 2503 2504 // The splat value may be something like "0x00FFFFFF", which means 0 for 2505 // the first vector value and FF for the rest, repeating. We need a mask 2506 // that will apply equally to all members of the vector, so AND all the 2507 // lanes of the constant together. 2508 EVT VT = Vector->getValueType(0); 2509 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2510 2511 // If the splat value has been compressed to a bitlength lower 2512 // than the size of the vector lane, we need to re-expand it to 2513 // the lane size. 2514 if (BitWidth > SplatBitSize) 2515 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2516 SplatBitSize < BitWidth; 2517 SplatBitSize = SplatBitSize * 2) 2518 SplatValue |= SplatValue.shl(SplatBitSize); 2519 2520 Constant = APInt::getAllOnesValue(BitWidth); 2521 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 2522 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 2523 } 2524 } 2525 2526 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2527 // actually legal and isn't going to get expanded, else this is a false 2528 // optimisation. 2529 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2530 Load->getMemoryVT()); 2531 2532 // Resize the constant to the same size as the original memory access before 2533 // extension. If it is still the AllOnesValue then this AND is completely 2534 // unneeded. 
2535 Constant = 2536 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2537 2538 bool B; 2539 switch (Load->getExtensionType()) { 2540 default: B = false; break; 2541 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2542 case ISD::ZEXTLOAD: 2543 case ISD::NON_EXTLOAD: B = true; break; 2544 } 2545 2546 if (B && Constant.isAllOnesValue()) { 2547 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2548 // preserve semantics once we get rid of the AND. 2549 SDValue NewLoad(Load, 0); 2550 if (Load->getExtensionType() == ISD::EXTLOAD) { 2551 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2552 Load->getValueType(0), Load->getDebugLoc(), 2553 Load->getChain(), Load->getBasePtr(), 2554 Load->getOffset(), Load->getMemoryVT(), 2555 Load->getMemOperand()); 2556 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2557 if (Load->getNumValues() == 3) { 2558 // PRE/POST_INC loads have 3 values. 2559 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2560 NewLoad.getValue(2) }; 2561 CombineTo(Load, To, 3, true); 2562 } else { 2563 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2564 } 2565 } 2566 2567 // Fold the AND away, taking care not to fold to the old load node if we 2568 // replaced it. 2569 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2570 2571 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2572 } 2573 } 2574 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2575 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2576 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2577 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2578 2579 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2580 LL.getValueType().isInteger()) { 2581 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2582 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2583 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2584 LR.getValueType(), LL, RL); 2585 AddToWorkList(ORNode.getNode()); 2586 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2587 } 2588 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2589 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2590 SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), 2591 LR.getValueType(), LL, RL); 2592 AddToWorkList(ANDNode.getNode()); 2593 return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); 2594 } 2595 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2596 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2597 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2598 LR.getValueType(), LL, RL); 2599 AddToWorkList(ORNode.getNode()); 2600 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2601 } 2602 } 2603 // canonicalize equivalent to ll == rl 2604 if (LL == RR && LR == RL) { 2605 Op1 = ISD::getSetCCSwappedOperands(Op1); 2606 std::swap(RL, RR); 2607 } 2608 if (LL == RL && LR == RR) { 2609 bool isInteger = LL.getValueType().isInteger(); 2610 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2611 if (Result != ISD::SETCC_INVALID && 2612 (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) 2613 return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), 2614 LL, LR, Result); 2615 } 2616 } 2617 2618 // 
Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2619 if (N0.getOpcode() == N1.getOpcode()) { 2620 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2621 if (Tmp.getNode()) return Tmp; 2622 } 2623 2624 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2625 // fold (and (sra)) -> (and (srl)) when possible. 2626 if (!VT.isVector() && 2627 SimplifyDemandedBits(SDValue(N, 0))) 2628 return SDValue(N, 0); 2629 2630 // fold (zext_inreg (extload x)) -> (zextload x) 2631 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2632 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2633 EVT MemVT = LN0->getMemoryVT(); 2634 // If we zero all the possible extended bits, then we can turn this into 2635 // a zextload if we are running before legalize or the operation is legal. 2636 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2637 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2638 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2639 ((!LegalOperations && !LN0->isVolatile()) || 2640 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2641 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2642 LN0->getChain(), LN0->getBasePtr(), 2643 LN0->getPointerInfo(), MemVT, 2644 LN0->isVolatile(), LN0->isNonTemporal(), 2645 LN0->getAlignment()); 2646 AddToWorkList(N); 2647 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2648 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2649 } 2650 } 2651 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2652 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2653 N0.hasOneUse()) { 2654 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2655 EVT MemVT = LN0->getMemoryVT(); 2656 // If we zero all the possible extended bits, then we can turn this into 2657 // a zextload if we are running before legalize or the operation is legal. 
2658 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2659 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2660 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2661 ((!LegalOperations && !LN0->isVolatile()) || 2662 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2663 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2664 LN0->getChain(), 2665 LN0->getBasePtr(), LN0->getPointerInfo(), 2666 MemVT, 2667 LN0->isVolatile(), LN0->isNonTemporal(), 2668 LN0->getAlignment()); 2669 AddToWorkList(N); 2670 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2671 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2672 } 2673 } 2674 2675 // fold (and (load x), 255) -> (zextload x, i8) 2676 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2677 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2678 if (N1C && (N0.getOpcode() == ISD::LOAD || 2679 (N0.getOpcode() == ISD::ANY_EXTEND && 2680 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2681 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2682 LoadSDNode *LN0 = HasAnyExt 2683 ? cast<LoadSDNode>(N0.getOperand(0)) 2684 : cast<LoadSDNode>(N0); 2685 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2686 LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { 2687 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2688 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2689 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2690 EVT LoadedVT = LN0->getMemoryVT(); 2691 2692 if (ExtVT == LoadedVT && 2693 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2694 EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; 2695 2696 SDValue NewLoad = 2697 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2698 LN0->getChain(), LN0->getBasePtr(), 2699 LN0->getPointerInfo(), 2700 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2701 LN0->getAlignment()); 2702 AddToWorkList(N); 2703 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2704 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2705 } 2706 2707 // Do not change the width of a volatile load. 2708 // Do not generate loads of non-round integer types since these can 2709 // be expensive (and would be wrong if the type is not byte sized). 2710 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2711 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2712 EVT PtrType = LN0->getOperand(1).getValueType(); 2713 2714 unsigned Alignment = LN0->getAlignment(); 2715 SDValue NewPtr = LN0->getBasePtr(); 2716 2717 // For big endian targets, we need to add an offset to the pointer 2718 // to load the correct bytes. For little endian systems, we merely 2719 // need to read fewer bytes from the same pointer. 2720 if (TLI.isBigEndian()) { 2721 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 2722 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 2723 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 2724 NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, 2725 NewPtr, DAG.getConstant(PtrOff, PtrType)); 2726 Alignment = MinAlign(Alignment, PtrOff); 2727 } 2728 2729 AddToWorkList(NewPtr.getNode()); 2730 2731 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2732 SDValue Load = 2733 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2734 LN0->getChain(), NewPtr, 2735 LN0->getPointerInfo(), 2736 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2737 Alignment); 2738 AddToWorkList(N); 2739 CombineTo(LN0, Load, Load.getValue(1)); 2740 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2741 } 2742 } 2743 } 2744 } 2745 2746 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2747 VT.getSizeInBits() <= 64) { 2748 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2749 APInt ADDC = ADDI->getAPIntValue(); 2750 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2751 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2752 // immediate for an add, but it is legal if its top c2 bits are set, 2753 // transform the ADD so the immediate doesn't need to be materialized 2754 // in a register. 2755 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2756 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2757 SRLI->getZExtValue()); 2758 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2759 ADDC |= Mask; 2760 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2761 SDValue NewAdd = 2762 DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, 2763 N0.getOperand(0), DAG.getConstant(ADDC, VT)); 2764 CombineTo(N0.getNode(), NewAdd); 2765 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
            }
          }
        }
      }
    }
  }

  return SDValue();
}

/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// N is the OR-like node being combined; N0/N1 are its two operands in
/// either order.  If DemandHighBits is true the bits above the low halfword
/// of the result are significant, so the match must prove they end up zero.
/// Returns the replacement value, or a null SDValue if the pattern does not
/// match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  // This transform introduces a BSWAP node, so only run it once the target's
  // legal operation set is known and BSWAP is directly legal for VT.
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00).
  // Canonicalize so N0 ends up as the SHL side and N1 as the SRL side, then
  // strip an outer AND mask from either side.  LookPassAnd0/1 remember that
  // a mask was seen, i.e. the corresponding high bits are known cleared.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After mask stripping we require exactly a shl-by-8 / srl-by-8 pair,
  // each single-use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8): the masking
  // AND may also sit inside the shift rather than outside it.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifts must be of the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword is zero since the SRL 16
  // will clear the top bits.  If masking ANDs were seen on both sides the
  // high bits are already known cleared; otherwise ask ValueTracking.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16 &&
      (!LookPassAnd0 || !LookPassAnd1) &&
      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
    return SDValue();

  // Emit (bswap a); for types wider than i16 shift the swapped halfword back
  // down into the low 16 bits.
  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
2875/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2876static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { 2877 if (!N.getNode()->hasOneUse()) 2878 return false; 2879 2880 unsigned Opc = N.getOpcode(); 2881 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 2882 return false; 2883 2884 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2885 if (!N1C) 2886 return false; 2887 2888 unsigned Num; 2889 switch (N1C->getZExtValue()) { 2890 default: 2891 return false; 2892 case 0xFF: Num = 0; break; 2893 case 0xFF00: Num = 1; break; 2894 case 0xFF0000: Num = 2; break; 2895 case 0xFF000000: Num = 3; break; 2896 } 2897 2898 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 2899 SDValue N0 = N.getOperand(0); 2900 if (Opc == ISD::AND) { 2901 if (Num == 0 || Num == 2) { 2902 // (x >> 8) & 0xff 2903 // (x >> 8) & 0xff0000 2904 if (N0.getOpcode() != ISD::SRL) 2905 return false; 2906 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2907 if (!C || C->getZExtValue() != 8) 2908 return false; 2909 } else { 2910 // (x << 8) & 0xff00 2911 // (x << 8) & 0xff000000 2912 if (N0.getOpcode() != ISD::SHL) 2913 return false; 2914 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2915 if (!C || C->getZExtValue() != 8) 2916 return false; 2917 } 2918 } else if (Opc == ISD::SHL) { 2919 // (x & 0xff) << 8 2920 // (x & 0xff0000) << 8 2921 if (Num != 0 && Num != 2) 2922 return false; 2923 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2924 if (!C || C->getZExtValue() != 8) 2925 return false; 2926 } else { // Opc == ISD::SRL 2927 // (x & 0xff00) >> 8 2928 // (x & 0xff000000) >> 8 2929 if (Num != 1 && Num != 3) 2930 return false; 2931 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2932 if (!C || C->getZExtValue() != 8) 2933 return false; 2934 } 2935 2936 if (Parts[Num]) 2937 return false; 2938 2939 Parts[Num] = N0.getOperand(0).getNode(); 2940 return true; 
2941} 2942 2943/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is 2944/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2945/// => (rotl (bswap x), 16) 2946SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 2947 if (!LegalOperations) 2948 return SDValue(); 2949 2950 EVT VT = N->getValueType(0); 2951 if (VT != MVT::i32) 2952 return SDValue(); 2953 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 2954 return SDValue(); 2955 2956 SmallVector<SDNode*,4> Parts(4, (SDNode*)0); 2957 // Look for either 2958 // (or (or (and), (and)), (or (and), (and))) 2959 // (or (or (or (and), (and)), (and)), (and)) 2960 if (N0.getOpcode() != ISD::OR) 2961 return SDValue(); 2962 SDValue N00 = N0.getOperand(0); 2963 SDValue N01 = N0.getOperand(1); 2964 2965 if (N1.getOpcode() == ISD::OR) { 2966 // (or (or (and), (and)), (or (and), (and))) 2967 SDValue N000 = N00.getOperand(0); 2968 if (!isBSwapHWordElement(N000, Parts)) 2969 return SDValue(); 2970 2971 SDValue N001 = N00.getOperand(1); 2972 if (!isBSwapHWordElement(N001, Parts)) 2973 return SDValue(); 2974 SDValue N010 = N01.getOperand(0); 2975 if (!isBSwapHWordElement(N010, Parts)) 2976 return SDValue(); 2977 SDValue N011 = N01.getOperand(1); 2978 if (!isBSwapHWordElement(N011, Parts)) 2979 return SDValue(); 2980 } else { 2981 // (or (or (or (and), (and)), (and)), (and)) 2982 if (!isBSwapHWordElement(N1, Parts)) 2983 return SDValue(); 2984 if (!isBSwapHWordElement(N01, Parts)) 2985 return SDValue(); 2986 if (N00.getOpcode() != ISD::OR) 2987 return SDValue(); 2988 SDValue N000 = N00.getOperand(0); 2989 if (!isBSwapHWordElement(N000, Parts)) 2990 return SDValue(); 2991 SDValue N001 = N00.getOperand(1); 2992 if (!isBSwapHWordElement(N001, Parts)) 2993 return SDValue(); 2994 } 2995 2996 // Make sure the parts are all coming from the same node. 
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
                              SDValue(Parts[0],0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
}

/// visitOR - Combine an ISD::OR node.  Returns the replacement value, N
/// itself when N was updated in place, or a null SDValue when no fold
/// applies.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;

  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2).
  // The identity (X&c1)|c2 == (X|c2)&(c1|c2) holds for any constants.
  // NOTE(review): an earlier comment said "iff (c1 & c2) == 0", but the
  // guard below fires only when c1 and c2 *overlap* -- presumably to avoid
  // ping-ponging with the reverse canonicalization for disjoint masks;
  // confirm before changing.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                                     N0.getOperand(0), N1),
                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both compares against the same constant with the same predicate.
    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On success sets Shift to the shift node and Mask to the AND constant
/// operand (Mask is left untouched when no AND is present).
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // Strip an optional constant mask: (X shift V1) & V2.
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  // What remains must be a left or logical right shift.
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the new node, or null if no rotate idiom matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
  // Must be a legal type. Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // A rotate requires the two shift amounts to sum to the bit width.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // Bits rotated in from the unmasked half must stay set, so each mask
      // is widened with the bits its own shift zero-filled.
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ?
                             RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}

/// visitXOR - Combine an ISD::XOR node.  Returns the replacement value, N
/// itself when N was updated in place, or a null SDValue when no fold
/// applies.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // fold !(x cc y) -> (x !cc y); xor-with-1 of a setcc inverts the predicate.
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorkList(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (De Morgan, only for i1 where xor-with-1 is logical not).
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (De Morgan with xor-by-all-ones as bitwise not, any integer type).
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);

  // Simplify: xor (op x...), (op y...) -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.
This sort of 3453 // thing happens with address calculations, so it's important to canonicalize 3454 // it. 3455 bool HighBitSet = false; // Can we transform this if the high bit is set? 3456 3457 switch (LHS->getOpcode()) { 3458 default: return SDValue(); 3459 case ISD::OR: 3460 case ISD::XOR: 3461 HighBitSet = false; // We can only transform sra if the high bit is clear. 3462 break; 3463 case ISD::AND: 3464 HighBitSet = true; // We can only transform sra if the high bit is set. 3465 break; 3466 case ISD::ADD: 3467 if (N->getOpcode() != ISD::SHL) 3468 return SDValue(); // only shl(add) not sr[al](add). 3469 HighBitSet = false; // We can only transform sra if the high bit is clear. 3470 break; 3471 } 3472 3473 // We require the RHS of the binop to be a constant as well. 3474 ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); 3475 if (!BinOpCst) return SDValue(); 3476 3477 // FIXME: disable this unless the input to the binop is a shift by a constant. 3478 // If it is not a shift, it pessimizes some common cases like: 3479 // 3480 // void foo(int *X, int i) { X[i & 1235] = 1; } 3481 // int bar(int *X, int i) { return X[i & 255]; } 3482 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 3483 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 3484 BinOpLHSVal->getOpcode() != ISD::SRA && 3485 BinOpLHSVal->getOpcode() != ISD::SRL) || 3486 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 3487 return SDValue(); 3488 3489 EVT VT = N->getValueType(0); 3490 3491 // If this is a signed shift right, and the high bit is modified by the 3492 // logical operation, do not perform the transformation. The highBitSet 3493 // boolean indicates the value of the high bit of the constant which would 3494 // cause it to be modified for this operation. 
3495 if (N->getOpcode() == ISD::SRA) { 3496 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 3497 if (BinOpRHSSignSet != HighBitSet) 3498 return SDValue(); 3499 } 3500 3501 // Fold the constants, shifting the binop RHS by the shift amount. 3502 SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), 3503 N->getValueType(0), 3504 LHS->getOperand(1), N->getOperand(1)); 3505 3506 // Create the new shift. 3507 SDValue NewShift = DAG.getNode(N->getOpcode(), 3508 LHS->getOperand(0).getDebugLoc(), 3509 VT, LHS->getOperand(0), N->getOperand(1)); 3510 3511 // Create the new binop. 3512 return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); 3513} 3514 3515SDValue DAGCombiner::visitSHL(SDNode *N) { 3516 SDValue N0 = N->getOperand(0); 3517 SDValue N1 = N->getOperand(1); 3518 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3519 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3520 EVT VT = N0.getValueType(); 3521 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 3522 3523 // fold (shl c1, c2) -> c1<<c2 3524 if (N0C && N1C) 3525 return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); 3526 // fold (shl 0, x) -> 0 3527 if (N0C && N0C->isNullValue()) 3528 return N0; 3529 // fold (shl x, c >= size(x)) -> undef 3530 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 3531 return DAG.getUNDEF(VT); 3532 // fold (shl x, 0) -> x 3533 if (N1C && N1C->isNullValue()) 3534 return N0; 3535 // fold (shl undef, x) -> 0 3536 if (N0.getOpcode() == ISD::UNDEF) 3537 return DAG.getConstant(0, VT); 3538 // if (shl x, c) is known to be zero, return 0 3539 if (DAG.MaskedValueIsZero(SDValue(N, 0), 3540 APInt::getAllOnesValue(OpSizeInBits))) 3541 return DAG.getConstant(0, VT); 3542 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
/// visitSHL - Combine patterns rooted at a left-shift node.  Folds are tried
/// in order and each successful fold returns immediately, so the order below
/// is significant.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  // Use the scalar width so vector shifts are handled per-element.
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  // Pushing the truncate onto the AND's operands can expose a simpler shift
  // amount; only done when the trunc and the and have no other users.
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    // Combined shift of >= the bit width shifts everything out.
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // OpSizeInBits - InnerShiftSize is the number of bits the ext added.
    if (c2 >= OpSizeInBits - InnerShiftSize) {
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
                                     N0.getOperand(0)->getOperand(0)),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    if (c1 < VT.getSizeInBits()) {
      uint64_t c2 = N1C->getZExtValue();
      // MASK covers the bits the srl guaranteed to be zero / shifted in.
      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                         VT.getSizeInBits() - c1);
      SDValue Shift;
      if (c2 > c1) {
        // Net left shift by c2-c1; shift the mask along with it.
        Mask = Mask.shl(c2-c1);
        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c2-c1, N1.getValueType()));
      } else {
        // Net right shift by c1-c2.
        Mask = Mask.lshr(c1-c2);
        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c1-c2, N1.getValueType()));
      }
      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
                         DAG.getConstant(Mask, VT));
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                            VT.getSizeInBits() -
                                              N1C->getZExtValue()),
                      VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // Finally, try the folds shared by all three shifts.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}
/// visitSRA - Combine patterns rooted at an arithmetic-right-shift node.
/// Folds are tried in order; each successful fold returns immediately.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1  (sign bit replicates, so -1 is a fixed point)
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, c) -> undef for c >= size(x)
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An sra amount saturates at bitwidth-1 (everything becomes sign bits).
      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1C->getValueType(0)));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //   -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N01C && N1C) {
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT =
        EVT::getIntegerVT(*DAG.getContext(),
                          OpSizeInBits - N1C->getZExtValue());
      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    EVT LargeVT = N0.getOperand(0).getValueType();
    ConstantSDNode *LargeShiftAmt =
      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));

    // The inner shift must discard exactly the bits the truncate removes.
    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
        LargeShiftAmt->getZExtValue()) {
      SDValue Amt =
        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
                                N0.getOperand(0).getOperand(0), Amt);
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);

  // Finally, try the folds shared by all three shifts.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}
/// visitSRL - Combine patterns rooted at a logical-right-shift node.
/// Folds are tried in order; each successful fold returns immediately.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    // Combined shift of >= the bit width shifts everything out.
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      N0.getValueSizeInBits() <= 64) {
    // Build the mask in a 64-bit value; ShAmt accounts for the value width.
    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(~0ULL >> ShAmt, VT));
  }


  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorkList(SmallShift.getNode());
      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
    APInt KnownZero, KnownOne;
    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input to the ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only the single bit specified by UnknownBits
      // could be set on input to the CTLZ node.  If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1.  Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorkList(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Try the folds shared by all three shifts.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorkList(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorkList(Use);
    }
  }

  return SDValue();
}
3974 Use = *Use->use_begin(); 3975 if (Use->getOpcode() == ISD::BRCOND) 3976 AddToWorkList(Use); 3977 } 3978 } 3979 3980 return SDValue(); 3981} 3982 3983SDValue DAGCombiner::visitCTLZ(SDNode *N) { 3984 SDValue N0 = N->getOperand(0); 3985 EVT VT = N->getValueType(0); 3986 3987 // fold (ctlz c1) -> c2 3988 if (isa<ConstantSDNode>(N0)) 3989 return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); 3990 return SDValue(); 3991} 3992 3993SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 3994 SDValue N0 = N->getOperand(0); 3995 EVT VT = N->getValueType(0); 3996 3997 // fold (ctlz_zero_undef c1) -> c2 3998 if (isa<ConstantSDNode>(N0)) 3999 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); 4000 return SDValue(); 4001} 4002 4003SDValue DAGCombiner::visitCTTZ(SDNode *N) { 4004 SDValue N0 = N->getOperand(0); 4005 EVT VT = N->getValueType(0); 4006 4007 // fold (cttz c1) -> c2 4008 if (isa<ConstantSDNode>(N0)) 4009 return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); 4010 return SDValue(); 4011} 4012 4013SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 4014 SDValue N0 = N->getOperand(0); 4015 EVT VT = N->getValueType(0); 4016 4017 // fold (cttz_zero_undef c1) -> c2 4018 if (isa<ConstantSDNode>(N0)) 4019 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); 4020 return SDValue(); 4021} 4022 4023SDValue DAGCombiner::visitCTPOP(SDNode *N) { 4024 SDValue N0 = N->getOperand(0); 4025 EVT VT = N->getValueType(0); 4026 4027 // fold (ctpop c1) -> c2 4028 if (isa<ConstantSDNode>(N0)) 4029 return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); 4030 return SDValue(); 4031} 4032 4033SDValue DAGCombiner::visitSELECT(SDNode *N) { 4034 SDValue N0 = N->getOperand(0); 4035 SDValue N1 = N->getOperand(1); 4036 SDValue N2 = N->getOperand(2); 4037 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4038 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4039 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 4040 EVT VT = N->getValueType(0); 4041 
/// visitSELECT - Combine patterns rooted at a select node.  Folds are tried
/// in order; each successful fold returns immediately.  The i1 folds rewrite
/// selects into plain boolean logic (or/and/xor) when safe.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // condition
  SDValue N1 = N->getOperand(1);  // true value
  SDValue N2 = N->getOperand(2);  // false value
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // Valid when the condition is i1 or the target represents booleans as 0/1.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents(false) ==
        TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor in VT0, then widen
    // or narrow the result to VT.
    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // FIXME:
    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
    // having to say they don't support SELECT_CC on every type the DAG knows
    // about, since there is no way to mark an opcode illegal at all value types
    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
  }

  return SDValue();
}
/// visitSELECT_CC - Combine patterns rooted at a select_cc node
/// (select_cc lhs, rhs, truval, falseval, cc).
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // lhs of the comparison
  SDValue N1 = N->getOperand(1);  // rhs of the comparison
  SDValue N2 = N->getOperand(2);  // value if the comparison is true
  SDValue N3 = N->getOperand(3);  // value if the comparison is false
  SDValue N4 = N->getOperand(4);  // condition code
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, N->getDebugLoc(), false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());

  // If the condition folded to a constant, the select collapses entirely.
  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    if (!SCCC->isNullValue())
      return N2;    // cond always true -> true val
    else
      return N3;    // cond always false -> false val
  }

  // Fold to a simpler select_cc
  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                       SCC.getOperand(2));

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
}
4136 if (!SCCC->isNullValue()) 4137 return N2; // cond always true -> true val 4138 else 4139 return N3; // cond always false -> false val 4140 } 4141 4142 // Fold to a simpler select_cc 4143 if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) 4144 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), 4145 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 4146 SCC.getOperand(2)); 4147 4148 // If we can fold this based on the true/false value, do so. 4149 if (SimplifySelectOps(N, N2, N3)) 4150 return SDValue(N, 0); // Don't revisit N. 4151 4152 // fold select_cc into other things, such as min/max/abs 4153 return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); 4154} 4155 4156SDValue DAGCombiner::visitSETCC(SDNode *N) { 4157 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 4158 cast<CondCodeSDNode>(N->getOperand(2))->get(), 4159 N->getDebugLoc()); 4160} 4161 4162// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 4163// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 4164// transformation. Returns true if extension are possible and the above 4165// mentioned transformation is profitable. 4166static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 4167 unsigned ExtOpc, 4168 SmallVector<SDNode*, 4> &ExtendNodes, 4169 const TargetLowering &TLI) { 4170 bool HasCopyToRegUses = false; 4171 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 4172 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 4173 UE = N0.getNode()->use_end(); 4174 UI != UE; ++UI) { 4175 SDNode *User = *UI; 4176 if (User == N) 4177 continue; 4178 if (UI.getUse().getResNo() != N0.getResNo()) 4179 continue; 4180 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
// N is the extension node, N0 the load being extended, ExtOpc the extension
// opcode; profitable setcc users are collected into ExtendNodes.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVector<SDNode*, 4> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
  // Walk all users of the load's result value.
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    // Ignore uses of the load's other results (e.g. the chain).
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
        // Sign bits will be lost after a zext.
        return false;
      bool Add = false;
      for (unsigned i = 0; i != 2; ++i) {
        SDValue UseOp = User->getOperand(i);
        if (UseOp == N0)
          continue;
        // Only the load itself or a constant may appear as a setcc operand.
        if (!isa<ConstantSDNode>(UseOp))
          return false;
        Add = true;
      }
      if (Add)
        ExtendNodes.push_back(User);
      continue;
    }
    // If truncates aren't free and there are users we can't
    // extend, it isn't worthwhile.
    if (!isTruncFree)
      return false;
    // Remember if this value is live-out.
    if (User->getOpcode() == ISD::CopyToReg)
      HasCopyToRegUses = true;
  }

  if (HasCopyToRegUses) {
    bool BothLiveOut = false;
    // Check whether the extended value (N's result 0) is live-out too.
    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
        BothLiveOut = true;
        break;
      }
    }
    if (BothLiveOut)
      // Both unextended and extended values are live out. There had better be
      // a good reason for the transformation.
      // (size_t converts to bool: true only if setcc users were collected.)
      return ExtendNodes.size();
  }
  return true;
}
4230 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 4231 SDNode *SetCC = SetCCs[i]; 4232 SmallVector<SDValue, 4> Ops; 4233 4234 for (unsigned j = 0; j != 2; ++j) { 4235 SDValue SOp = SetCC->getOperand(j); 4236 if (SOp == Trunc) 4237 Ops.push_back(ExtLoad); 4238 else 4239 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 4240 } 4241 4242 Ops.push_back(SetCC->getOperand(2)); 4243 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), 4244 &Ops[0], Ops.size())); 4245 } 4246} 4247 4248SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 4249 SDValue N0 = N->getOperand(0); 4250 EVT VT = N->getValueType(0); 4251 4252 // fold (sext c1) -> c1 4253 if (isa<ConstantSDNode>(N0)) 4254 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); 4255 4256 // fold (sext (sext x)) -> (sext x) 4257 // fold (sext (aext x)) -> (sext x) 4258 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 4259 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, 4260 N0.getOperand(0)); 4261 4262 if (N0.getOpcode() == ISD::TRUNCATE) { 4263 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 4264 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 4265 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4266 if (NarrowLoad.getNode()) { 4267 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4268 if (NarrowLoad.getNode() != N0.getNode()) { 4269 CombineTo(N0.getNode(), NarrowLoad); 4270 // CombineTo deleted the truncate, if needed, but not what's under it. 4271 AddToWorkList(oye); 4272 } 4273 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4274 } 4275 4276 // See if the value being truncated is already sign extended. If so, just 4277 // eliminate the trunc/sext pair. 
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses the load is only worth extending if every user can
    // take the extended value (or a free truncate of it).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  // NOTE(review): the '&&' below makes this fold fire only before
  // legalization *and* when the bitwise op is already legal; '||' (either
  // condition) looks intended — confirm against later revisions before
  // changing.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getPointerInfo(),
                                         LN0->getMemoryVT(),
                                         LN0->isVolatile(),
                                         LN0->isNonTemporal(),
                                         LN0->getAlignment());
        // Sign-extend the constant operand to the destination width so the
        // bitwise op stays equivalent.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    N0.getOperand(0).getDebugLoc(),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = TLI.getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());

      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
                         DAG.getSetCC(N->getDebugLoc(),
                                      TLI.getSetCCResultType(VT),
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                         NegOne, DAG.getConstant(0, VT));
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
// ComputeMaskedBits in the caller.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);
    return true;
  }

  // Also recognize (setcc x, 0, ne) with an i1 result where x is known to be
  // 0 or 1: that acts as a one-bit truncate of x.
  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
  if (COp0 && COp0->isNullValue())
    Op = Op1;
  else if (COp1 && COp1->isNullValue())
    Op = Op0;
  else
    return false;

  DAG.ComputeMaskedBits(Op, KnownZero, KnownOne);

  // All bits of Op except possibly bit 0 must be known zero.
  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}

SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // TruncatedBits = the bits of Op that the truncate N0 discards (and that
    // a zext of the result must therefore see as zero).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // NOTE(review): same suspicious '&&' as the sext version above — confirm
  // whether '||' was intended before changing.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getPointerInfo(),
                                         LN0->getMemoryVT(),
                                         LN0->isVolatile(),
                                         LN0->isNonTemporal(),
                                         LN0->getAlignment());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    N0.getOperand(0).getDebugLoc(),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector()) {
      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT N0VT = N0.getOperand(0).getValueType();
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                           DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                        cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                       &OneOps[0], OneOps.size()));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                     &OneOps[0], OneOps.size()));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // KnownZeroBits = # of high bits the inner zext guarantees are zero.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    DebugLoc DL = N->getDebugLoc();

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // Re-extend with the load's own extension type: any_ext preserves
    // whatever the original extending load produced.
    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
                                     VT, LN0->getChain(), LN0->getBasePtr(),
                                     LN0->getPointerInfo(), MemVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                          N0.getValueType(), ExtLoad),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      else {
        EVT MatchingElementType =
          EVT::getIntegerVT(*DAG.getContext(),
                            N0VT.getScalarType().getSizeInBits());
        EVT MatchingVectorType =
          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                           N0VT.getVectorNumElements());
        SDValue VsetCC =
          DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used.  If so, return the
/// simpler operand, otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
  switch (V.getOpcode()) {
  default: break;
  case ISD::Constant: {
    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
    assert(CV != 0 && "Const value should be ConstSDNode.");
    const APInt &CVal = CV->getAPIntValue();
    APInt NewVal = CVal & Mask;
    if (NewVal != CVal) {
      return DAG.getConstant(NewVal, V.getValueType());
    }
    break;
  }
  case ISD::OR:
  case ISD::XOR:
    // If the LHS or RHS don't contribute bits to the or, drop them.
    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
      return V.getOperand(1);
    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
      return V.getOperand(0);
    break;
  case ISD::SRL:
    // Only look at single-use SRLs.
    if (!V.getNode()->hasOneUse())
      break;
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      // See if we can recursively simplify the LHS.
      unsigned Amt = RHSC->getZExtValue();

      // Watch out for shift count overflow though.
      if (Amt >= Mask.getBitWidth()) break;
      // The bits demanded of the SRL's input are the demanded bits shifted
      // back up by the shift amount.
      APInt NewMask = Mask << Amt;
      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
      if (SimplifyLHS.getNode())
        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
                           SimplifyLHS, V.getOperand(1));
    }
  }
  return SDValue();
}

/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
/// bits and then truncated to a narrower type and where N is a multiple
/// of number of bits of the narrower type, transform it to a narrower load
/// from address + N / num of bits of new type.  If the result is to be
/// extended, also fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    // For SRL the node itself plays the role of the truncate; the SRL-handling
    // code below will peel off the shift and find the load underneath.
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      // If the load was a sextload then the result is a splat of the sign bit
      // of the extended byte.  This is not worth optimizing for.
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
      // Don't change the width of a volatile load.
      cast<LoadSDNode>(N0)->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  // Operand 1 of a load is its base pointer; reuse its type for the new
  // address arithmetic.
  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorkList(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign);
  else
    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          NewAlign);

  // Replace the old load's chain with the new load's chain.
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    // Fall back to the result type if the shift amount doesn't fit in the
    // target's shift-amount type.
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
                         Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: the local variable 'EVT' shadows the type name EVT for the rest of
  // this function.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);
  }

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
5154 if (SimplifyDemandedBits(SDValue(N, 0))) 5155 return SDValue(N, 0); 5156 5157 // fold (sext_in_reg (load x)) -> (smaller sextload x) 5158 // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits)) 5159 SDValue NarrowLoad = ReduceLoadWidth(N); 5160 if (NarrowLoad.getNode()) 5161 return NarrowLoad; 5162 5163 // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24) 5164 // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible. 5165 // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above. 5166 if (N0.getOpcode() == ISD::SRL) { 5167 if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 5168 if (ShAmt->getZExtValue()+EVTBits <= VTBits) { 5169 // We can turn this into an SRA iff the input to the SRL is already sign 5170 // extended enough. 5171 unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); 5172 if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) 5173 return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, 5174 N0.getOperand(0), N0.getOperand(1)); 5175 } 5176 } 5177 5178 // fold (sext_inreg (extload x)) -> (sextload x) 5179 if (ISD::isEXTLoad(N0.getNode()) && 5180 ISD::isUNINDEXEDLoad(N0.getNode()) && 5181 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 5182 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 5183 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 5184 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5185 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 5186 LN0->getChain(), 5187 LN0->getBasePtr(), LN0->getPointerInfo(), 5188 EVT, 5189 LN0->isVolatile(), LN0->isNonTemporal(), 5190 LN0->getAlignment()); 5191 CombineTo(N, ExtLoad); 5192 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 5193 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
5194 } 5195 // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use 5196 if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 5197 N0.hasOneUse() && 5198 EVT == cast<LoadSDNode>(N0)->getMemoryVT() && 5199 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 5200 TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { 5201 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5202 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 5203 LN0->getChain(), 5204 LN0->getBasePtr(), LN0->getPointerInfo(), 5205 EVT, 5206 LN0->isVolatile(), LN0->isNonTemporal(), 5207 LN0->getAlignment()); 5208 CombineTo(N, ExtLoad); 5209 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 5210 return SDValue(N, 0); // Return N so it doesn't get rechecked! 5211 } 5212 5213 // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16)) 5214 if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) { 5215 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 5216 N0.getOperand(1), false); 5217 if (BSwap.getNode() != 0) 5218 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, 5219 BSwap, N1); 5220 } 5221 5222 return SDValue(); 5223} 5224 5225SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { 5226 SDValue N0 = N->getOperand(0); 5227 EVT VT = N->getValueType(0); 5228 bool isLE = TLI.isLittleEndian(); 5229 5230 // noop truncate 5231 if (N0.getValueType() == N->getValueType(0)) 5232 return N0; 5233 // fold (truncate c1) -> c1 5234 if (isa<ConstantSDNode>(N0)) 5235 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); 5236 // fold (truncate (truncate x)) -> (truncate x) 5237 if (N0.getOpcode() == ISD::TRUNCATE) 5238 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); 5239 // fold (truncate (ext x)) -> (ext x) or (truncate x) or x 5240 if (N0.getOpcode() == ISD::ZERO_EXTEND || 5241 N0.getOpcode() == ISD::SIGN_EXTEND || 5242 N0.getOpcode() == ISD::ANY_EXTEND) { 5243 if (N0.getOperand(0).getValueType().bitsLT(VT)) 
5244 // if the source is smaller than the dest, we still need an extend 5245 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 5246 N0.getOperand(0)); 5247 if (N0.getOperand(0).getValueType().bitsGT(VT)) 5248 // if the source is larger than the dest, than we just need the truncate 5249 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); 5250 // if the source and dest are the same type, we can drop both the extend 5251 // and the truncate. 5252 return N0.getOperand(0); 5253 } 5254 5255 // Fold extract-and-trunc into a narrow extract. For example: 5256 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 5257 // i32 y = TRUNCATE(i64 x) 5258 // -- becomes -- 5259 // v16i8 b = BITCAST (v2i64 val) 5260 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 5261 // 5262 // Note: We only run this optimization after type legalization (which often 5263 // creates this pattern) and before operation legalization after which 5264 // we need to be more careful about the vector instructions that we generate. 5265 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5266 LegalTypes && !LegalOperations && N0->hasOneUse()) { 5267 5268 EVT VecTy = N0.getOperand(0).getValueType(); 5269 EVT ExTy = N0.getValueType(); 5270 EVT TrTy = N->getValueType(0); 5271 5272 unsigned NumElem = VecTy.getVectorNumElements(); 5273 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 5274 5275 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 5276 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 5277 5278 SDValue EltNo = N0->getOperand(1); 5279 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 5280 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 5281 EVT IndexTy = N0->getOperand(1).getValueType(); 5282 int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 5283 5284 SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 5285 NVT, N0.getOperand(0)); 5286 5287 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 5288 N->getDebugLoc(), TrTy, V, 5289 DAG.getConstant(Index, IndexTy)); 5290 } 5291 } 5292 5293 // See if we can simplify the input to this truncate through knowledge that 5294 // only the low bits are being used. 5295 // For example "trunc (or (shl x, 8), y)" // -> trunc y 5296 // Currently we only perform this optimization on scalars because vectors 5297 // may have different active low bits. 5298 if (!VT.isVector()) { 5299 SDValue Shorter = 5300 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 5301 VT.getSizeInBits())); 5302 if (Shorter.getNode()) 5303 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); 5304 } 5305 // fold (truncate (load x)) -> (smaller load x) 5306 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 5307 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 5308 SDValue Reduced = ReduceLoadWidth(N); 5309 if (Reduced.getNode()) 5310 return Reduced; 5311 } 5312 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 5313 // where ... are all 'undef'. 5314 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 5315 SmallVector<EVT, 8> VTs; 5316 SDValue V; 5317 unsigned Idx = 0; 5318 unsigned NumDefs = 0; 5319 5320 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 5321 SDValue X = N0.getOperand(i); 5322 if (X.getOpcode() != ISD::UNDEF) { 5323 V = X; 5324 Idx = i; 5325 NumDefs++; 5326 } 5327 // Stop if more than one members are non-undef. 
5328 if (NumDefs > 1) 5329 break; 5330 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 5331 VT.getVectorElementType(), 5332 X.getValueType().getVectorNumElements())); 5333 } 5334 5335 if (NumDefs == 0) 5336 return DAG.getUNDEF(VT); 5337 5338 if (NumDefs == 1) { 5339 assert(V.getNode() && "The single defined operand is empty!"); 5340 SmallVector<SDValue, 8> Opnds; 5341 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 5342 if (i != Idx) { 5343 Opnds.push_back(DAG.getUNDEF(VTs[i])); 5344 continue; 5345 } 5346 SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); 5347 AddToWorkList(NV.getNode()); 5348 Opnds.push_back(NV); 5349 } 5350 return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 5351 &Opnds[0], Opnds.size()); 5352 } 5353 } 5354 5355 // Simplify the operands using demanded-bits information. 5356 if (!VT.isVector() && 5357 SimplifyDemandedBits(SDValue(N, 0))) 5358 return SDValue(N, 0); 5359 5360 return SDValue(); 5361} 5362 5363static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 5364 SDValue Elt = N->getOperand(i); 5365 if (Elt.getOpcode() != ISD::MERGE_VALUES) 5366 return Elt.getNode(); 5367 return Elt.getOperand(Elt.getResNo()).getNode(); 5368} 5369 5370/// CombineConsecutiveLoads - build_pair (load, load) -> load 5371/// if load locations are consecutive. 5372SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 5373 assert(N->getOpcode() == ISD::BUILD_PAIR); 5374 5375 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 5376 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 5377 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 5378 LD1->getPointerInfo().getAddrSpace() != 5379 LD2->getPointerInfo().getAddrSpace()) 5380 return SDValue(); 5381 EVT LD1VT = LD1->getValueType(0); 5382 5383 if (ISD::isNON_EXTLoad(LD2) && 5384 LD2->hasOneUse() && 5385 // If both are volatile this would reduce the number of volatile loads. 
5386 // If one is volatile it might be ok, but play conservative and bail out. 5387 !LD1->isVolatile() && 5388 !LD2->isVolatile() && 5389 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 5390 unsigned Align = LD1->getAlignment(); 5391 unsigned NewAlign = TLI.getDataLayout()-> 5392 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 5393 5394 if (NewAlign <= Align && 5395 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 5396 return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), 5397 LD1->getBasePtr(), LD1->getPointerInfo(), 5398 false, false, false, Align); 5399 } 5400 5401 return SDValue(); 5402} 5403 5404SDValue DAGCombiner::visitBITCAST(SDNode *N) { 5405 SDValue N0 = N->getOperand(0); 5406 EVT VT = N->getValueType(0); 5407 5408 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 5409 // Only do this before legalize, since afterward the target may be depending 5410 // on the bitconvert. 5411 // First check to see if this is all constant. 5412 if (!LegalTypes && 5413 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 5414 VT.isVector()) { 5415 bool isSimple = true; 5416 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) 5417 if (N0.getOperand(i).getOpcode() != ISD::UNDEF && 5418 N0.getOperand(i).getOpcode() != ISD::Constant && 5419 N0.getOperand(i).getOpcode() != ISD::ConstantFP) { 5420 isSimple = false; 5421 break; 5422 } 5423 5424 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 5425 assert(!DestEltVT.isVector() && 5426 "Element type of vector ValueType must not be vector!"); 5427 if (isSimple) 5428 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 5429 } 5430 5431 // If the input is a constant, let getNode fold it. 
5432 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 5433 SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); 5434 if (Res.getNode() != N) { 5435 if (!LegalOperations || 5436 TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) 5437 return Res; 5438 5439 // Folding it resulted in an illegal node, and it's too late to 5440 // do that. Clean up the old node and forego the transformation. 5441 // Ideally this won't happen very often, because instcombine 5442 // and the earlier dagcombine runs (where illegal nodes are 5443 // permitted) should have folded most of them already. 5444 DAG.DeleteNode(Res.getNode()); 5445 } 5446 } 5447 5448 // (conv (conv x, t1), t2) -> (conv x, t2) 5449 if (N0.getOpcode() == ISD::BITCAST) 5450 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, 5451 N0.getOperand(0)); 5452 5453 // fold (conv (load x)) -> (load (conv*)x) 5454 // If the resultant load doesn't need a higher alignment than the original! 5455 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 5456 // Do not change the width of a volatile load. 
5457 !cast<LoadSDNode>(N0)->isVolatile() && 5458 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { 5459 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5460 unsigned Align = TLI.getDataLayout()-> 5461 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 5462 unsigned OrigAlign = LN0->getAlignment(); 5463 5464 if (Align <= OrigAlign) { 5465 SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), 5466 LN0->getBasePtr(), LN0->getPointerInfo(), 5467 LN0->isVolatile(), LN0->isNonTemporal(), 5468 LN0->isInvariant(), OrigAlign); 5469 AddToWorkList(N); 5470 CombineTo(N0.getNode(), 5471 DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5472 N0.getValueType(), Load), 5473 Load.getValue(1)); 5474 return Load; 5475 } 5476 } 5477 5478 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 5479 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 5480 // This often reduces constant pool loads. 5481 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || 5482 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && 5483 N0.getNode()->hasOneUse() && VT.isInteger() && 5484 !VT.isVector() && !N0.getValueType().isVector()) { 5485 SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, 5486 N0.getOperand(0)); 5487 AddToWorkList(NewConv.getNode()); 5488 5489 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 5490 if (N0.getOpcode() == ISD::FNEG) 5491 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, 5492 NewConv, DAG.getConstant(SignBit, VT)); 5493 assert(N0.getOpcode() == ISD::FABS); 5494 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 5495 NewConv, DAG.getConstant(~SignBit, VT)); 5496 } 5497 5498 // fold (bitconvert (fcopysign cst, x)) -> 5499 // (or (and (bitconvert x), sign), (and cst, (not sign))) 5500 // Note that we don't handle (copysign x, cst) because this can always be 5501 // folded to an fneg or fabs. 
5502 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 5503 isa<ConstantFPSDNode>(N0.getOperand(0)) && 5504 VT.isInteger() && !VT.isVector()) { 5505 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 5506 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 5507 if (isTypeLegal(IntXVT)) { 5508 SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5509 IntXVT, N0.getOperand(1)); 5510 AddToWorkList(X.getNode()); 5511 5512 // If X has a different width than the result/lhs, sext it or truncate it. 5513 unsigned VTWidth = VT.getSizeInBits(); 5514 if (OrigXWidth < VTWidth) { 5515 X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); 5516 AddToWorkList(X.getNode()); 5517 } else if (OrigXWidth > VTWidth) { 5518 // To get the sign bit in the right place, we have to shift it right 5519 // before truncating. 5520 X = DAG.getNode(ISD::SRL, X.getDebugLoc(), 5521 X.getValueType(), X, 5522 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 5523 AddToWorkList(X.getNode()); 5524 X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); 5525 AddToWorkList(X.getNode()); 5526 } 5527 5528 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 5529 X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, 5530 X, DAG.getConstant(SignBit, VT)); 5531 AddToWorkList(X.getNode()); 5532 5533 SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), 5534 VT, N0.getOperand(0)); 5535 Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, 5536 Cst, DAG.getConstant(~SignBit, VT)); 5537 AddToWorkList(Cst.getNode()); 5538 5539 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); 5540 } 5541 } 5542 5543 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
5544 if (N0.getOpcode() == ISD::BUILD_PAIR) { 5545 SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); 5546 if (CombineLD.getNode()) 5547 return CombineLD; 5548 } 5549 5550 return SDValue(); 5551} 5552 5553SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 5554 EVT VT = N->getValueType(0); 5555 return CombineConsecutiveLoads(N, VT); 5556} 5557 5558/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector 5559/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the 5560/// destination element value type. 5561SDValue DAGCombiner:: 5562ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 5563 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 5564 5565 // If this is already the right type, we're done. 5566 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 5567 5568 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 5569 unsigned DstBitSize = DstEltVT.getSizeInBits(); 5570 5571 // If this is a conversion of N elements of one type to N elements of another 5572 // type, convert each element. This handles FP<->INT cases. 5573 if (SrcBitSize == DstBitSize) { 5574 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 5575 BV->getValueType(0).getVectorNumElements()); 5576 5577 // Due to the FP element handling below calling this routine recursively, 5578 // we can end up with a scalar-to-vector node here. 5579 if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) 5580 return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, 5581 DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), 5582 DstEltVT, BV->getOperand(0))); 5583 5584 SmallVector<SDValue, 8> Ops; 5585 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 5586 SDValue Op = BV->getOperand(i); 5587 // If the vector element type is not legal, the BUILD_VECTOR operands 5588 // are promoted and implicitly truncated. Make that explicit here. 
5589 if (Op.getValueType() != SrcEltVT) 5590 Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); 5591 Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), 5592 DstEltVT, Op)); 5593 AddToWorkList(Ops.back().getNode()); 5594 } 5595 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5596 &Ops[0], Ops.size()); 5597 } 5598 5599 // Otherwise, we're growing or shrinking the elements. To avoid having to 5600 // handle annoying details of growing/shrinking FP values, we convert them to 5601 // int first. 5602 if (SrcEltVT.isFloatingPoint()) { 5603 // Convert the input float vector to a int vector where the elements are the 5604 // same sizes. 5605 assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!"); 5606 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits()); 5607 BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode(); 5608 SrcEltVT = IntVT; 5609 } 5610 5611 // Now we know the input is an integer vector. If the output is a FP type, 5612 // convert to integer first, then to FP of the right size. 5613 if (DstEltVT.isFloatingPoint()) { 5614 assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!"); 5615 EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits()); 5616 SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode(); 5617 5618 // Next, convert to FP elements of the same size. 5619 return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT); 5620 } 5621 5622 // Okay, we know the src/dst types are both integers of differing types. 5623 // Handling growing first. 
5624 assert(SrcEltVT.isInteger() && DstEltVT.isInteger()); 5625 if (SrcBitSize < DstBitSize) { 5626 unsigned NumInputsPerOutput = DstBitSize/SrcBitSize; 5627 5628 SmallVector<SDValue, 8> Ops; 5629 for (unsigned i = 0, e = BV->getNumOperands(); i != e; 5630 i += NumInputsPerOutput) { 5631 bool isLE = TLI.isLittleEndian(); 5632 APInt NewBits = APInt(DstBitSize, 0); 5633 bool EltIsUndef = true; 5634 for (unsigned j = 0; j != NumInputsPerOutput; ++j) { 5635 // Shift the previously computed bits over. 5636 NewBits <<= SrcBitSize; 5637 SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j)); 5638 if (Op.getOpcode() == ISD::UNDEF) continue; 5639 EltIsUndef = false; 5640 5641 NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue(). 5642 zextOrTrunc(SrcBitSize).zext(DstBitSize); 5643 } 5644 5645 if (EltIsUndef) 5646 Ops.push_back(DAG.getUNDEF(DstEltVT)); 5647 else 5648 Ops.push_back(DAG.getConstant(NewBits, DstEltVT)); 5649 } 5650 5651 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); 5652 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5653 &Ops[0], Ops.size()); 5654 } 5655 5656 // Finally, this must be the case where we are shrinking elements: each input 5657 // turns into multiple outputs. 
5658 bool isS2V = ISD::isScalarToVector(BV); 5659 unsigned NumOutputsPerInput = SrcBitSize/DstBitSize; 5660 EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, 5661 NumOutputsPerInput*BV->getNumOperands()); 5662 SmallVector<SDValue, 8> Ops; 5663 5664 for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) { 5665 if (BV->getOperand(i).getOpcode() == ISD::UNDEF) { 5666 for (unsigned j = 0; j != NumOutputsPerInput; ++j) 5667 Ops.push_back(DAG.getUNDEF(DstEltVT)); 5668 continue; 5669 } 5670 5671 APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))-> 5672 getAPIntValue().zextOrTrunc(SrcBitSize); 5673 5674 for (unsigned j = 0; j != NumOutputsPerInput; ++j) { 5675 APInt ThisVal = OpVal.trunc(DstBitSize); 5676 Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); 5677 if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) 5678 // Simply turn this into a SCALAR_TO_VECTOR of the new type. 5679 return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, 5680 Ops[0]); 5681 OpVal = OpVal.lshr(DstBitSize); 5682 } 5683 5684 // For big endian targets, swap the order of the pieces of each element. 
5685 if (TLI.isBigEndian()) 5686 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 5687 } 5688 5689 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5690 &Ops[0], Ops.size()); 5691} 5692 5693SDValue DAGCombiner::visitFADD(SDNode *N) { 5694 SDValue N0 = N->getOperand(0); 5695 SDValue N1 = N->getOperand(1); 5696 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5697 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5698 EVT VT = N->getValueType(0); 5699 5700 // fold vector ops 5701 if (VT.isVector()) { 5702 SDValue FoldedVOp = SimplifyVBinOp(N); 5703 if (FoldedVOp.getNode()) return FoldedVOp; 5704 } 5705 5706 // fold (fadd c1, c2) -> c1 + c2 5707 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5708 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); 5709 // canonicalize constant to RHS 5710 if (N0CFP && !N1CFP) 5711 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); 5712 // fold (fadd A, 0) -> A 5713 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5714 N1CFP->getValueAPF().isZero()) 5715 return N0; 5716 // fold (fadd A, (fneg B)) -> (fsub A, B) 5717 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 5718 isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 5719 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, 5720 GetNegatedExpression(N1, DAG, LegalOperations)); 5721 // fold (fadd (fneg A), B) -> (fsub B, A) 5722 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 5723 isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 5724 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, 5725 GetNegatedExpression(N0, DAG, LegalOperations)); 5726 5727 // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 5728 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5729 N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 5730 isa<ConstantFPSDNode>(N0.getOperand(1))) 5731 return DAG.getNode(ISD::FADD, 
N->getDebugLoc(), VT, N0.getOperand(0), 5732 DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5733 N0.getOperand(1), N1)); 5734 5735 // In unsafe math mode, we can fold chains of FADD's of the same value 5736 // into multiplications. This transform is not safe in general because 5737 // we are reducing the number of rounding steps. 5738 if (DAG.getTarget().Options.UnsafeFPMath && 5739 TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && 5740 !N0CFP && !N1CFP) { 5741 if (N0.getOpcode() == ISD::FMUL) { 5742 ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 5743 ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 5744 5745 // (fadd (fmul c, x), x) -> (fmul c+1, x) 5746 if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { 5747 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5748 SDValue(CFP00, 0), 5749 DAG.getConstantFP(1.0, VT)); 5750 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5751 N1, NewCFP); 5752 } 5753 5754 // (fadd (fmul x, c), x) -> (fmul c+1, x) 5755 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 5756 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5757 SDValue(CFP01, 0), 5758 DAG.getConstantFP(1.0, VT)); 5759 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5760 N1, NewCFP); 5761 } 5762 5763 // (fadd (fadd x, x), x) -> (fmul 3.0, x) 5764 if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && 5765 N0.getOperand(0) == N1) { 5766 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5767 N1, DAG.getConstantFP(3.0, VT)); 5768 } 5769 5770 // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) 5771 if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && 5772 N1.getOperand(0) == N1.getOperand(1) && 5773 N0.getOperand(1) == N1.getOperand(0)) { 5774 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5775 SDValue(CFP00, 0), 5776 DAG.getConstantFP(2.0, VT)); 5777 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5778 N0.getOperand(1), NewCFP); 5779 } 5780 5781 // (fadd (fmul x, c), (fadd 
x, x)) -> (fmul c+2, x) 5782 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 5783 N1.getOperand(0) == N1.getOperand(1) && 5784 N0.getOperand(0) == N1.getOperand(0)) { 5785 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5786 SDValue(CFP01, 0), 5787 DAG.getConstantFP(2.0, VT)); 5788 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5789 N0.getOperand(0), NewCFP); 5790 } 5791 } 5792 5793 if (N1.getOpcode() == ISD::FMUL) { 5794 ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 5795 ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); 5796 5797 // (fadd x, (fmul c, x)) -> (fmul c+1, x) 5798 if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { 5799 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5800 SDValue(CFP10, 0), 5801 DAG.getConstantFP(1.0, VT)); 5802 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5803 N0, NewCFP); 5804 } 5805 5806 // (fadd x, (fmul x, c)) -> (fmul c+1, x) 5807 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 5808 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5809 SDValue(CFP11, 0), 5810 DAG.getConstantFP(1.0, VT)); 5811 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5812 N0, NewCFP); 5813 } 5814 5815 // (fadd x, (fadd x, x)) -> (fmul 3.0, x) 5816 if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && 5817 N1.getOperand(0) == N0) { 5818 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5819 N0, DAG.getConstantFP(3.0, VT)); 5820 } 5821 5822 // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) 5823 if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && 5824 N1.getOperand(0) == N1.getOperand(1) && 5825 N0.getOperand(1) == N1.getOperand(0)) { 5826 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5827 SDValue(CFP10, 0), 5828 DAG.getConstantFP(2.0, VT)); 5829 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5830 N0.getOperand(1), NewCFP); 5831 } 5832 5833 // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) 5834 if (CFP11 && !CFP10 
&& N1.getOpcode() == ISD::FADD && 5835 N1.getOperand(0) == N1.getOperand(1) && 5836 N0.getOperand(0) == N1.getOperand(0)) { 5837 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5838 SDValue(CFP11, 0), 5839 DAG.getConstantFP(2.0, VT)); 5840 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5841 N0.getOperand(0), NewCFP); 5842 } 5843 } 5844 5845 // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) 5846 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 5847 N0.getOperand(0) == N0.getOperand(1) && 5848 N1.getOperand(0) == N1.getOperand(1) && 5849 N0.getOperand(0) == N1.getOperand(0)) { 5850 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5851 N0.getOperand(0), 5852 DAG.getConstantFP(4.0, VT)); 5853 } 5854 } 5855 5856 // FADD -> FMA combines: 5857 if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || 5858 DAG.getTarget().Options.UnsafeFPMath) && 5859 DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && 5860 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { 5861 5862 // fold (fadd (fmul x, y), z) -> (fma x, y, z) 5863 if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { 5864 return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, 5865 N0.getOperand(0), N0.getOperand(1), N1); 5866 } 5867 5868 // fold (fadd x, (fmul y, z)) -> (fma y, z, x) 5869 // Note: Commutes FADD operands. 
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);
    }
  }

  return SDValue();
}

/// visitFSUB - Combine an FSUB node: constant folding, algebraic
/// simplifications (several are valid only under unsafe-fp-math), and
/// FSUB -> FMA fusion when the target profits from it.  Returns the
/// replacement value, or a null SDValue if no combine applies.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  // (relies on getNode() constant-folding the two-ConstantFP case;
  // ppcf128 is excluded from these folds throughout this file)
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
  // fold (fsub A, 0) -> A
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fsub 0, B) -> -B
  // Unsafe-math only: 0.0 - B differs from -B in the sign of zero
  // (0.0 - (+0.0) is +0.0, while -(+0.0) is -0.0).
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N0CFP && N0CFP->getValueAPF().isZero()) {
    if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
      return GetNegatedExpression(N1, DAG, LegalOperations);
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, dl, VT, N1);
  }
  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold
  //   (fsub x, x) -> 0.0 &
  //   (fsub x, (fadd x, y)) -> (fneg y) &
  //   (fsub x, (fadd y, x)) -> (fneg y)
  // These require unsafe math: x - x is NaN (not 0.0) when x is NaN or Inf.
  if (DAG.getTarget().Options.UnsafeFPMath) {
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
                                          &DAG.getTarget().Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);
      else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
                                               &DAG.getTarget().Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines: same gating as the FADD -> FMA combines above.
  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
       DAG.getTarget().Options.UnsafeFPMath) &&
      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
      TLI.isOperationLegalOrCustom(ISD::FMA, VT)) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);
    }

    // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    // Both the FNEG and the FMUL must die here, or we would duplicate work.
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }
  }

  return SDValue();
}

/// visitFMUL - Combine an FMUL node: constant folding, canonicalization,
/// identity folds, and constant reassociation under unsafe-math.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI =
DAG.getTargetLoweringInfo(); 5978 5979 // fold vector ops 5980 if (VT.isVector()) { 5981 SDValue FoldedVOp = SimplifyVBinOp(N); 5982 if (FoldedVOp.getNode()) return FoldedVOp; 5983 } 5984 5985 // fold (fmul c1, c2) -> c1*c2 5986 if (N0CFP && N1CFP && VT != MVT::ppcf128) 5987 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); 5988 // canonicalize constant to RHS 5989 if (N0CFP && !N1CFP) 5990 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); 5991 // fold (fmul A, 0) -> 0 5992 if (DAG.getTarget().Options.UnsafeFPMath && 5993 N1CFP && N1CFP->getValueAPF().isZero()) 5994 return N1; 5995 // fold (fmul A, 0) -> 0, vector edition. 5996 if (DAG.getTarget().Options.UnsafeFPMath && 5997 ISD::isBuildVectorAllZeros(N1.getNode())) 5998 return N1; 5999 // fold (fmul A, 1.0) -> A 6000 if (N1CFP && N1CFP->isExactlyValue(1.0)) 6001 return N0; 6002 // fold (fmul X, 2.0) -> (fadd X, X) 6003 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 6004 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); 6005 // fold (fmul X, -1.0) -> (fneg X) 6006 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 6007 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 6008 return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); 6009 6010 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 6011 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, 6012 &DAG.getTarget().Options)) { 6013 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 6014 &DAG.getTarget().Options)) { 6015 // Both can be negated for free, check to see if at least one is cheaper 6016 // negated. 
6017 if (LHSNeg == 2 || RHSNeg == 2) 6018 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 6019 GetNegatedExpression(N0, DAG, LegalOperations), 6020 GetNegatedExpression(N1, DAG, LegalOperations)); 6021 } 6022 } 6023 6024 // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 6025 if (DAG.getTarget().Options.UnsafeFPMath && 6026 N1CFP && N0.getOpcode() == ISD::FMUL && 6027 N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) 6028 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), 6029 DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 6030 N0.getOperand(1), N1)); 6031 6032 return SDValue(); 6033} 6034 6035SDValue DAGCombiner::visitFMA(SDNode *N) { 6036 SDValue N0 = N->getOperand(0); 6037 SDValue N1 = N->getOperand(1); 6038 SDValue N2 = N->getOperand(2); 6039 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6040 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6041 EVT VT = N->getValueType(0); 6042 DebugLoc dl = N->getDebugLoc(); 6043 6044 if (N0CFP && N0CFP->isExactlyValue(1.0)) 6045 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); 6046 if (N1CFP && N1CFP->isExactlyValue(1.0)) 6047 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); 6048 6049 // Canonicalize (fma c, x, y) -> (fma x, c, y) 6050 if (N0CFP && !N1CFP) 6051 return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); 6052 6053 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 6054 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 6055 N2.getOpcode() == ISD::FMUL && 6056 N0 == N2.getOperand(0) && 6057 N2.getOperand(1).getOpcode() == ISD::ConstantFP) { 6058 return DAG.getNode(ISD::FMUL, dl, VT, N0, 6059 DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); 6060 } 6061 6062 6063 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 6064 if (DAG.getTarget().Options.UnsafeFPMath && 6065 N0.getOpcode() == ISD::FMUL && N1CFP && 6066 N0.getOperand(1).getOpcode() == ISD::ConstantFP) { 6067 return DAG.getNode(ISD::FMA, dl, 
VT, 6068 N0.getOperand(0), 6069 DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), 6070 N2); 6071 } 6072 6073 // (fma x, 1, y) -> (fadd x, y) 6074 // (fma x, -1, y) -> (fadd (fneg x), y) 6075 if (N1CFP) { 6076 if (N1CFP->isExactlyValue(1.0)) 6077 return DAG.getNode(ISD::FADD, dl, VT, N0, N2); 6078 6079 if (N1CFP->isExactlyValue(-1.0) && 6080 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 6081 SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); 6082 AddToWorkList(RHSNeg.getNode()); 6083 return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); 6084 } 6085 } 6086 6087 // (fma x, c, x) -> (fmul x, (c+1)) 6088 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { 6089 return DAG.getNode(ISD::FMUL, dl, VT, 6090 N0, 6091 DAG.getNode(ISD::FADD, dl, VT, 6092 N1, DAG.getConstantFP(1.0, VT))); 6093 } 6094 6095 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 6096 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 6097 N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { 6098 return DAG.getNode(ISD::FMUL, dl, VT, 6099 N0, 6100 DAG.getNode(ISD::FADD, dl, VT, 6101 N1, DAG.getConstantFP(-1.0, VT))); 6102 } 6103 6104 6105 return SDValue(); 6106} 6107 6108SDValue DAGCombiner::visitFDIV(SDNode *N) { 6109 SDValue N0 = N->getOperand(0); 6110 SDValue N1 = N->getOperand(1); 6111 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6112 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6113 EVT VT = N->getValueType(0); 6114 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6115 6116 // fold vector ops 6117 if (VT.isVector()) { 6118 SDValue FoldedVOp = SimplifyVBinOp(N); 6119 if (FoldedVOp.getNode()) return FoldedVOp; 6120 } 6121 6122 // fold (fdiv c1, c2) -> c1/c2 6123 if (N0CFP && N1CFP && VT != MVT::ppcf128) 6124 return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); 6125 6126 // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 
  // fold (fdiv X, c2) -> (fmul X, 1/c2): only under unsafe-math, since the
  // reciprocal is in general not exact and the fold loses precision.
  if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
    // Compute the reciprocal 1.0 / c2.
    APFloat N1APF = N1CFP->getValueAPF();
    APFloat Recip(N1APF.getSemantics(), 1); // 1.0
    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
    // Only do the transform if the reciprocal is a legal fp immediate that
    // isn't too nasty (eg NaN, denormal, ...).
    if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
        (!LegalOperations ||
         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
         // backend)... we should handle this gracefully after Legalize.
         // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
         TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
         TLI.isFPImmLegal(Recip, VT)))
      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
                         DAG.getConstantFP(Recip, VT));
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  // isNegatibleForFree returns 2 when negation is strictly cheaper, 1 when
  // it is merely free -- see its definition earlier in this file.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                       &DAG.getTarget().Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
                                         &DAG.getTarget().Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// visitFREM - The only combine for FREM is two-constant folding, which
/// getNode() performs when handed two ConstantFP operands.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);

  return SDValue();
}

/// visitFCOPYSIGN - Combine an FCOPYSIGN node: constant folding, folds for a
/// constant sign operand, and look-through of sign-irrelevant operations on
/// either operand.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP && VT != MVT::ppcf128) // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);

  // A constant sign operand reduces copysign to fabs or fneg(fabs).
  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x) iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
    }
  }

  // The magnitude operand's own sign is irrelevant, so strip operations
  // that only change it:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN,
N->getDebugLoc(), VT, 6206 N0.getOperand(0), N1); 6207 6208 // copysign(x, abs(y)) -> abs(x) 6209 if (N1.getOpcode() == ISD::FABS) 6210 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 6211 6212 // copysign(x, copysign(y,z)) -> copysign(x, z) 6213 if (N1.getOpcode() == ISD::FCOPYSIGN) 6214 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 6215 N0, N1.getOperand(1)); 6216 6217 // copysign(x, fp_extend(y)) -> copysign(x, y) 6218 // copysign(x, fp_round(y)) -> copysign(x, y) 6219 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 6220 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 6221 N0, N1.getOperand(0)); 6222 6223 return SDValue(); 6224} 6225 6226SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 6227 SDValue N0 = N->getOperand(0); 6228 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 6229 EVT VT = N->getValueType(0); 6230 EVT OpVT = N0.getValueType(); 6231 6232 // fold (sint_to_fp c1) -> c1fp 6233 if (N0C && OpVT != MVT::ppcf128 && 6234 // ...but only if the target supports immediate floating-point values 6235 (!LegalOperations || 6236 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 6237 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 6238 6239 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 6240 // but UINT_TO_FP is legal on this target, try to convert. 6241 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 6242 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 6243 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 6244 if (DAG.SignBitIsZero(N0)) 6245 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 6246 } 6247 6248 // The next optimizations are desireable only if SELECT_CC can be lowered. 
6249 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 6250 // having to say they don't support SELECT_CC on every type the DAG knows 6251 // about, since there is no way to mark an opcode illegal at all value types 6252 // (See also visitSELECT) 6253 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { 6254 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 6255 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 6256 !VT.isVector() && 6257 (!LegalOperations || 6258 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6259 SDValue Ops[] = 6260 { N0.getOperand(0), N0.getOperand(1), 6261 DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), 6262 N0.getOperand(2) }; 6263 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6264 } 6265 6266 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 6267 // (select_cc x, y, 1.0, 0.0,, cc) 6268 if (N0.getOpcode() == ISD::ZERO_EXTEND && 6269 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 6270 (!LegalOperations || 6271 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6272 SDValue Ops[] = 6273 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 6274 DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), 6275 N0.getOperand(0).getOperand(2) }; 6276 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6277 } 6278 } 6279 6280 return SDValue(); 6281} 6282 6283SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 6284 SDValue N0 = N->getOperand(0); 6285 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 6286 EVT VT = N->getValueType(0); 6287 EVT OpVT = N0.getValueType(); 6288 6289 // fold (uint_to_fp c1) -> c1fp 6290 if (N0C && OpVT != MVT::ppcf128 && 6291 // ...but only if the target supports immediate floating-point values 6292 (!LegalOperations || 6293 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 6294 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 6295 6296 
// If the input is a legal type, and UINT_TO_FP is not legal on this target, 6297 // but SINT_TO_FP is legal on this target, try to convert. 6298 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 6299 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 6300 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 6301 if (DAG.SignBitIsZero(N0)) 6302 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 6303 } 6304 6305 // The next optimizations are desireable only if SELECT_CC can be lowered. 6306 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 6307 // having to say they don't support SELECT_CC on every type the DAG knows 6308 // about, since there is no way to mark an opcode illegal at all value types 6309 // (See also visitSELECT) 6310 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { 6311 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 6312 6313 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && 6314 (!LegalOperations || 6315 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6316 SDValue Ops[] = 6317 { N0.getOperand(0), N0.getOperand(1), 6318 DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), 6319 N0.getOperand(2) }; 6320 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6321 } 6322 } 6323 6324 return SDValue(); 6325} 6326 6327SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 6328 SDValue N0 = N->getOperand(0); 6329 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6330 EVT VT = N->getValueType(0); 6331 6332 // fold (fp_to_sint c1fp) -> c1 6333 if (N0CFP) 6334 return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); 6335 6336 return SDValue(); 6337} 6338 6339SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 6340 SDValue N0 = N->getOperand(0); 6341 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6342 EVT VT = N->getValueType(0); 6343 6344 // fold (fp_to_uint c1fp) -> c1 6345 if (N0CFP && VT != 
MVT::ppcf128)
    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFP_ROUND - Combine an FP_ROUND node: constant folding, collapsing
/// round-of-extend and round-of-round chains, and hoisting the round above
/// a copysign.  Operand 1 is the "trunc" flag: 1 means the round is known to
/// be value-preserving.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP && N0.getValueType() != MVT::ppcf128)
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  // Safe because extending and rounding back to the original type is the
  // identity on the value.
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value preserving truncation if both round's are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // The sign operand Y need not be rounded; only the magnitude X is.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1);
    AddToWorkList(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// visitFP_ROUND_INREG - Constant-fold an FP_ROUND_INREG node by rounding
/// the constant to the in-register type and extending it back.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE(review): this local named 'EVT' shadows the type 'EVT' for the rest
  // of the function; consider renaming it (e.g. to ExtVT) in a follow-up.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
  }

  return SDValue();
6399} 6400 6401SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { 6402 SDValue N0 = N->getOperand(0); 6403 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6404 EVT VT = N->getValueType(0); 6405 6406 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 6407 if (N->hasOneUse() && 6408 N->use_begin()->getOpcode() == ISD::FP_ROUND) 6409 return SDValue(); 6410 6411 // fold (fp_extend c1fp) -> c1fp 6412 if (N0CFP && VT != MVT::ppcf128) 6413 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); 6414 6415 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 6416 // value of X. 6417 if (N0.getOpcode() == ISD::FP_ROUND 6418 && N0.getNode()->getConstantOperandVal(1) == 1) { 6419 SDValue In = N0.getOperand(0); 6420 if (In.getValueType() == VT) return In; 6421 if (VT.bitsLT(In.getValueType())) 6422 return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, 6423 In, N0.getOperand(1)); 6424 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); 6425 } 6426 6427 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 6428 if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && 6429 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6430 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 6431 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6432 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, 6433 LN0->getChain(), 6434 LN0->getBasePtr(), LN0->getPointerInfo(), 6435 N0.getValueType(), 6436 LN0->isVolatile(), LN0->isNonTemporal(), 6437 LN0->getAlignment()); 6438 CombineTo(N, ExtLoad); 6439 CombineTo(N0.getNode(), 6440 DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), 6441 N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), 6442 ExtLoad.getValue(1)); 6443 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
6444 } 6445 6446 return SDValue(); 6447} 6448 6449SDValue DAGCombiner::visitFNEG(SDNode *N) { 6450 SDValue N0 = N->getOperand(0); 6451 EVT VT = N->getValueType(0); 6452 6453 if (VT.isVector()) { 6454 SDValue FoldedVOp = SimplifyVUnaryOp(N); 6455 if (FoldedVOp.getNode()) return FoldedVOp; 6456 } 6457 6458 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), 6459 &DAG.getTarget().Options)) 6460 return GetNegatedExpression(N0, DAG, LegalOperations); 6461 6462 // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading 6463 // constant pool values. 6464 if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && 6465 !VT.isVector() && 6466 N0.getNode()->hasOneUse() && 6467 N0.getOperand(0).getValueType().isInteger()) { 6468 SDValue Int = N0.getOperand(0); 6469 EVT IntVT = Int.getValueType(); 6470 if (IntVT.isInteger() && !IntVT.isVector()) { 6471 Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, 6472 DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 6473 AddToWorkList(Int.getNode()); 6474 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 6475 VT, Int); 6476 } 6477 } 6478 6479 // (fneg (fmul c, x)) -> (fmul -c, x) 6480 if (N0.getOpcode() == ISD::FMUL) { 6481 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 6482 if (CFP1) { 6483 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 6484 N0.getOperand(0), 6485 DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, 6486 N0.getOperand(1))); 6487 } 6488 } 6489 6490 return SDValue(); 6491} 6492 6493SDValue DAGCombiner::visitFCEIL(SDNode *N) { 6494 SDValue N0 = N->getOperand(0); 6495 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6496 EVT VT = N->getValueType(0); 6497 6498 // fold (fceil c1) -> fceil(c1) 6499 if (N0CFP && VT != MVT::ppcf128) 6500 return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); 6501 6502 return SDValue(); 6503} 6504 6505SDValue DAGCombiner::visitFTRUNC(SDNode *N) { 6506 SDValue N0 = N->getOperand(0); 6507 ConstantFPSDNode 
*N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6508 EVT VT = N->getValueType(0); 6509 6510 // fold (ftrunc c1) -> ftrunc(c1) 6511 if (N0CFP && VT != MVT::ppcf128) 6512 return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); 6513 6514 return SDValue(); 6515} 6516 6517SDValue DAGCombiner::visitFFLOOR(SDNode *N) { 6518 SDValue N0 = N->getOperand(0); 6519 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6520 EVT VT = N->getValueType(0); 6521 6522 // fold (ffloor c1) -> ffloor(c1) 6523 if (N0CFP && VT != MVT::ppcf128) 6524 return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); 6525 6526 return SDValue(); 6527} 6528 6529SDValue DAGCombiner::visitFABS(SDNode *N) { 6530 SDValue N0 = N->getOperand(0); 6531 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6532 EVT VT = N->getValueType(0); 6533 6534 if (VT.isVector()) { 6535 SDValue FoldedVOp = SimplifyVUnaryOp(N); 6536 if (FoldedVOp.getNode()) return FoldedVOp; 6537 } 6538 6539 // fold (fabs c1) -> fabs(c1) 6540 if (N0CFP && VT != MVT::ppcf128) 6541 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 6542 // fold (fabs (fabs x)) -> (fabs x) 6543 if (N0.getOpcode() == ISD::FABS) 6544 return N->getOperand(0); 6545 // fold (fabs (fneg x)) -> (fabs x) 6546 // fold (fabs (fcopysign x, y)) -> (fabs x) 6547 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) 6548 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); 6549 6550 // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading 6551 // constant pool values. 
6552 if (!TLI.isFAbsFree(VT) && 6553 N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && 6554 N0.getOperand(0).getValueType().isInteger() && 6555 !N0.getOperand(0).getValueType().isVector()) { 6556 SDValue Int = N0.getOperand(0); 6557 EVT IntVT = Int.getValueType(); 6558 if (IntVT.isInteger() && !IntVT.isVector()) { 6559 Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, 6560 DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 6561 AddToWorkList(Int.getNode()); 6562 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 6563 N->getValueType(0), Int); 6564 } 6565 } 6566 6567 return SDValue(); 6568} 6569 6570SDValue DAGCombiner::visitBRCOND(SDNode *N) { 6571 SDValue Chain = N->getOperand(0); 6572 SDValue N1 = N->getOperand(1); 6573 SDValue N2 = N->getOperand(2); 6574 6575 // If N is a constant we could fold this into a fallthrough or unconditional 6576 // branch. However that doesn't happen very often in normal code, because 6577 // Instcombine/SimplifyCFG should have handled the available opportunities. 6578 // If we did this folding here, it would be necessary to update the 6579 // MachineBasicBlock CFG, which is awkward. 6580 6581 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 6582 // on the target. 6583 if (N1.getOpcode() == ISD::SETCC && 6584 TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { 6585 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 6586 Chain, N1.getOperand(2), 6587 N1.getOperand(0), N1.getOperand(1), N2); 6588 } 6589 6590 if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || 6591 ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && 6592 (N1.getOperand(0).hasOneUse() && 6593 N1.getOperand(0).getOpcode() == ISD::SRL))) { 6594 SDNode *Trunc = 0; 6595 if (N1.getOpcode() == ISD::TRUNCATE) { 6596 // Look pass the truncate. 
6597 Trunc = N1.getNode(); 6598 N1 = N1.getOperand(0); 6599 } 6600 6601 // Match this pattern so that we can generate simpler code: 6602 // 6603 // %a = ... 6604 // %b = and i32 %a, 2 6605 // %c = srl i32 %b, 1 6606 // brcond i32 %c ... 6607 // 6608 // into 6609 // 6610 // %a = ... 6611 // %b = and i32 %a, 2 6612 // %c = setcc eq %b, 0 6613 // brcond %c ... 6614 // 6615 // This applies only when the AND constant value has one bit set and the 6616 // SRL constant is equal to the log2 of the AND constant. The back-end is 6617 // smart enough to convert the result into a TEST/JMP sequence. 6618 SDValue Op0 = N1.getOperand(0); 6619 SDValue Op1 = N1.getOperand(1); 6620 6621 if (Op0.getOpcode() == ISD::AND && 6622 Op1.getOpcode() == ISD::Constant) { 6623 SDValue AndOp1 = Op0.getOperand(1); 6624 6625 if (AndOp1.getOpcode() == ISD::Constant) { 6626 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 6627 6628 if (AndConst.isPowerOf2() && 6629 cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { 6630 SDValue SetCC = 6631 DAG.getSetCC(N->getDebugLoc(), 6632 TLI.getSetCCResultType(Op0.getValueType()), 6633 Op0, DAG.getConstant(0, Op0.getValueType()), 6634 ISD::SETNE); 6635 6636 SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6637 MVT::Other, Chain, SetCC, N2); 6638 // Don't add the new BRCond into the worklist or else SimplifySelectCC 6639 // will convert it back to (X & C1) >> C2. 6640 CombineTo(N, NewBRCond, false); 6641 // Truncate is dead. 6642 if (Trunc) { 6643 removeFromWorkList(Trunc); 6644 DAG.DeleteNode(Trunc); 6645 } 6646 // Replace the uses of SRL with SETCC 6647 WorkListRemover DeadNodes(*this); 6648 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 6649 removeFromWorkList(N1.getNode()); 6650 DAG.DeleteNode(N1.getNode()); 6651 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6652 } 6653 } 6654 } 6655 6656 if (Trunc) 6657 // Restore N1 if the above transformation doesn't match. 
6658 N1 = N->getOperand(1); 6659 } 6660 6661 // Transform br(xor(x, y)) -> br(x != y) 6662 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 6663 if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { 6664 SDNode *TheXor = N1.getNode(); 6665 SDValue Op0 = TheXor->getOperand(0); 6666 SDValue Op1 = TheXor->getOperand(1); 6667 if (Op0.getOpcode() == Op1.getOpcode()) { 6668 // Avoid missing important xor optimizations. 6669 SDValue Tmp = visitXOR(TheXor); 6670 if (Tmp.getNode() && Tmp.getNode() != TheXor) { 6671 DEBUG(dbgs() << "\nReplacing.8 "; 6672 TheXor->dump(&DAG); 6673 dbgs() << "\nWith: "; 6674 Tmp.getNode()->dump(&DAG); 6675 dbgs() << '\n'); 6676 WorkListRemover DeadNodes(*this); 6677 DAG.ReplaceAllUsesOfValueWith(N1, Tmp); 6678 removeFromWorkList(TheXor); 6679 DAG.DeleteNode(TheXor); 6680 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6681 MVT::Other, Chain, Tmp, N2); 6682 } 6683 } 6684 6685 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 6686 bool Equal = false; 6687 if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) 6688 if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && 6689 Op0.getOpcode() == ISD::XOR) { 6690 TheXor = Op0.getNode(); 6691 Equal = true; 6692 } 6693 6694 EVT SetCCVT = N1.getValueType(); 6695 if (LegalTypes) 6696 SetCCVT = TLI.getSetCCResultType(SetCCVT); 6697 SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), 6698 SetCCVT, 6699 Op0, Op1, 6700 Equal ? ISD::SETEQ : ISD::SETNE); 6701 // Replace the uses of XOR with SETCC 6702 WorkListRemover DeadNodes(*this); 6703 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 6704 removeFromWorkList(N1.getNode()); 6705 DAG.DeleteNode(N1.getNode()); 6706 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6707 MVT::Other, Chain, SetCC, N2); 6708 } 6709 } 6710 6711 return SDValue(); 6712} 6713 6714// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
6715// 6716SDValue DAGCombiner::visitBR_CC(SDNode *N) { 6717 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 6718 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 6719 6720 // If N is a constant we could fold this into a fallthrough or unconditional 6721 // branch. However that doesn't happen very often in normal code, because 6722 // Instcombine/SimplifyCFG should have handled the available opportunities. 6723 // If we did this folding here, it would be necessary to update the 6724 // MachineBasicBlock CFG, which is awkward. 6725 6726 // Use SimplifySetCC to simplify SETCC's. 6727 SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), 6728 CondLHS, CondRHS, CC->get(), N->getDebugLoc(), 6729 false); 6730 if (Simp.getNode()) AddToWorkList(Simp.getNode()); 6731 6732 // fold to a simpler setcc 6733 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 6734 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 6735 N->getOperand(0), Simp.getOperand(2), 6736 Simp.getOperand(0), Simp.getOperand(1), 6737 N->getOperand(4)); 6738 6739 return SDValue(); 6740} 6741 6742/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that 6743/// uses N as its base pointer and that N may be folded in the load / store 6744/// addressing mode. 
6745static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 6746 SelectionDAG &DAG, 6747 const TargetLowering &TLI) { 6748 EVT VT; 6749 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 6750 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 6751 return false; 6752 VT = Use->getValueType(0); 6753 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 6754 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 6755 return false; 6756 VT = ST->getValue().getValueType(); 6757 } else 6758 return false; 6759 6760 AddrMode AM; 6761 if (N->getOpcode() == ISD::ADD) { 6762 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6763 if (Offset) 6764 // [reg +/- imm] 6765 AM.BaseOffs = Offset->getSExtValue(); 6766 else 6767 // [reg +/- reg] 6768 AM.Scale = 1; 6769 } else if (N->getOpcode() == ISD::SUB) { 6770 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6771 if (Offset) 6772 // [reg +/- imm] 6773 AM.BaseOffs = -Offset->getSExtValue(); 6774 else 6775 // [reg +/- reg] 6776 AM.Scale = 1; 6777 } else 6778 return false; 6779 6780 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 6781} 6782 6783/// CombineToPreIndexedLoadStore - Try turning a load / store into a 6784/// pre-indexed load / store when the base pointer is an add or subtract 6785/// and it has other uses besides the load / store. After the 6786/// transformation, the new indexed load / store has effectively folded 6787/// the add / subtract in and all of its other uses are redirected to the 6788/// new load / store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed modes are only formed once the whole DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Accept only unindexed loads / stores whose memory VT the target can
  // pre-increment or pre-decrement; extract the base pointer either way.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;
  // Don't create a indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  Folding the add into a store of (a value derived from) the new
  // base pointer would make the store an operand of its own input.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper; shared across iterations so repeated
  // predecessor queries don't re-walk the same subgraph.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Use = *I;
    if (Use == N)
      continue;
    // Check #3: folding Ptr away while a predecessor of N still uses it
    // would create a cycle.
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: require at least one use that is not itself foldable as an
  // addressing mode, otherwise the transformation buys nothing.
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // Indexed nodes produce an extra result (the updated base pointer), so the
  // chain moves from result 1 to result 2 for loads.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed modes are only formed once the whole DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Accept only unindexed loads / stores whose memory VT the target can
  // post-increment or post-decrement.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer must have another user (the add / sub we want to fold).
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan the other users of the base pointer for a foldable add / sub.
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Op = *I;
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create a indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
        SDNode *Use = *II;
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode::use_iterator III = Use->use_begin(),
                 EEE = Use->use_end(); III != EEE; ++III) {
            SDNode *UseUse = *III;
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2: folding Op into N must not create a cycle.
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // The indexed node has an extra result (the updated base pointer),
        // so the chain moves from result 1 to result 2 for loads.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

/// visitLOAD - Combine a LOAD node: delete dead loads, forward stored values,
/// improve alignment, re-chain via alias analysis, and form indexed loads.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3 = add v2, c
        // Now we replace use of chain2 with chain1. This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
        // Neither the value nor the updated base pointer is used: replace
        // both with undef and forward the chain.
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)));
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0); // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align);
    }
  }

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr, LD->getPointerInfo(),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->isInvariant(), LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getPointerInfo(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out.
/// If so, return the byte size
/// being masked out and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) is the "no match" result.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load ptr), constant).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask. Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1. Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
  if (NotMaskLZ & 7) return Result; // Must be multiple of a byte.
  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
  if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result; // All zero mask.

  // See if we have a continuous run of bits. If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Only 1-, 2- and 4-byte cleared regions map onto narrow store widths.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;    // Width of the cleared region in bytes.
  Result.second = NotMaskTZ/8;   // Byte offset of the region from bit 0.
  return Result;
}


/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
/// provides a value as specified by MaskInfo. If so, replace the specified
/// store with a narrower store of truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;   // Width of the masked region.
  unsigned ByteShift = MaskInfo.second; // Byte offset of the region.
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this. If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;

  // Check that it is legal on the target to do this. It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return 0;

  // Okay, we can do this! Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On big-endian targets the interesting bytes sit at the opposite end of
  // the value, so the byte offset is mirrored.
  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
/// of the loaded bits, try narrowing the load and store if it would end up
/// being a win for performance or code size.
7300SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 7301 StoreSDNode *ST = cast<StoreSDNode>(N); 7302 if (ST->isVolatile()) 7303 return SDValue(); 7304 7305 SDValue Chain = ST->getChain(); 7306 SDValue Value = ST->getValue(); 7307 SDValue Ptr = ST->getBasePtr(); 7308 EVT VT = Value.getValueType(); 7309 7310 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 7311 return SDValue(); 7312 7313 unsigned Opc = Value.getOpcode(); 7314 7315 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 7316 // is a byte mask indicating a consecutive number of bytes, check to see if 7317 // Y is known to provide just those bytes. If so, we try to replace the 7318 // load + replace + store sequence with a single (narrower) store, which makes 7319 // the load dead. 7320 if (Opc == ISD::OR) { 7321 std::pair<unsigned, unsigned> MaskedLoad; 7322 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 7323 if (MaskedLoad.first) 7324 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 7325 Value.getOperand(1), ST,this)) 7326 return SDValue(NewST, 0); 7327 7328 // Or is commutative, so try swapping X and Y. 7329 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 7330 if (MaskedLoad.first) 7331 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 7332 Value.getOperand(0), ST,this)) 7333 return SDValue(NewST, 0); 7334 } 7335 7336 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 7337 Value.getOperand(1).getOpcode() != ISD::Constant) 7338 return SDValue(); 7339 7340 SDValue N0 = Value.getOperand(0); 7341 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 7342 Chain == SDValue(N0.getNode(), 1)) { 7343 LoadSDNode *LD = cast<LoadSDNode>(N0); 7344 if (LD->getBasePtr() != Ptr || 7345 LD->getPointerInfo().getAddrSpace() != 7346 ST->getPointerInfo().getAddrSpace()) 7347 return SDValue(); 7348 7349 // Find the type to narrow it the load / op / store to. 
7350 SDValue N1 = Value.getOperand(1); 7351 unsigned BitWidth = N1.getValueSizeInBits(); 7352 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 7353 if (Opc == ISD::AND) 7354 Imm ^= APInt::getAllOnesValue(BitWidth); 7355 if (Imm == 0 || Imm.isAllOnesValue()) 7356 return SDValue(); 7357 unsigned ShAmt = Imm.countTrailingZeros(); 7358 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 7359 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 7360 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 7361 while (NewBW < BitWidth && 7362 !(TLI.isOperationLegalOrCustom(Opc, NewVT) && 7363 TLI.isNarrowingProfitable(VT, NewVT))) { 7364 NewBW = NextPowerOf2(NewBW); 7365 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 7366 } 7367 if (NewBW >= BitWidth) 7368 return SDValue(); 7369 7370 // If the lsb changed does not start at the type bitwidth boundary, 7371 // start at the previous one. 7372 if (ShAmt % NewBW) 7373 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 7374 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); 7375 if ((Imm & Mask) == Imm) { 7376 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 7377 if (Opc == ISD::AND) 7378 NewImm ^= APInt::getAllOnesValue(NewBW); 7379 uint64_t PtrOff = ShAmt / 8; 7380 // For big endian targets, we need to adjust the offset to the pointer to 7381 // load the correct bytes. 
7382 if (TLI.isBigEndian()) 7383 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 7384 7385 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 7386 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 7387 if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy)) 7388 return SDValue(); 7389 7390 SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), 7391 Ptr.getValueType(), Ptr, 7392 DAG.getConstant(PtrOff, Ptr.getValueType())); 7393 SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), 7394 LD->getChain(), NewPtr, 7395 LD->getPointerInfo().getWithOffset(PtrOff), 7396 LD->isVolatile(), LD->isNonTemporal(), 7397 LD->isInvariant(), NewAlign); 7398 SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, 7399 DAG.getConstant(NewImm, NewVT)); 7400 SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), 7401 NewVal, NewPtr, 7402 ST->getPointerInfo().getWithOffset(PtrOff), 7403 false, false, NewAlign); 7404 7405 AddToWorkList(NewPtr.getNode()); 7406 AddToWorkList(NewLD.getNode()); 7407 AddToWorkList(NewVal.getNode()); 7408 WorkListRemover DeadNodes(*this); 7409 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); 7410 ++OpsNarrowed; 7411 return NewST; 7412 } 7413 } 7414 7415 return SDValue(); 7416} 7417 7418/// TransformFPLoadStorePair - For a given floating point load / store pair, 7419/// if the load value isn't used by any other operations, then consider 7420/// transforming the pair to integer load / store operations if the target 7421/// deems the transformation profitable. 
7422SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 7423 StoreSDNode *ST = cast<StoreSDNode>(N); 7424 SDValue Chain = ST->getChain(); 7425 SDValue Value = ST->getValue(); 7426 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 7427 Value.hasOneUse() && 7428 Chain == SDValue(Value.getNode(), 1)) { 7429 LoadSDNode *LD = cast<LoadSDNode>(Value); 7430 EVT VT = LD->getMemoryVT(); 7431 if (!VT.isFloatingPoint() || 7432 VT != ST->getMemoryVT() || 7433 LD->isNonTemporal() || 7434 ST->isNonTemporal() || 7435 LD->getPointerInfo().getAddrSpace() != 0 || 7436 ST->getPointerInfo().getAddrSpace() != 0) 7437 return SDValue(); 7438 7439 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 7440 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 7441 !TLI.isOperationLegal(ISD::STORE, IntVT) || 7442 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 7443 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 7444 return SDValue(); 7445 7446 unsigned LDAlign = LD->getAlignment(); 7447 unsigned STAlign = ST->getAlignment(); 7448 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 7449 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 7450 if (LDAlign < ABIAlign || STAlign < ABIAlign) 7451 return SDValue(); 7452 7453 SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), 7454 LD->getChain(), LD->getBasePtr(), 7455 LD->getPointerInfo(), 7456 false, false, false, LDAlign); 7457 7458 SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), 7459 NewLD, ST->getBasePtr(), 7460 ST->getPointerInfo(), 7461 false, false, STAlign); 7462 7463 AddToWorkList(NewLD.getNode()); 7464 AddToWorkList(NewST.getNode()); 7465 WorkListRemover DeadNodes(*this); 7466 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 7467 ++LdStFP2Int; 7468 return NewST; 7469 } 7470 7471 return SDValue(); 7472} 7473 7474/// Returns the base pointer and an integer offset from that object. 
7475static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) { 7476 if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) { 7477 int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue(); 7478 SDValue Base = Ptr->getOperand(0); 7479 return std::make_pair(Base, Offset); 7480 } 7481 7482 return std::make_pair(Ptr, 0); 7483} 7484 7485/// Holds a pointer to an LSBaseSDNode as well as information on where it 7486/// is located in a sequence of memory operations connected by a chain. 7487struct MemOpLink { 7488 MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): 7489 MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } 7490 // Ptr to the mem node. 7491 LSBaseSDNode *MemNode; 7492 // Offset from the base ptr. 7493 int64_t OffsetFromBase; 7494 // What is the sequence number of this mem node. 7495 // Lowest mem operand in the DAG starts at zero. 7496 unsigned SequenceNum; 7497}; 7498 7499/// Sorts store nodes in a link according to their offset from a shared 7500// base ptr. 7501struct ConsecutiveMemoryChainSorter { 7502 bool operator()(MemOpLink LHS, MemOpLink RHS) { 7503 return LHS.OffsetFromBase < RHS.OffsetFromBase; 7504 } 7505}; 7506 7507bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { 7508 EVT MemVT = St->getMemoryVT(); 7509 int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; 7510 7511 // Don't merge vectors into wider inputs. 7512 if (MemVT.isVector() || !MemVT.isSimple()) 7513 return false; 7514 7515 // Perform an early exit check. Do not bother looking at stored values that 7516 // are not constants or loads. 7517 SDValue StoredVal = St->getValue(); 7518 bool IsLoadSrc = isa<LoadSDNode>(StoredVal); 7519 if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) && 7520 !IsLoadSrc) 7521 return false; 7522 7523 // Only look at ends of store sequences. 
7524 SDValue Chain = SDValue(St, 1); 7525 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 7526 return false; 7527 7528 // This holds the base pointer and the offset in bytes from the base pointer. 7529 std::pair<SDValue, int64_t> BasePtr = 7530 GetPointerBaseAndOffset(St->getBasePtr()); 7531 7532 // We must have a base and an offset. 7533 if (!BasePtr.first.getNode()) 7534 return false; 7535 7536 // Do not handle stores to undef base pointers. 7537 if (BasePtr.first.getOpcode() == ISD::UNDEF) 7538 return false; 7539 7540 SmallVector<MemOpLink, 8> StoreNodes; 7541 // Walk up the chain and look for nodes with offsets from the same 7542 // base pointer. Stop when reaching an instruction with a different kind 7543 // or instruction which has a different base pointer. 7544 unsigned Seq = 0; 7545 StoreSDNode *Index = St; 7546 while (Index) { 7547 // If the chain has more than one use, then we can't reorder the mem ops. 7548 if (Index != St && !SDValue(Index, 1)->hasOneUse()) 7549 break; 7550 7551 // Find the base pointer and offset for this memory node. 7552 std::pair<SDValue, int64_t> Ptr = 7553 GetPointerBaseAndOffset(Index->getBasePtr()); 7554 7555 // Check that the base pointer is the same as the original one. 7556 if (Ptr.first.getNode() != BasePtr.first.getNode()) 7557 break; 7558 7559 // Check that the alignment is the same. 7560 if (Index->getAlignment() != St->getAlignment()) 7561 break; 7562 7563 // The memory operands must not be volatile. 7564 if (Index->isVolatile() || Index->isIndexed()) 7565 break; 7566 7567 // No truncation. 7568 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 7569 if (St->isTruncatingStore()) 7570 break; 7571 7572 // The stored memory type must be the same. 7573 if (Index->getMemoryVT() != MemVT) 7574 break; 7575 7576 // We do not allow unaligned stores because we want to prevent overriding 7577 // stores. 
7578 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 7579 break; 7580 7581 // We found a potential memory operand to merge. 7582 StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); 7583 7584 // Move up the chain to the next memory operation. 7585 Index = dyn_cast<StoreSDNode>(Index->getChain().getNode()); 7586 } 7587 7588 // Check if there is anything to merge. 7589 if (StoreNodes.size() < 2) 7590 return false; 7591 7592 // Sort the memory operands according to their distance from the base pointer. 7593 std::sort(StoreNodes.begin(), StoreNodes.end(), 7594 ConsecutiveMemoryChainSorter()); 7595 7596 // Scan the memory operations on the chain and find the first non-consecutive 7597 // store memory address. 7598 unsigned LastConsecutiveStore = 0; 7599 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 7600 for (unsigned i=1; i<StoreNodes.size(); ++i) { 7601 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 7602 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 7603 break; 7604 7605 // Mark this node as useful. 7606 LastConsecutiveStore = i; 7607 } 7608 7609 // The node with the lowest store address. 7610 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 7611 7612 // Store the constants into memory as one consecutive store. 7613 if (!IsLoadSrc) { 7614 unsigned LastLegalType = 0; 7615 unsigned LastLegalVectorType = 0; 7616 bool NonZero = false; 7617 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 7618 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7619 SDValue StoredVal = St->getValue(); 7620 7621 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 7622 NonZero |= !C->isNullValue(); 7623 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 7624 NonZero |= !C->getConstantFPValue()->isNullValue(); 7625 } else { 7626 // Non constant. 7627 break; 7628 } 7629 7630 // Find a legal type for the constant store. 
7631 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 7632 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7633 if (TLI.isTypeLegal(StoreTy)) 7634 LastLegalType = i+1; 7635 7636 // Find a legal type for the vector store. 7637 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 7638 if (TLI.isTypeLegal(Ty)) 7639 LastLegalVectorType = i + 1; 7640 } 7641 7642 // We only use vectors if the constant is known to be zero. 7643 if (NonZero) 7644 LastLegalVectorType = 0; 7645 7646 // Check if we found a legal integer type to store. 7647 if (LastLegalType == 0 && LastLegalVectorType == 0) 7648 return false; 7649 7650 bool UseVector = LastLegalVectorType > LastLegalType; 7651 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 7652 7653 // Make sure we have something to merge. 7654 if (NumElem < 2) 7655 return false; 7656 7657 unsigned EarliestNodeUsed = 0; 7658 for (unsigned i=0; i < NumElem; ++i) { 7659 // Find a chain for the new wide-store operand. Notice that some 7660 // of the store nodes that we found may not be selected for inclusion 7661 // in the wide store. The chain we use needs to be the chain of the 7662 // earliest store node which is *used* and replaced by the wide store. 7663 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 7664 EarliestNodeUsed = i; 7665 } 7666 7667 // The earliest Node in the DAG. 7668 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 7669 DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); 7670 7671 SDValue StoredVal; 7672 if (UseVector) { 7673 // Find a legal type for the vector store. 7674 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 7675 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 7676 StoredVal = DAG.getConstant(0, Ty); 7677 } else { 7678 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 7679 APInt StoreInt(StoreBW, 0); 7680 7681 // Construct a single integer constant which is made of the smaller 7682 // constant inputs. 
7683 bool IsLE = TLI.isLittleEndian(); 7684 for (unsigned i = 0; i < NumElem ; ++i) { 7685 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 7686 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 7687 SDValue Val = St->getValue(); 7688 StoreInt<<=ElementSizeBytes*8; 7689 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 7690 StoreInt|=C->getAPIntValue().zext(StoreBW); 7691 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 7692 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 7693 } else { 7694 assert(false && "Invalid constant element type"); 7695 } 7696 } 7697 7698 // Create the new Load and Store operations. 7699 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7700 StoredVal = DAG.getConstant(StoreInt, StoreTy); 7701 } 7702 7703 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 7704 FirstInChain->getBasePtr(), 7705 FirstInChain->getPointerInfo(), 7706 false, false, 7707 FirstInChain->getAlignment()); 7708 7709 // Replace the first store with the new store 7710 CombineTo(EarliestOp, NewStore); 7711 // Erase all other stores. 7712 for (unsigned i = 0; i < NumElem ; ++i) { 7713 if (StoreNodes[i].MemNode == EarliestOp) 7714 continue; 7715 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7716 DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain()); 7717 removeFromWorkList(St); 7718 DAG.DeleteNode(St); 7719 } 7720 7721 return true; 7722 } 7723 7724 // Below we handle the case of multiple consecutive stores that 7725 // come from multiple consecutive loads. We merge them into a single 7726 // wide load and a single wide store. 7727 7728 // Look for load nodes which are used by the stored values. 7729 SmallVector<MemOpLink, 8> LoadNodes; 7730 7731 // Find acceptable loads. Loads need to have the same chain (token factor), 7732 // must not be zext, volatile, indexed, and they must be consecutive. 
7733 SDValue LdBasePtr; 7734 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 7735 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7736 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 7737 if (!Ld) break; 7738 7739 // Loads must only have one use. 7740 if (!Ld->hasNUsesOfValue(1, 0)) 7741 break; 7742 7743 // Check that the alignment is the same as the stores. 7744 if (Ld->getAlignment() != St->getAlignment()) 7745 break; 7746 7747 // The memory operands must not be volatile. 7748 if (Ld->isVolatile() || Ld->isIndexed()) 7749 break; 7750 7751 // We do not accept ext loads. 7752 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 7753 break; 7754 7755 // The stored memory type must be the same. 7756 if (Ld->getMemoryVT() != MemVT) 7757 break; 7758 7759 std::pair<SDValue, int64_t> LdPtr = 7760 GetPointerBaseAndOffset(Ld->getBasePtr()); 7761 7762 // If this is not the first ptr that we check. 7763 if (LdBasePtr.getNode()) { 7764 // The base ptr must be the same. 7765 if (LdPtr.first != LdBasePtr) 7766 break; 7767 } else { 7768 // Check that all other base pointers are the same as this one. 7769 LdBasePtr = LdPtr.first; 7770 } 7771 7772 // We found a potential memory operand to merge. 7773 LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); 7774 } 7775 7776 if (LoadNodes.size() < 2) 7777 return false; 7778 7779 // Scan the memory operations on the chain and find the first non-consecutive 7780 // load memory address. These variables hold the index in the store node 7781 // array. 7782 unsigned LastConsecutiveLoad = 0; 7783 // This variable refers to the size and not index in the array. 7784 unsigned LastLegalVectorType = 0; 7785 unsigned LastLegalIntegerType = 0; 7786 StartAddress = LoadNodes[0].OffsetFromBase; 7787 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 7788 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 7789 // All loads much share the same chain. 
7790 if (LoadNodes[i].MemNode->getChain() != FirstChain) 7791 break; 7792 7793 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 7794 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 7795 break; 7796 LastConsecutiveLoad = i; 7797 7798 // Find a legal type for the vector store. 7799 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 7800 if (TLI.isTypeLegal(StoreTy)) 7801 LastLegalVectorType = i + 1; 7802 7803 // Find a legal type for the integer store. 7804 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 7805 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7806 if (TLI.isTypeLegal(StoreTy)) 7807 LastLegalIntegerType = i + 1; 7808 } 7809 7810 // Only use vector types if the vector type is larger than the integer type. 7811 // If they are the same, use integers. 7812 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; 7813 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 7814 7815 // We add +1 here because the LastXXX variables refer to location while 7816 // the NumElem refers to array/index size. 7817 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 7818 NumElem = std::min(LastLegalType, NumElem); 7819 7820 if (NumElem < 2) 7821 return false; 7822 7823 // The earliest Node in the DAG. 7824 unsigned EarliestNodeUsed = 0; 7825 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 7826 for (unsigned i=1; i<NumElem; ++i) { 7827 // Find a chain for the new wide-store operand. Notice that some 7828 // of the store nodes that we found may not be selected for inclusion 7829 // in the wide store. The chain we use needs to be the chain of the 7830 // earliest store node which is *used* and replaced by the wide store. 7831 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 7832 EarliestNodeUsed = i; 7833 } 7834 7835 // Find if it is better to use vectors or integers to load and store 7836 // to memory. 
7837 EVT JointMemOpVT; 7838 if (UseVectorTy) { 7839 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 7840 } else { 7841 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 7842 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7843 } 7844 7845 DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); 7846 DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); 7847 7848 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 7849 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 7850 FirstLoad->getChain(), 7851 FirstLoad->getBasePtr(), 7852 FirstLoad->getPointerInfo(), 7853 false, false, false, 7854 FirstLoad->getAlignment()); 7855 7856 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 7857 FirstInChain->getBasePtr(), 7858 FirstInChain->getPointerInfo(), false, false, 7859 FirstInChain->getAlignment()); 7860 7861 // Replace one of the loads with the new load. 7862 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 7863 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 7864 SDValue(NewLoad.getNode(), 1)); 7865 7866 // Remove the rest of the load chains. 7867 for (unsigned i = 1; i < NumElem ; ++i) { 7868 // Replace all chain users of the old load nodes with the chain of the new 7869 // load node. 7870 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode); 7871 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain()); 7872 } 7873 7874 // Replace the first store with the new store. 7875 CombineTo(EarliestOp, NewStore); 7876 // Erase all other stores. 7877 for (unsigned i = 0; i < NumElem ; ++i) { 7878 // Remove all Store nodes. 
    // Skip the node that was promoted into the wide store; all other narrow
    // stores are now redundant and are unhooked from the chain and deleted.
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    removeFromWorkList(St);
    DAG.DeleteNode(St);
  }

  return true;
}

// visitSTORE - Simplify a STORE node. Tries a sequence of mutually exclusive
// transforms; each either returns a replacement value (ending the visit) or
// falls through to the next. The ordering below is deliberate: cheaper /
// strictly-more-profitable rewrites are attempted first.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign);
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not increase
    // the number of stores. For example, on x86-32 an f64 can be stored in one
    // processor operation but an i64 (which is not legal) requires two. So the
    // transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        // Reinterpret the f32 bits as an i32 constant when an i32 store is
        // available (always legal pre-legalization for non-volatile stores).
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                              ST->isNonTemporal(), ST->getAlignment());
        }
        break;
      case MVT::f64:
        // Prefer a single i64 store of the bit pattern when i64 is usable.
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                              ST->isNonTemporal(), ST->getAlignment());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing. Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          // Lo is stored at the lower address; swap halves on big-endian.
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();

          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment());
          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          // The +4 half can only rely on min(orig alignment, 4).
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment);
          // Tie both halves together so neither is dropped from the chain.
          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align);
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                      ST->getPointerInfo(),
                                      ST->getMemoryVT(), ST->isVolatile(),
                                      ST->isNonTemporal(), ST->getAlignment());
      } else {
        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                 ST->getPointerInfo(),
                                 ST->isVolatile(), ST->isNonTemporal(),
                                 ST->getAlignment());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used. For example:
    // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorkList(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                               ST->isVolatile(), ST->isNonTemporal(),
                               ST->getAlignment());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store. We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                             ST->isVolatile(), ST->isNonTemporal(),
                             ST->getAlignment());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes && MergeConsecutiveStores(ST))
    return SDValue(N, 0);

  return ReduceLoadOpStoreWidth(N);
}

// visitINSERT_VECTOR_ELT - Fold an insert of a known element into a
// BUILD_VECTOR (or UNDEF) operand by rebuilding the BUILD_VECTOR with the
// element replaced in place.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  DebugLoc dl = N->getDebugLoc();

  // If the inserted element is an UNDEF, just use the input vector.
8106 if (InVal.getOpcode() == ISD::UNDEF) 8107 return InVec; 8108 8109 EVT VT = InVec.getValueType(); 8110 8111 // If we can't generate a legal BUILD_VECTOR, exit 8112 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 8113 return SDValue(); 8114 8115 // Check that we know which element is being inserted 8116 if (!isa<ConstantSDNode>(EltNo)) 8117 return SDValue(); 8118 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 8119 8120 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 8121 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 8122 // vector elements. 8123 SmallVector<SDValue, 8> Ops; 8124 if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 8125 Ops.append(InVec.getNode()->op_begin(), 8126 InVec.getNode()->op_end()); 8127 } else if (InVec.getOpcode() == ISD::UNDEF) { 8128 unsigned NElts = VT.getVectorNumElements(); 8129 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 8130 } else { 8131 return SDValue(); 8132 } 8133 8134 // Insert the element 8135 if (Elt < Ops.size()) { 8136 // All the operands of BUILD_VECTOR must have the same type; 8137 // we enforce that here. 8138 EVT OpVT = Ops[0].getValueType(); 8139 if (InVal.getValueType() != OpVT) 8140 InVal = OpVT.bitsGT(InVal.getValueType()) ? 8141 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : 8142 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); 8143 Ops[Elt] = InVal; 8144 } 8145 8146 // Return the new vector 8147 return DAG.getNode(ISD::BUILD_VECTOR, dl, 8148 VT, &Ops[0], Ops.size()); 8149} 8150 8151SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 8152 // (vextract (scalar_to_vector val, 0) -> val 8153 SDValue InVec = N->getOperand(0); 8154 EVT VT = InVec.getValueType(); 8155 EVT NVT = N->getValueType(0); 8156 8157 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 8158 // Check if the result type doesn't match the inserted element type. 
    // A SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
      && ConstEltNo && !LegalOperations) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from. Mask values in
    // [NumElem, 2*NumElem) refer to the shuffle's second operand.
    if (OrigElt < NumElem) {
      InVec = InVec->getOperand(0);
    } else {
      InVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    EVT IndexTy = N->getOperand(1).getValueType();
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
                       InVec, DAG.getConstant(OrigElt, IndexTy));
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    bool NewLoad = false;
    bool BCNumEltsChanged = false;
    EVT ExtVT = VT.getVectorElementType();
    EVT LVT = ExtVT;

    // If the result of load has to be truncated, then it's not necessarily
    // profitable.
    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
      return SDValue();

    // Look through a bitcast of the loaded vector, tracking whether the
    // element count changed (which invalidates shuffle-mask reasoning below).
    if (InVec.getOpcode() == ISD::BITCAST) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      EVT BCVT = InVec.getOperand(0).getValueType();
      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
        return SDValue();
      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
        BCNumEltsChanged = true;
      InVec = InVec.getOperand(0);
      ExtVT = BCVT.getVectorElementType();
      NewLoad = true;
    }

    LoadSDNode *LN0 = NULL;
    const ShuffleVectorSDNode *SVN = NULL;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard 'Elt > (int)NumElems' looks off-by-one —
      // Elt == NumElems passes it, yet getMaskElt(Elt) indexes a mask of size
      // NumElems. Confirm whether Elt can reach NumElems here and consider
      // '>=' instead.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the element index into the chosen shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    unsigned Align = LN0->getAlignment();
    if (NewLoad) {
      // Check the resultant load doesn't need a higher alignment than the
      // original load.
      unsigned NewAlign =
        TLI.getDataLayout()
            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));

      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
        return SDValue();

      Align = NewAlign;
    }

    SDValue NewPtr = LN0->getBasePtr();
    unsigned PtrOff = 0;

    // Compute the byte address of the extracted element.
    if (Elt) {
      PtrOff = LVT.getSizeInBits() * Elt / 8;
      EVT PtrType = NewPtr.getValueType();
      if (TLI.isBigEndian())
        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
                           DAG.getConstant(PtrOff, PtrType));
    }

    // The replacement we need to do here is a little tricky: we need to
    // replace an extractelement of a load with a load.
    // Use ReplaceAllUsesOfValuesWith to do the replacement.
    // Note that this replacement assumes that the extractelement is the only
    // use of the load; that's okay because we don't want to perform this
    // transformation in other cases anyway.
    SDValue Load;
    SDValue Chain;
    if (NVT.bitsGT(LVT)) {
      // If the result type of vextract is wider than the load, then issue an
      // extending load instead.
      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) ?
        ISD::ZEXTLOAD : ISD::EXTLOAD;
      Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),
                            Align);
      Chain = Load.getValue(1);
    } else {
      Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                         LN0->getPointerInfo().getWithOffset(PtrOff),
                         LN0->isVolatile(), LN0->isNonTemporal(),
                         LN0->isInvariant(), Align);
      Chain = Load.getValue(1);
      if (NVT.bitsLT(LVT))
        Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
      else
        Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
    }
    WorkListRemover DeadNodes(*this);
    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
    SDValue To[] = { Load, Chain };
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
    // Since we're explicitly calling ReplaceAllUses, add the new node to the
    // worklist explicitly as well.
    AddToWorkList(Load.getNode());
    AddUsersToWorkList(Load.getNode()); // Add users too
    // Make sure to revisit this node to clean it up; it will usually be dead.
    AddToWorkList(N);
    return SDValue(N, 0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Padding elements: undef if every input was ANY_EXTEND (bits don't
  // matter), otherwise zero to match ZERO_EXTEND semantics.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i=0; i < N->getNumOperands(); ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the source element occupies the lowest-addressed slot
    // of its group; on big-endian, the highest.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorkList(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations &&
      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  SDValue VecIn1, VecIn2;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    // Ignore undef inputs.
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

    // If this input is something other than a EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
      VecIn1 = VecIn2 = SDValue(0, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (VecIn1.getNode() == 0) {
      VecIn1 = ExtractedFromVec;
    } else if (VecIn2.getNode() == 0) {
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(0, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
8522 if (VecIn1.getNode()) { 8523 SmallVector<int, 8> Mask; 8524 for (unsigned i = 0; i != NumInScalars; ++i) { 8525 if (N->getOperand(i).getOpcode() == ISD::UNDEF) { 8526 Mask.push_back(-1); 8527 continue; 8528 } 8529 8530 // If extracting from the first vector, just use the index directly. 8531 SDValue Extract = N->getOperand(i); 8532 SDValue ExtVal = Extract.getOperand(1); 8533 if (Extract.getOperand(0) == VecIn1) { 8534 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 8535 if (ExtIndex > VT.getVectorNumElements()) 8536 return SDValue(); 8537 8538 Mask.push_back(ExtIndex); 8539 continue; 8540 } 8541 8542 // Otherwise, use InIdx + VecSize 8543 unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 8544 Mask.push_back(Idx+NumInScalars); 8545 } 8546 8547 // We can't generate a shuffle node with mismatched input and output types. 8548 // Attempt to transform a single input vector to the correct type. 8549 if ((VT != VecIn1.getValueType())) { 8550 // We don't support shuffeling between TWO values of different types. 8551 if (VecIn2.getNode() != 0) 8552 return SDValue(); 8553 8554 // We only support widening of vectors which are half the size of the 8555 // output registers. For example XMM->YMM widening on X86 with AVX. 8556 if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) 8557 return SDValue(); 8558 8559 // If the input vector type has a different base type to the output 8560 // vector type, bail out. 8561 if (VecIn1.getValueType().getVectorElementType() != 8562 VT.getVectorElementType()) 8563 return SDValue(); 8564 8565 // Widen the input vector by adding undef values. 8566 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, 8567 VecIn1, DAG.getUNDEF(VecIn1.getValueType())); 8568 } 8569 8570 // If VecIn2 is unused then change it to undef. 8571 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); 8572 8573 // Check that we were able to transform all incoming values to the same 8574 // type. 
8575 if (VecIn2.getValueType() != VecIn1.getValueType() || 8576 VecIn1.getValueType() != VT) 8577 return SDValue(); 8578 8579 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. 8580 if (!isTypeLegal(VT)) 8581 return SDValue(); 8582 8583 // Return the new VECTOR_SHUFFLE node. 8584 SDValue Ops[2]; 8585 Ops[0] = VecIn1; 8586 Ops[1] = VecIn2; 8587 return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]); 8588 } 8589 8590 return SDValue(); 8591} 8592 8593SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { 8594 // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of 8595 // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector 8596 // inputs come from at most two distinct vectors, turn this into a shuffle 8597 // node. 8598 8599 // If we only have one input vector, we don't need to do any concatenation. 8600 if (N->getNumOperands() == 1) 8601 return N->getOperand(0); 8602 8603 // Check if all of the operands are undefs. 8604 if (ISD::allOperandsUndef(N)) 8605 return DAG.getUNDEF(N->getValueType(0)); 8606 8607 return SDValue(); 8608} 8609 8610SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 8611 EVT NVT = N->getValueType(0); 8612 SDValue V = N->getOperand(0); 8613 8614 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 8615 // Handle only simple case where vector being inserted and vector 8616 // being extracted are of same type, and are half size of larger vectors. 8617 EVT BigVT = V->getOperand(0).getValueType(); 8618 EVT SmallVT = V->getOperand(1).getValueType(); 8619 if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 8620 return SDValue(); 8621 8622 // Only handle cases where both indexes are constants with the same type. 
8623 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8624 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 8625 8626 if (InsIdx && ExtIdx && 8627 InsIdx->getValueType(0).getSizeInBits() <= 64 && 8628 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 8629 // Combine: 8630 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 8631 // Into: 8632 // indices are equal => V1 8633 // otherwise => (extract_subvec V1, ExtIdx) 8634 if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) 8635 return V->getOperand(1); 8636 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, 8637 V->getOperand(0), N->getOperand(1)); 8638 } 8639 } 8640 8641 if (V->getOpcode() == ISD::CONCAT_VECTORS) { 8642 // Combine: 8643 // (extract_subvec (concat V1, V2, ...), i) 8644 // Into: 8645 // Vi if possible 8646 // Only operand 0 is checked as 'concat' assumes all inputs of the same type. 8647 if (V->getOperand(0).getValueType() != NVT) 8648 return SDValue(); 8649 unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8650 unsigned NumElems = NVT.getVectorNumElements(); 8651 assert((Idx % NumElems) == 0 && 8652 "IDX in concat is not a multiple of the result vector length."); 8653 return V->getOperand(Idx / NumElems); 8654 } 8655 8656 return SDValue(); 8657} 8658 8659SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 8660 EVT VT = N->getValueType(0); 8661 unsigned NumElts = VT.getVectorNumElements(); 8662 8663 SDValue N0 = N->getOperand(0); 8664 SDValue N1 = N->getOperand(1); 8665 8666 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 8667 8668 // Canonicalize shuffle undef, undef -> undef 8669 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 8670 return DAG.getUNDEF(VT); 8671 8672 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 8673 8674 // Canonicalize shuffle v, v -> v, undef 8675 if (N0 == N1) { 8676 SmallVector<int, 8> NewMask; 8677 for (unsigned i = 0; i != 
NumElts; ++i) { 8678 int Idx = SVN->getMaskElt(i); 8679 if (Idx >= (int)NumElts) Idx -= NumElts; 8680 NewMask.push_back(Idx); 8681 } 8682 return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), 8683 &NewMask[0]); 8684 } 8685 8686 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 8687 if (N0.getOpcode() == ISD::UNDEF) { 8688 SmallVector<int, 8> NewMask; 8689 for (unsigned i = 0; i != NumElts; ++i) { 8690 int Idx = SVN->getMaskElt(i); 8691 if (Idx >= 0) { 8692 if (Idx < (int)NumElts) 8693 Idx += NumElts; 8694 else 8695 Idx -= NumElts; 8696 } 8697 NewMask.push_back(Idx); 8698 } 8699 return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT), 8700 &NewMask[0]); 8701 } 8702 8703 // Remove references to rhs if it is undef 8704 if (N1.getOpcode() == ISD::UNDEF) { 8705 bool Changed = false; 8706 SmallVector<int, 8> NewMask; 8707 for (unsigned i = 0; i != NumElts; ++i) { 8708 int Idx = SVN->getMaskElt(i); 8709 if (Idx >= (int)NumElts) { 8710 Idx = -1; 8711 Changed = true; 8712 } 8713 NewMask.push_back(Idx); 8714 } 8715 if (Changed) 8716 return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]); 8717 } 8718 8719 // If it is a splat, check if the argument vector is another splat or a 8720 // build_vector with all scalar elements the same. 8721 if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) { 8722 SDNode *V = N0.getNode(); 8723 8724 // If this is a bit convert that changes the element type of the vector but 8725 // not the number of vector elements, look through it. Be careful not to 8726 // look though conversions that change things like v4f32 to v2f64. 
8727 if (V->getOpcode() == ISD::BITCAST) { 8728 SDValue ConvInput = V->getOperand(0); 8729 if (ConvInput.getValueType().isVector() && 8730 ConvInput.getValueType().getVectorNumElements() == NumElts) 8731 V = ConvInput.getNode(); 8732 } 8733 8734 if (V->getOpcode() == ISD::BUILD_VECTOR) { 8735 assert(V->getNumOperands() == NumElts && 8736 "BUILD_VECTOR has wrong number of operands"); 8737 SDValue Base; 8738 bool AllSame = true; 8739 for (unsigned i = 0; i != NumElts; ++i) { 8740 if (V->getOperand(i).getOpcode() != ISD::UNDEF) { 8741 Base = V->getOperand(i); 8742 break; 8743 } 8744 } 8745 // Splat of <u, u, u, u>, return <u, u, u, u> 8746 if (!Base.getNode()) 8747 return N0; 8748 for (unsigned i = 0; i != NumElts; ++i) { 8749 if (V->getOperand(i) != Base) { 8750 AllSame = false; 8751 break; 8752 } 8753 } 8754 // Splat of <x, x, x, x>, return <x, x, x, x> 8755 if (AllSame) 8756 return N0; 8757 } 8758 } 8759 8760 // If this shuffle node is simply a swizzle of another shuffle node, 8761 // and it reverses the swizzle of the previous shuffle then we can 8762 // optimize shuffle(shuffle(x, undef), undef) -> x. 8763 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && 8764 N1.getOpcode() == ISD::UNDEF) { 8765 8766 ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); 8767 8768 // Shuffle nodes can only reverse shuffles with a single non-undef value. 8769 if (N0.getOperand(1).getOpcode() != ISD::UNDEF) 8770 return SDValue(); 8771 8772 // The incoming shuffle must be of the same type as the result of the 8773 // current shuffle. 8774 assert(OtherSV->getOperand(0).getValueType() == VT && 8775 "Shuffle types don't match"); 8776 8777 for (unsigned i = 0; i != NumElts; ++i) { 8778 int Idx = SVN->getMaskElt(i); 8779 assert(Idx < (int)NumElts && "Index references undef operand"); 8780 // Next, this index comes from the first value, which is the incoming 8781 // shuffle. Adopt the incoming index. 
8782 if (Idx >= 0) 8783 Idx = OtherSV->getMaskElt(Idx); 8784 8785 // The combined shuffle must map each index to itself. 8786 if (Idx >= 0 && (unsigned)Idx != i) 8787 return SDValue(); 8788 } 8789 8790 return OtherSV->getOperand(0); 8791 } 8792 8793 return SDValue(); 8794} 8795 8796SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { 8797 if (!TLI.getShouldFoldAtomicFences()) 8798 return SDValue(); 8799 8800 SDValue atomic = N->getOperand(0); 8801 switch (atomic.getOpcode()) { 8802 case ISD::ATOMIC_CMP_SWAP: 8803 case ISD::ATOMIC_SWAP: 8804 case ISD::ATOMIC_LOAD_ADD: 8805 case ISD::ATOMIC_LOAD_SUB: 8806 case ISD::ATOMIC_LOAD_AND: 8807 case ISD::ATOMIC_LOAD_OR: 8808 case ISD::ATOMIC_LOAD_XOR: 8809 case ISD::ATOMIC_LOAD_NAND: 8810 case ISD::ATOMIC_LOAD_MIN: 8811 case ISD::ATOMIC_LOAD_MAX: 8812 case ISD::ATOMIC_LOAD_UMIN: 8813 case ISD::ATOMIC_LOAD_UMAX: 8814 break; 8815 default: 8816 return SDValue(); 8817 } 8818 8819 SDValue fence = atomic.getOperand(0); 8820 if (fence.getOpcode() != ISD::MEMBARRIER) 8821 return SDValue(); 8822 8823 switch (atomic.getOpcode()) { 8824 case ISD::ATOMIC_CMP_SWAP: 8825 return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), 8826 fence.getOperand(0), 8827 atomic.getOperand(1), atomic.getOperand(2), 8828 atomic.getOperand(3)), atomic.getResNo()); 8829 case ISD::ATOMIC_SWAP: 8830 case ISD::ATOMIC_LOAD_ADD: 8831 case ISD::ATOMIC_LOAD_SUB: 8832 case ISD::ATOMIC_LOAD_AND: 8833 case ISD::ATOMIC_LOAD_OR: 8834 case ISD::ATOMIC_LOAD_XOR: 8835 case ISD::ATOMIC_LOAD_NAND: 8836 case ISD::ATOMIC_LOAD_MIN: 8837 case ISD::ATOMIC_LOAD_MAX: 8838 case ISD::ATOMIC_LOAD_UMIN: 8839 case ISD::ATOMIC_LOAD_UMAX: 8840 return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), 8841 fence.getOperand(0), 8842 atomic.getOperand(1), atomic.getOperand(2)), 8843 atomic.getResNo()); 8844 default: 8845 return SDValue(); 8846 } 8847} 8848 8849/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform 8850/// an AND to a vector_shuffle with the destination 
vector and a zero vector. 8851/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> 8852/// vector_shuffle V, Zero, <0, 4, 2, 4> 8853SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 8854 EVT VT = N->getValueType(0); 8855 DebugLoc dl = N->getDebugLoc(); 8856 SDValue LHS = N->getOperand(0); 8857 SDValue RHS = N->getOperand(1); 8858 if (N->getOpcode() == ISD::AND) { 8859 if (RHS.getOpcode() == ISD::BITCAST) 8860 RHS = RHS.getOperand(0); 8861 if (RHS.getOpcode() == ISD::BUILD_VECTOR) { 8862 SmallVector<int, 8> Indices; 8863 unsigned NumElts = RHS.getNumOperands(); 8864 for (unsigned i = 0; i != NumElts; ++i) { 8865 SDValue Elt = RHS.getOperand(i); 8866 if (!isa<ConstantSDNode>(Elt)) 8867 return SDValue(); 8868 8869 if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) 8870 Indices.push_back(i); 8871 else if (cast<ConstantSDNode>(Elt)->isNullValue()) 8872 Indices.push_back(NumElts); 8873 else 8874 return SDValue(); 8875 } 8876 8877 // Let's see if the target supports this vector_shuffle. 8878 EVT RVT = RHS.getValueType(); 8879 if (!TLI.isVectorClearMaskLegal(Indices, RVT)) 8880 return SDValue(); 8881 8882 // Return the new VECTOR_SHUFFLE node. 8883 EVT EltVT = RVT.getVectorElementType(); 8884 SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(), 8885 DAG.getConstant(0, EltVT)); 8886 SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 8887 RVT, &ZeroOps[0], ZeroOps.size()); 8888 LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS); 8889 SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]); 8890 return DAG.getNode(ISD::BITCAST, dl, VT, Shuf); 8891 } 8892 } 8893 8894 return SDValue(); 8895} 8896 8897/// SimplifyVBinOp - Visit a binary vector operation, like ADD. 8898SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { 8899 // After legalize, the target may be depending on adds and other 8900 // binary ops to provide legal ways to construct constants or other 8901 // things. Simplifying them may result in a loss of legality. 
8902 if (LegalOperations) return SDValue(); 8903 8904 assert(N->getValueType(0).isVector() && 8905 "SimplifyVBinOp only works on vectors!"); 8906 8907 SDValue LHS = N->getOperand(0); 8908 SDValue RHS = N->getOperand(1); 8909 SDValue Shuffle = XformToShuffleWithZero(N); 8910 if (Shuffle.getNode()) return Shuffle; 8911 8912 // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold 8913 // this operation. 8914 if (LHS.getOpcode() == ISD::BUILD_VECTOR && 8915 RHS.getOpcode() == ISD::BUILD_VECTOR) { 8916 SmallVector<SDValue, 8> Ops; 8917 for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) { 8918 SDValue LHSOp = LHS.getOperand(i); 8919 SDValue RHSOp = RHS.getOperand(i); 8920 // If these two elements can't be folded, bail out. 8921 if ((LHSOp.getOpcode() != ISD::UNDEF && 8922 LHSOp.getOpcode() != ISD::Constant && 8923 LHSOp.getOpcode() != ISD::ConstantFP) || 8924 (RHSOp.getOpcode() != ISD::UNDEF && 8925 RHSOp.getOpcode() != ISD::Constant && 8926 RHSOp.getOpcode() != ISD::ConstantFP)) 8927 break; 8928 8929 // Can't fold divide by zero. 8930 if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV || 8931 N->getOpcode() == ISD::FDIV) { 8932 if ((RHSOp.getOpcode() == ISD::Constant && 8933 cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) || 8934 (RHSOp.getOpcode() == ISD::ConstantFP && 8935 cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero())) 8936 break; 8937 } 8938 8939 EVT VT = LHSOp.getValueType(); 8940 EVT RVT = RHSOp.getValueType(); 8941 if (RVT != VT) { 8942 // Integer BUILD_VECTOR operands may have types larger than the element 8943 // size (e.g., when the element type is not legal). Prior to type 8944 // legalization, the types may not match between the two BUILD_VECTORS. 8945 // Truncate one of the operands to make them match. 
8946 if (RVT.getSizeInBits() > VT.getSizeInBits()) { 8947 RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); 8948 } else { 8949 LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); 8950 VT = RVT; 8951 } 8952 } 8953 SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, 8954 LHSOp, RHSOp); 8955 if (FoldOp.getOpcode() != ISD::UNDEF && 8956 FoldOp.getOpcode() != ISD::Constant && 8957 FoldOp.getOpcode() != ISD::ConstantFP) 8958 break; 8959 Ops.push_back(FoldOp); 8960 AddToWorkList(FoldOp.getNode()); 8961 } 8962 8963 if (Ops.size() == LHS.getNumOperands()) 8964 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 8965 LHS.getValueType(), &Ops[0], Ops.size()); 8966 } 8967 8968 return SDValue(); 8969} 8970 8971/// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. 8972SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { 8973 // After legalize, the target may be depending on adds and other 8974 // binary ops to provide legal ways to construct constants or other 8975 // things. Simplifying them may result in a loss of legality. 8976 if (LegalOperations) return SDValue(); 8977 8978 assert(N->getValueType(0).isVector() && 8979 "SimplifyVUnaryOp only works on vectors!"); 8980 8981 SDValue N0 = N->getOperand(0); 8982 8983 if (N0.getOpcode() != ISD::BUILD_VECTOR) 8984 return SDValue(); 8985 8986 // Operand is a BUILD_VECTOR node, see if we can constant fold it. 
8987 SmallVector<SDValue, 8> Ops; 8988 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 8989 SDValue Op = N0.getOperand(i); 8990 if (Op.getOpcode() != ISD::UNDEF && 8991 Op.getOpcode() != ISD::ConstantFP) 8992 break; 8993 EVT EltVT = Op.getValueType(); 8994 SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); 8995 if (FoldOp.getOpcode() != ISD::UNDEF && 8996 FoldOp.getOpcode() != ISD::ConstantFP) 8997 break; 8998 Ops.push_back(FoldOp); 8999 AddToWorkList(FoldOp.getNode()); 9000 } 9001 9002 if (Ops.size() != N0.getNumOperands()) 9003 return SDValue(); 9004 9005 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 9006 N0.getValueType(), &Ops[0], Ops.size()); 9007} 9008 9009SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, 9010 SDValue N1, SDValue N2){ 9011 assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); 9012 9013 SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2, 9014 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 9015 9016 // If we got a simplified select_cc node back from SimplifySelectCC, then 9017 // break it down into a new SETCC node, and a new SELECT node, and then return 9018 // the SELECT node, since we were called with a SELECT node. 9019 if (SCC.getNode()) { 9020 // Check to see if we got a select_cc back (to turn into setcc/select). 9021 // Otherwise, just return whatever node we got back, like fabs. 
9022 if (SCC.getOpcode() == ISD::SELECT_CC) { 9023 SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), 9024 N0.getValueType(), 9025 SCC.getOperand(0), SCC.getOperand(1), 9026 SCC.getOperand(4)); 9027 AddToWorkList(SETCC.getNode()); 9028 return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), 9029 SCC.getOperand(2), SCC.getOperand(3), SETCC); 9030 } 9031 9032 return SCC; 9033 } 9034 return SDValue(); 9035} 9036 9037/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS 9038/// are the two values being selected between, see if we can simplify the 9039/// select. Callers of this should assume that TheSelect is deleted if this 9040/// returns true. As such, they should return the appropriate thing (e.g. the 9041/// node) back to the top-level of the DAG combiner loop to avoid it being 9042/// looked at. 9043bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, 9044 SDValue RHS) { 9045 9046 // Cannot simplify select with vector condition 9047 if (TheSelect->getOperand(0).getValueType().isVector()) return false; 9048 9049 // If this is a select from two identical things, try to pull the operation 9050 // through the select. 9051 if (LHS.getOpcode() != RHS.getOpcode() || 9052 !LHS.hasOneUse() || !RHS.hasOneUse()) 9053 return false; 9054 9055 // If this is a load and the token chain is identical, replace the select 9056 // of two loads with a load through a select of the address to load from. 9057 // This triggers in things like "select bool X, 10.0, 123.0" after the FP 9058 // constants have been dropped into the constant pool. 9059 if (LHS.getOpcode() == ISD::LOAD) { 9060 LoadSDNode *LLD = cast<LoadSDNode>(LHS); 9061 LoadSDNode *RLD = cast<LoadSDNode>(RHS); 9062 9063 // Token chains must be identical. 9064 if (LHS.getOperand(0) != RHS.getOperand(0) || 9065 // Do not let this transformation reduce the number of volatile loads. 
9066 LLD->isVolatile() || RLD->isVolatile() || 9067 // If this is an EXTLOAD, the VT's must match. 9068 LLD->getMemoryVT() != RLD->getMemoryVT() || 9069 // If this is an EXTLOAD, the kind of extension must match. 9070 (LLD->getExtensionType() != RLD->getExtensionType() && 9071 // The only exception is if one of the extensions is anyext. 9072 LLD->getExtensionType() != ISD::EXTLOAD && 9073 RLD->getExtensionType() != ISD::EXTLOAD) || 9074 // FIXME: this discards src value information. This is 9075 // over-conservative. It would be beneficial to be able to remember 9076 // both potential memory locations. Since we are discarding 9077 // src value info, don't do the transformation if the memory 9078 // locations are not in the default address space. 9079 LLD->getPointerInfo().getAddrSpace() != 0 || 9080 RLD->getPointerInfo().getAddrSpace() != 0) 9081 return false; 9082 9083 // Check that the select condition doesn't reach either load. If so, 9084 // folding this will induce a cycle into the DAG. If not, this is safe to 9085 // xform, so create a select of the addresses. 9086 SDValue Addr; 9087 if (TheSelect->getOpcode() == ISD::SELECT) { 9088 SDNode *CondNode = TheSelect->getOperand(0).getNode(); 9089 if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) || 9090 (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode))) 9091 return false; 9092 // The loads must not depend on one another. 
9093 if (LLD->isPredecessorOf(RLD) || 9094 RLD->isPredecessorOf(LLD)) 9095 return false; 9096 Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), 9097 LLD->getBasePtr().getValueType(), 9098 TheSelect->getOperand(0), LLD->getBasePtr(), 9099 RLD->getBasePtr()); 9100 } else { // Otherwise SELECT_CC 9101 SDNode *CondLHS = TheSelect->getOperand(0).getNode(); 9102 SDNode *CondRHS = TheSelect->getOperand(1).getNode(); 9103 9104 if ((LLD->hasAnyUseOfValue(1) && 9105 (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) || 9106 (RLD->hasAnyUseOfValue(1) && 9107 (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) 9108 return false; 9109 9110 Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), 9111 LLD->getBasePtr().getValueType(), 9112 TheSelect->getOperand(0), 9113 TheSelect->getOperand(1), 9114 LLD->getBasePtr(), RLD->getBasePtr(), 9115 TheSelect->getOperand(4)); 9116 } 9117 9118 SDValue Load; 9119 if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { 9120 Load = DAG.getLoad(TheSelect->getValueType(0), 9121 TheSelect->getDebugLoc(), 9122 // FIXME: Discards pointer info. 9123 LLD->getChain(), Addr, MachinePointerInfo(), 9124 LLD->isVolatile(), LLD->isNonTemporal(), 9125 LLD->isInvariant(), LLD->getAlignment()); 9126 } else { 9127 Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? 9128 RLD->getExtensionType() : LLD->getExtensionType(), 9129 TheSelect->getDebugLoc(), 9130 TheSelect->getValueType(0), 9131 // FIXME: Discards pointer info. 9132 LLD->getChain(), Addr, MachinePointerInfo(), 9133 LLD->getMemoryVT(), LLD->isVolatile(), 9134 LLD->isNonTemporal(), LLD->getAlignment()); 9135 } 9136 9137 // Users of the select now use the result of the load. 9138 CombineTo(TheSelect, Load); 9139 9140 // Users of the old loads now use the new load's chain. We know the 9141 // old-load value is dead now. 
9142 CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1)); 9143 CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1)); 9144 return true; 9145 } 9146 9147 return false; 9148} 9149 9150/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 9151/// where 'cond' is the comparison specified by CC. 9152SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, 9153 SDValue N2, SDValue N3, 9154 ISD::CondCode CC, bool NotExtCompare) { 9155 // (x ? y : y) -> y. 9156 if (N2 == N3) return N2; 9157 9158 EVT VT = N2.getValueType(); 9159 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 9160 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); 9161 ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode()); 9162 9163 // Determine if the condition we're dealing with is constant 9164 SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), 9165 N0, N1, CC, DL, false); 9166 if (SCC.getNode()) AddToWorkList(SCC.getNode()); 9167 ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode()); 9168 9169 // fold select_cc true, x, y -> x 9170 if (SCCC && !SCCC->isNullValue()) 9171 return N2; 9172 // fold select_cc false, x, y -> y 9173 if (SCCC && SCCC->isNullValue()) 9174 return N3; 9175 9176 // Check to see if we can simplify the select into an fabs node 9177 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) { 9178 // Allow either -0.0 or 0.0 9179 if (CFP->getValueAPF().isZero()) { 9180 // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs 9181 if ((CC == ISD::SETGE || CC == ISD::SETGT) && 9182 N0 == N2 && N3.getOpcode() == ISD::FNEG && 9183 N2 == N3.getOperand(0)) 9184 return DAG.getNode(ISD::FABS, DL, VT, N0); 9185 9186 // select (setl[te] X, +/-0.0), fneg(X), X -> fabs 9187 if ((CC == ISD::SETLT || CC == ISD::SETLE) && 9188 N0 == N3 && N2.getOpcode() == ISD::FNEG && 9189 N2.getOperand(0) == N3) 9190 return DAG.getNode(ISD::FABS, DL, VT, N3); 9191 } 9192 } 9193 9194 // Turn 
"(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 9195 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 9196 // in it. This is a win when the constant is not otherwise available because 9197 // it replaces two constant pool loads with one. We only do this if the FP 9198 // type is known to be legal, because if it isn't, then we are before legalize 9199 // types an we want the other legalization to happen first (e.g. to avoid 9200 // messing with soft float) and if the ConstantFP is not legal, because if 9201 // it is legal, we may not need to store the FP constant in a constant pool. 9202 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 9203 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 9204 if (TLI.isTypeLegal(N2.getValueType()) && 9205 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 9206 TargetLowering::Legal) && 9207 // If both constants have multiple uses, then we won't need to do an 9208 // extra load, they are likely around in registers for other users. 9209 (TV->hasOneUse() || FV->hasOneUse())) { 9210 Constant *Elts[] = { 9211 const_cast<ConstantFP*>(FV->getConstantFPValue()), 9212 const_cast<ConstantFP*>(TV->getConstantFPValue()) 9213 }; 9214 Type *FPTy = Elts[0]->getType(); 9215 const DataLayout &TD = *TLI.getDataLayout(); 9216 9217 // Create a ConstantArray of the two constants. 9218 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 9219 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), 9220 TD.getPrefTypeAlignment(FPTy)); 9221 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 9222 9223 // Get the offsets to the 0 and 1 element of the array so that we can 9224 // select between them. 
9225 SDValue Zero = DAG.getIntPtrConstant(0); 9226 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 9227 SDValue One = DAG.getIntPtrConstant(EltSize); 9228 9229 SDValue Cond = DAG.getSetCC(DL, 9230 TLI.getSetCCResultType(N0.getValueType()), 9231 N0, N1, CC); 9232 AddToWorkList(Cond.getNode()); 9233 SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), 9234 Cond, One, Zero); 9235 AddToWorkList(CstOffset.getNode()); 9236 CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, 9237 CstOffset); 9238 AddToWorkList(CPIdx.getNode()); 9239 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 9240 MachinePointerInfo::getConstantPool(), false, 9241 false, false, Alignment); 9242 9243 } 9244 } 9245 9246 // Check to see if we can perform the "gzip trick", transforming 9247 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 9248 if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && 9249 (N1C->isNullValue() || // (a < 0) ? b : 0 9250 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 9251 EVT XType = N0.getValueType(); 9252 EVT AType = N2.getValueType(); 9253 if (XType.bitsGE(AType)) { 9254 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 9255 // single-bit constant. 
      // If the true-value A (== N2) is a single-bit constant (the test
      // (v & (v-1)) == 0 catches zero and powers of two), we do not need a
      // full all-ones mask: logically shift the sign bit of X straight down
      // to A's bit position and AND with A.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        // Distance from the sign bit down to the set bit of A.
        ShCtV = XType.getSizeInBits()-ShCtV-1;
        SDValue ShCt = DAG.getConstant(ShCtV,
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
                                    XType, N0, ShCt);
        AddToWorkList(Shift.getNode());

        // Narrow the shifted value if the compared type is wider than the
        // result type.
        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorkList(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      // General case: arithmetically shift the sign bit across the whole
      // value, producing all-ones (X < 0) or all-zeros, then AND with A.
      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(N0.getValueType())));
      AddToWorkList(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorkList(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT &&
      N1C && N1C->isNullValue() &&
      N2C && N2C->isNullValue()) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      APInt AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth()-1,
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);

      // Note the fold inverts the test: (and x,y)==0 selects A (N3), so the
      // sign-replicated mask is ANDed with N3, not N2.
      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  // Only valid when the target materializes booleans as 0/1, since the setcc
  // result is used directly as the shifted value.
  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType().isVector()) ==
      TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->getAPIntValue() == 1)
      return SDValue();

    // Get a SetCC of the condition
    // FIXME: Should probably make sure that setcc is legal if we ever have a
    // target where it isn't.
    SDValue Temp, SCC;
    // cast from setcc result type to select result type
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
                         N0, N1, CC);
      if (N2.getValueType().bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                           N2.getValueType(), SCC);
    } else {
      // Before type legalization the setcc can simply produce an i1.
      SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                         N2.getValueType(), SCC);
    }

    AddToWorkList(SCC.getNode());
    AddToWorkList(Temp.getNode());

    // select C, 1, 0 is just the zero-extended compare itself.
    if (N2C->getAPIntValue() == 1)
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc is legal; otherwise, go
  // ahead with the folds.
  // NOTE(review): this entire block is deliberately disabled by the
  // "0 &&" in the condition and is currently dead code.
  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
    EVT XType = N0.getValueType();
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
      if (Res.getValueType() != VT)
        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
      return Res;
    }

    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::CTLZ, XType))) {
      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
                                      getShiftAmountTy(Ctlz.getValueType())));
    }
    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
                                  XType, DAG.getConstant(0, XType), N0);
      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
      return DAG.getNode(ISD::SRL, DL, XType,
                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                         DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(XType)));
    }
    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
                                 DAG.getConstant(XType.getSizeInBits()-1,
                                        getShiftAmountTy(N0.getValueType())));
      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
    }
  }

  // Check to see if this is an integer abs.
  // select_cc setg[te] X,  0,  X, -X ->
  // select_cc setgt    X, -1,  X, -X ->
  // select_cc setl[te] X,  0, -X,  X ->
  // select_cc setlt    X,  1, -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C) {
    ConstantSDNode *SubC = NULL;
    // Match (sub 0, X) on whichever arm is the negated value; SubC receives
    // the subtrahend's LHS so we can verify below that it is the constant 0.
    if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
        N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
    else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
              (N1C->isOne() && CC == ISD::SETLT)) &&
             N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
      SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));

    EVT XType = N0.getValueType();
    if (SubC && SubC->isNullValue() && XType.isInteger()) {
      // Emit the branch-free abs expansion: sign-splat, add, xor.
      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
                                  N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(N0.getValueType())));
      SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
                                XType, N0, Shift);
      AddToWorkList(Shift.getNode());
      AddToWorkList(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
    }
  }

  // No select_cc fold applied.
  return SDValue();
}

/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.  It
/// packages the combiner's legalization state into a DAGCombinerInfo and
/// forwards to the target hook.
SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                   SDValue N1, ISD::CondCode Cond,
                                   DebugLoc DL, bool foldBooleans) {
  TargetLowering::DAGCombinerInfo
    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
}

/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.
See: 9447/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 9448SDValue DAGCombiner::BuildSDIV(SDNode *N) { 9449 std::vector<SDNode*> Built; 9450 SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); 9451 9452 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 9453 ii != ee; ++ii) 9454 AddToWorkList(*ii); 9455 return S; 9456} 9457 9458/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, 9459/// return a DAG expression to select that will generate the same value by 9460/// multiplying by a magic number. See: 9461/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 9462SDValue DAGCombiner::BuildUDIV(SDNode *N) { 9463 std::vector<SDNode*> Built; 9464 SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); 9465 9466 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 9467 ii != ee; ++ii) 9468 AddToWorkList(*ii); 9469 return S; 9470} 9471 9472/// FindBaseOffset - Return true if base is a frame index, which is known not 9473// to alias with anything but itself. Provides base object and offset as 9474// results. 9475static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 9476 const GlobalValue *&GV, const void *&CV) { 9477 // Assume it is a primitive operation. 9478 Base = Ptr; Offset = 0; GV = 0; CV = 0; 9479 9480 // If it's an adding a simple constant then integrate the offset. 9481 if (Base.getOpcode() == ISD::ADD) { 9482 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 9483 Base = Base.getOperand(0); 9484 Offset += C->getZExtValue(); 9485 } 9486 } 9487 9488 // Return the underlying GlobalValue, and update the Offset. Return false 9489 // for GlobalAddressSDNode since the same GlobalAddress may be represented 9490 // by multiple nodes with different offsets. 
9491 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 9492 GV = G->getGlobal(); 9493 Offset += G->getOffset(); 9494 return false; 9495 } 9496 9497 // Return the underlying Constant value, and update the Offset. Return false 9498 // for ConstantSDNodes since the same constant pool entry may be represented 9499 // by multiple nodes with different offsets. 9500 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 9501 CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() 9502 : (const void *)C->getConstVal(); 9503 Offset += C->getOffset(); 9504 return false; 9505 } 9506 // If it's any of the following then it can't alias with anything but itself. 9507 return isa<FrameIndexSDNode>(Base); 9508} 9509 9510/// isAlias - Return true if there is any possibility that the two addresses 9511/// overlap. 9512bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, 9513 const Value *SrcValue1, int SrcValueOffset1, 9514 unsigned SrcValueAlign1, 9515 const MDNode *TBAAInfo1, 9516 SDValue Ptr2, int64_t Size2, 9517 const Value *SrcValue2, int SrcValueOffset2, 9518 unsigned SrcValueAlign2, 9519 const MDNode *TBAAInfo2) const { 9520 // If they are the same then they must be aliases. 9521 if (Ptr1 == Ptr2) return true; 9522 9523 // Gather base node and offset information. 9524 SDValue Base1, Base2; 9525 int64_t Offset1, Offset2; 9526 const GlobalValue *GV1, *GV2; 9527 const void *CV1, *CV2; 9528 bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); 9529 bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); 9530 9531 // If they have a same base address then check to see if they overlap. 9532 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 9533 return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); 9534 9535 // It is possible for different frame indices to alias each other, mostly 9536 // when tail call optimization reuses return address slots for arguments. 
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    // Translate both frame indices to their concrete stack offsets before
    // doing the interval-overlap test.
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large
  // alignment compared to the size and offset of the access, we may be able
  // to prove they do not alias.  This check is conservative for now to catch
  // cases created by splitting vector types.
  if ((SrcValueAlign1 == SrcValueAlign2) &&
      (SrcValueOffset1 != SrcValueOffset2) &&
      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
    // Reduce both offsets modulo the common alignment.
    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;

    // There is no overlap between these relatively aligned accesses of
    // similar size, return no alias.
    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
      return false;
  }

  if (CombinerGlobalAA) {
    // Use alias analysis information.
    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
    // Extend each access size so both queries start from the common minimum
    // offset.
    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
    AliasAnalysis::AliasResult AAResult =
      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
    if (AAResult == AliasAnalysis::NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node (a load or store).  Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
                                SDValue &Ptr, int64_t &Size,
                                const Value *&SrcValue,
                                int &SrcValueOffset,
                                unsigned &SrcValueAlign,
                                const MDNode *&TBAAInfo) const {
  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);

  Ptr = LS->getBasePtr();
  // Access size in bytes (memory VT is given in bits).
  Size = LS->getMemoryVT().getSizeInBits() >> 3;
  SrcValue = LS->getSrcValue();
  SrcValueOffset = LS->getSrcValueOffset();
  SrcValueAlign = LS->getOriginalAlignment();
  TBAAInfo = LS->getTBAAInfo();
  return isa<LoadSDNode>(LS);
}

/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVector<SDValue, 8> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  SDValue Ptr;
  int64_t Size;
  const Value *SrcValue;
  int SrcValueOffset;
  unsigned SrcValueAlign;
  const MDNode *SrcTBAAInfo;
  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
                              SrcValueAlign, SrcTBAAInfo);

  // Starting off.
  // Seed the worklist with the original chain.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases.  If we've seen two aliases, assume
    // we'll find more and revert to original chain since the xform is
    // unlikely to be profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      break;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()))
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      SDValue OpPtr;
      int64_t OpSize;
      const Value *OpSrcValue;
      int OpSrcValueOffset;
      unsigned OpSrcValueAlign;
      const MDNode *OpSrcTBAAInfo;
      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                    OpSrcValue, OpSrcValueOffset,
                                    OpSrcValueAlign,
                                    OpSrcTBAAInfo);

      // If chain is alias then stop here.  The alias check is skipped when
      // both this node and the chain node are loads, since two loads are
      // never treated as interfering.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
                  SrcTBAAInfo,
                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
                  OpSrcValueAlign, OpSrcTBAAInfo)) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases
      // the likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                     &Aliases[0], Aliases.size());
}

// SelectionDAG::Combine - This is the entry point for the file.
//
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  /// run - This is the main entry point to this class.
  ///
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}