DAGCombiner.cpp revision 8e2b8ae3b11b9778da5cb54d330e984b33979bf9
//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#define DEBUG_TYPE "dagcombine"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/DataLayout.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
using namespace llvm;

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");

namespace {
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Turn on alias analysis during testing"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Include global information in alias analysis"));

//------------------------------ DAGCombiner ---------------------------------//

  /// DAGCombiner - Performs peephole optimizations on the SelectionDAG by
  /// repeatedly pulling nodes off a worklist, calling the per-opcode visit
  /// routine, and replacing nodes with simpler equivalents until the worklist
  /// is empty.
  class DAGCombiner {
    SelectionDAG &DAG;            // The DAG being combined.
    const TargetLowering &TLI;    // Target hooks consulted for legality/cost.
    CombineLevel Level;           // Which combine phase is currently running.
    CodeGenOpt::Level OptLevel;
    bool LegalOperations;         // True once operations have been legalized.
    bool LegalTypes;              // True once types have been legalized.

    // Worklist of all of the nodes that need to be simplified.
    //
    // This has the semantics that when adding to the worklist,
    // the item added must be next to be processed. It should
    // also only appear once. The naive approach to this takes
    // linear time.
    //
    // To make insert/remove cheap, we use a set and a vector to
    // maintain our worklist.
    //
    // The set contains the items on the worklist, but does not
    // maintain the order they should be visited.
    //
    // The vector maintains the order nodes should be visited, but may
    // contain duplicate or removed nodes. When choosing a node to
    // visit, we pop off the order stack until we find an item that is
    // also in the contents set. Insertion and removal into the set and
    // stack are amortized constant time.
    SmallPtrSet<SDNode*, 64> WorkListContents;
    SmallVector<SDNode*, 64> WorkListOrder;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// AddUsersToWorkList - When an instruction is simplified, add all users of
    /// the instruction to the work lists because they might get more simplified
    /// now.
    ///
    void AddUsersToWorkList(SDNode *N) {
      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
           UI != UE; ++UI)
        AddToWorkList(*UI);
    }

    /// visit - call the node-specific routine that knows how to fold each
    /// particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// AddToWorkList - Add to the work list making sure its instance is at the
    /// back (next to be processed.)
    void AddToWorkList(SDNode *N) {
      WorkListContents.insert(N);
      WorkListOrder.push_back(N);
    }

    /// removeFromWorkList - remove all instances of N from the worklist.
    ///
    /// Only the contents set is updated; stale entries on the order stack are
    /// skipped when popped because they no longer appear in the set.
    void removeFromWorkList(SDNode *N) {
      WorkListContents.erase(N);
    }

    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// SimplifyDemandedBits - Check the specified integer node value to see if
    /// it can be simplified or if things it uses can be simplified by bit
    /// propagation.  If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);

    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
                         SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
                         ISD::NodeType ExtType);

    /// combine - call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitSREM(SDNode *N);
    SDValue visitUREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitSDIVREM(SDNode *N);
    SDValue visitUDIVREM(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue SimplifyVUnaryOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);
    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitMEMBARRIER(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          DebugLoc DL, bool foldBooleans = true);
    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVector<SDValue, 8> &Aliases);

    /// isAlias - Return true if there is any possibility that the two addresses
    /// overlap.
    bool isAlias(SDValue Ptr1, int64_t Size1,
                 const Value *SrcValue1, int SrcValueOffset1,
                 unsigned SrcValueAlign1,
                 const MDNode *TBAAInfo1,
                 SDValue Ptr2, int64_t Size2,
                 const Value *SrcValue2, int SrcValueOffset2,
                 unsigned SrcValueAlign2,
                 const MDNode *TBAAInfo2) const;

    /// FindAliasInfo - Extracts the relevant alias information from the memory
    /// node.  Returns true if the operand was a load.
    bool FindAliasInfo(SDNode *N,
                       SDValue &Ptr, int64_t &Size,
                       const Value *&SrcValue, int &SrcValueOffset,
                       unsigned &SrcValueAlignment,
                       const MDNode *&TBAAInfo) const;

    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
    /// looking for a better chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}

    /// Run - runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// getShiftAmountTy - Returns a type large enough to hold any valid
    /// shift amount - before type legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy();
    }

    /// isTypeLegal - This method returns true if we are running before type
    /// legalization or if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }
  };
}


namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorkListRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;
public:
  explicit WorkListRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  virtual void NodeDeleted(SDNode *N, SDNode *E) {
    DC.removeFromWorkList(N);
  }
};
}

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

// These wrappers forward to the real combiner; DC is stored as an opaque
// pointer in DAGCombinerInfo, so each call casts it back to DAGCombiner*.

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorkList(N);
}

void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->removeFromWorkList(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}


SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

/// isNegatibleForFree - Return 1 if we can compute the negated form of the
/// specified expression for the same cost as the expression itself, or 2 if we
/// can compute the negated form more cheaply than the expression itself.
392static char isNegatibleForFree(SDValue Op, bool LegalOperations, 393 const TargetLowering &TLI, 394 const TargetOptions *Options, 395 unsigned Depth = 0) { 396 // fneg is removable even if it has multiple uses. 397 if (Op.getOpcode() == ISD::FNEG) return 2; 398 399 // Don't allow anything with multiple uses. 400 if (!Op.hasOneUse()) return 0; 401 402 // Don't recurse exponentially. 403 if (Depth > 6) return 0; 404 405 switch (Op.getOpcode()) { 406 default: return false; 407 case ISD::ConstantFP: 408 // Don't invert constant FP values after legalize. The negated constant 409 // isn't necessarily legal. 410 return LegalOperations ? 0 : 1; 411 case ISD::FADD: 412 // FIXME: determine better conditions for this xform. 413 if (!Options->UnsafeFPMath) return 0; 414 415 // After operation legalization, it might not be legal to create new FSUBs. 416 if (LegalOperations && 417 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 418 return 0; 419 420 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 421 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 422 Options, Depth + 1)) 423 return V; 424 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 425 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 426 Depth + 1); 427 case ISD::FSUB: 428 // We can't turn -(A-B) into B-A when we honor signed zeros. 
429 if (!Options->UnsafeFPMath) return 0; 430 431 // fold (fneg (fsub A, B)) -> (fsub B, A) 432 return 1; 433 434 case ISD::FMUL: 435 case ISD::FDIV: 436 if (Options->HonorSignDependentRoundingFPMath()) return 0; 437 438 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 439 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 440 Options, Depth + 1)) 441 return V; 442 443 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 444 Depth + 1); 445 446 case ISD::FP_EXTEND: 447 case ISD::FP_ROUND: 448 case ISD::FSIN: 449 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 450 Depth + 1); 451 } 452} 453 454/// GetNegatedExpression - If isNegatibleForFree returns true, this function 455/// returns the newly negated expression. 456static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 457 bool LegalOperations, unsigned Depth = 0) { 458 // fneg is removable even if it has multiple uses. 459 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 460 461 // Don't allow anything with multiple uses. 462 assert(Op.hasOneUse() && "Unknown reuse!"); 463 464 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 465 switch (Op.getOpcode()) { 466 default: llvm_unreachable("Unknown code"); 467 case ISD::ConstantFP: { 468 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 469 V.changeSign(); 470 return DAG.getConstantFP(V, Op.getValueType()); 471 } 472 case ISD::FADD: 473 // FIXME: determine better conditions for this xform. 
474 assert(DAG.getTarget().Options.UnsafeFPMath); 475 476 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 477 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 478 DAG.getTargetLoweringInfo(), 479 &DAG.getTarget().Options, Depth+1)) 480 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 481 GetNegatedExpression(Op.getOperand(0), DAG, 482 LegalOperations, Depth+1), 483 Op.getOperand(1)); 484 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 485 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 486 GetNegatedExpression(Op.getOperand(1), DAG, 487 LegalOperations, Depth+1), 488 Op.getOperand(0)); 489 case ISD::FSUB: 490 // We can't turn -(A-B) into B-A when we honor signed zeros. 491 assert(DAG.getTarget().Options.UnsafeFPMath); 492 493 // fold (fneg (fsub 0, B)) -> B 494 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 495 if (N0CFP->getValueAPF().isZero()) 496 return Op.getOperand(1); 497 498 // fold (fneg (fsub A, B)) -> (fsub B, A) 499 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 500 Op.getOperand(1), Op.getOperand(0)); 501 502 case ISD::FMUL: 503 case ISD::FDIV: 504 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 505 506 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 507 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 508 DAG.getTargetLoweringInfo(), 509 &DAG.getTarget().Options, Depth+1)) 510 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 511 GetNegatedExpression(Op.getOperand(0), DAG, 512 LegalOperations, Depth+1), 513 Op.getOperand(1)); 514 515 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 516 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 517 Op.getOperand(0), 518 GetNegatedExpression(Op.getOperand(1), DAG, 519 LegalOperations, Depth+1)); 520 521 case ISD::FP_EXTEND: 522 case ISD::FSIN: 523 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 524 
GetNegatedExpression(Op.getOperand(0), DAG, 525 LegalOperations, Depth+1)); 526 case ISD::FP_ROUND: 527 return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), 528 GetNegatedExpression(Op.getOperand(0), DAG, 529 LegalOperations, Depth+1), 530 Op.getOperand(1)); 531 } 532} 533 534 535// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 536// that selects between the values 1 and 0, making it equivalent to a setcc. 537// Also, set the incoming LHS, RHS, and CC references to the appropriate 538// nodes based on the type of node we are checking. This simplifies life a 539// bit for the callers. 540static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 541 SDValue &CC) { 542 if (N.getOpcode() == ISD::SETCC) { 543 LHS = N.getOperand(0); 544 RHS = N.getOperand(1); 545 CC = N.getOperand(2); 546 return true; 547 } 548 if (N.getOpcode() == ISD::SELECT_CC && 549 N.getOperand(2).getOpcode() == ISD::Constant && 550 N.getOperand(3).getOpcode() == ISD::Constant && 551 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 552 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 553 LHS = N.getOperand(0); 554 RHS = N.getOperand(1); 555 CC = N.getOperand(4); 556 return true; 557 } 558 return false; 559} 560 561// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 562// one use. If this is true, it allows the users to invert the operation for 563// free when it is profitable to do so. 564static bool isOneUseSetCC(SDValue N) { 565 SDValue N0, N1, N2; 566 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 567 return true; 568 return false; 569} 570 571SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, 572 SDValue N0, SDValue N1) { 573 EVT VT = N0.getValueType(); 574 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 575 if (isa<ConstantSDNode>(N1)) { 576 // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) 577 SDValue OpNode = 578 DAG.FoldConstantArithmetic(Opc, VT, 579 cast<ConstantSDNode>(N0.getOperand(1)), 580 cast<ConstantSDNode>(N1)); 581 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 582 } 583 if (N0.hasOneUse()) { 584 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 585 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 586 N0.getOperand(0), N1); 587 AddToWorkList(OpNode.getNode()); 588 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 589 } 590 } 591 592 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 593 if (isa<ConstantSDNode>(N0)) { 594 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 595 SDValue OpNode = 596 DAG.FoldConstantArithmetic(Opc, VT, 597 cast<ConstantSDNode>(N1.getOperand(1)), 598 cast<ConstantSDNode>(N0)); 599 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 600 } 601 if (N1.hasOneUse()) { 602 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 603 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 604 N1.getOperand(0), N0); 605 AddToWorkList(OpNode.getNode()); 606 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 607 } 608 } 609 610 return SDValue(); 611} 612 613SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 614 bool AddTo) { 615 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 616 ++NodesCombined; 617 DEBUG(dbgs() << "\nReplacing.1 "; 618 N->dump(&DAG); 619 dbgs() << "\nWith: "; 620 To[0].getNode()->dump(&DAG); 621 dbgs() << " and " << NumTo-1 << " other values\n"; 622 for (unsigned i = 0, e = NumTo; i != e; ++i) 623 assert((!To[i].getNode() || 624 N->getValueType(i) == To[i].getValueType()) && 625 "Cannot combine value to value of different type!")); 626 WorkListRemover DeadNodes(*this); 627 DAG.ReplaceAllUsesWith(N, To); 628 if (AddTo) { 629 // Push the new nodes and any users onto the worklist 630 for (unsigned i = 0, e = NumTo; i 
!= e; ++i) { 631 if (To[i].getNode()) { 632 AddToWorkList(To[i].getNode()); 633 AddUsersToWorkList(To[i].getNode()); 634 } 635 } 636 } 637 638 // Finally, if the node is now dead, remove it from the graph. The node 639 // may not be dead if the replacement process recursively simplified to 640 // something else needing this node. 641 if (N->use_empty()) { 642 // Nodes can be reintroduced into the worklist. Make sure we do not 643 // process a node that has been replaced. 644 removeFromWorkList(N); 645 646 // Finally, since the node is now dead, remove it from the graph. 647 DAG.DeleteNode(N); 648 } 649 return SDValue(N, 0); 650} 651 652void DAGCombiner:: 653CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 654 // Replace all uses. If any nodes become isomorphic to other nodes and 655 // are deleted, make sure to remove them from our worklist. 656 WorkListRemover DeadNodes(*this); 657 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 658 659 // Push the new node and any (possibly new) users onto the worklist. 660 AddToWorkList(TLO.New.getNode()); 661 AddUsersToWorkList(TLO.New.getNode()); 662 663 // Finally, if the node is now dead, remove it from the graph. The node 664 // may not be dead if the replacement process recursively simplified to 665 // something else needing this node. 666 if (TLO.Old.getNode()->use_empty()) { 667 removeFromWorkList(TLO.Old.getNode()); 668 669 // If the operands of this node are only used by the node, they will now 670 // be dead. Make sure to visit them first to delete dead nodes early. 
671 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 672 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 673 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 674 675 DAG.DeleteNode(TLO.Old.getNode()); 676 } 677} 678 679/// SimplifyDemandedBits - Check the specified integer node value to see if 680/// it can be simplified or if things it uses can be simplified by bit 681/// propagation. If so, return true. 682bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 683 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 684 APInt KnownZero, KnownOne; 685 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 686 return false; 687 688 // Revisit the node. 689 AddToWorkList(Op.getNode()); 690 691 // Replace the old value with the new one. 692 ++NodesCombined; 693 DEBUG(dbgs() << "\nReplacing.2 "; 694 TLO.Old.getNode()->dump(&DAG); 695 dbgs() << "\nWith: "; 696 TLO.New.getNode()->dump(&DAG); 697 dbgs() << '\n'); 698 699 CommitTargetLoweringOpt(TLO); 700 return true; 701} 702 703void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 704 DebugLoc dl = Load->getDebugLoc(); 705 EVT VT = Load->getValueType(0); 706 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 707 708 DEBUG(dbgs() << "\nReplacing.9 "; 709 Load->dump(&DAG); 710 dbgs() << "\nWith: "; 711 Trunc.getNode()->dump(&DAG); 712 dbgs() << '\n'); 713 WorkListRemover DeadNodes(*this); 714 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 715 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 716 removeFromWorkList(Load); 717 DAG.DeleteNode(Load); 718 AddToWorkList(Trunc.getNode()); 719} 720 721SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 722 Replace = false; 723 DebugLoc dl = Op.getDebugLoc(); 724 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 725 EVT MemVT = LD->getMemoryVT(); 726 ISD::LoadExtType ExtType = 
ISD::isNON_EXTLoad(LD) 727 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 728 : ISD::EXTLOAD) 729 : LD->getExtensionType(); 730 Replace = true; 731 return DAG.getExtLoad(ExtType, dl, PVT, 732 LD->getChain(), LD->getBasePtr(), 733 LD->getPointerInfo(), 734 MemVT, LD->isVolatile(), 735 LD->isNonTemporal(), LD->getAlignment()); 736 } 737 738 unsigned Opc = Op.getOpcode(); 739 switch (Opc) { 740 default: break; 741 case ISD::AssertSext: 742 return DAG.getNode(ISD::AssertSext, dl, PVT, 743 SExtPromoteOperand(Op.getOperand(0), PVT), 744 Op.getOperand(1)); 745 case ISD::AssertZext: 746 return DAG.getNode(ISD::AssertZext, dl, PVT, 747 ZExtPromoteOperand(Op.getOperand(0), PVT), 748 Op.getOperand(1)); 749 case ISD::Constant: { 750 unsigned ExtOpc = 751 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 752 return DAG.getNode(ExtOpc, dl, PVT, Op); 753 } 754 } 755 756 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 757 return SDValue(); 758 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 759} 760 761SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 762 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 763 return SDValue(); 764 EVT OldVT = Op.getValueType(); 765 DebugLoc dl = Op.getDebugLoc(); 766 bool Replace = false; 767 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 768 if (NewOp.getNode() == 0) 769 return SDValue(); 770 AddToWorkList(NewOp.getNode()); 771 772 if (Replace) 773 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 774 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 775 DAG.getValueType(OldVT)); 776} 777 778SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 779 EVT OldVT = Op.getValueType(); 780 DebugLoc dl = Op.getDebugLoc(); 781 bool Replace = false; 782 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 783 if (NewOp.getNode() == 0) 784 return SDValue(); 785 AddToWorkList(NewOp.getNode()); 786 787 if (Replace) 788 ReplaceLoadWithPromotedLoad(Op.getNode(), 
NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}

/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion is only attempted once operation legality is known.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Promote the first operand; bail out if it cannot be promoted.
    // Replace0/Replace1 are set by PromoteOperand when the promoted value
    // came from a load whose other uses should be rewritten too.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (NN0.getNode() == 0)
      return SDValue();

    // Promote the second operand, reusing the first promotion when both
    // operands are the same node.
    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (NN1.getNode() == 0)
        return SDValue();
    }

    AddToWorkList(NN0.getNode());
    if (NN1.getNode())
      AddToWorkList(NN1.getNode());

    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    // Perform the operation in the promoted type, then truncate the result
    // back to the original (undesirable) type.
    DebugLoc dl = Op.getDebugLoc();
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}

/// PromoteIntShiftOp - Promote the specified integer shift operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Only the shifted value is promoted; the shift-amount operand is kept
    // as-is.  The extension kind must match the shift: SRA needs the sign
    // bit replicated into the new high bits, SRL needs them cleared, and
    // SHL does not care what the high bits contain.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (N0.getNode() == 0)
      return SDValue();

    AddToWorkList(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    // Shift in the promoted type, then truncate back to the original type.
    DebugLoc dl = Op.getDebugLoc();
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}

/// PromoteExtend - If the target says this extension should be promoted,
/// rebuild the extension node.  NOTE(review): the rebuilt node uses the same
/// opcode, type and operand as the original; presumably this is relied on to
/// trigger the folds noted below via recombination — confirm against
/// IsDesirableToPromoteOp's contract.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// PromoteLoad - Replace a load of an 'undesirable' integer type with a
/// wider extending load of the promoted type followed by a truncate, then
/// RAUW the original load.  Returns true if the DAG was changed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DebugLoc dl = Op.getDebugLoc();
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A plain (non-extending) load becomes a zext load if that is legal for
    // the memory type, otherwise an anyext load; an existing extending load
    // keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   LD->getPointerInfo(),
                                   MemVT, LD->isVolatile(),
                                   LD->isNonTemporal(), LD->getAlignment());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    // Rewire both results of the old load: the value (truncated promoted
    // load) and the chain (new load's chain), then delete the old node.
    WorkListRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    AddToWorkList(Result.getNode());
    return true;
  }
  return false;
}


//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Run - The main entry point: iteratively combine nodes until the worklist
/// is exhausted.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    AddToWorkList(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // while the worklist isn't empty, find a node and
  // try and combine it.
  while (!WorkListContents.empty()) {
    SDNode *N;
    // The WorkListOrder holds the SDNodes in order, but it may contain
    // duplicates.  In order to avoid a linear scan, we use a set (O(log N)) to
    // hold what the worklist *should* contain, and only visit a popped node if
    // it is still a member of that set.
    do {
      N = WorkListOrder.pop_back_val();
    } while (!WorkListContents.erase(N));

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    if (RV.getNode() == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // combine() may return only the first result of a multi-result node;
      // that is only legal when the old node produced a single value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them.  They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist.  Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// visit - Dispatch to the per-opcode combine routine for N, returning a
/// replacement value or a null SDValue when no combine applied.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::MEMBARRIER:         return visitMEMBARRIER(N);
  }
  return SDValue();
}

/// combine - Try the generic visit routines, then target-specific combines,
/// then type promotion, and finally commuted-operand CSE for N.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (RV.getNode() == 0) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (RV.getNode() == 0) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad rewrites uses in place, so returning N itself signals
      // "changed, no further replacement needed" to Run().
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (RV.getNode() == 0 &&
      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = { N1, N0 };
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
                                            Ops, 2);
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains are conventionally the first or last operand; check those two
    // positions before scanning the middle ones.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// visitTokenFactor - Simplify a TokenFactor by dropping redundant chains,
/// flattening nested single-use TokenFactors, and de-duplicating operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;      // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;             // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorkList(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()))
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                           MVT::Other, &Ops[0], Ops.size());
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  // Nothing changed: return the null SDValue so Run() leaves N alone.
  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorkListRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorkList(N);
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  removeFromWorkList(N);
  DAG.DeleteNode(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// combineShlAddConstant - Given N0 = (shl (add x, c1), c2), reassociate so
/// the shift is applied to x and to c1 separately; the constant half can then
/// fold further.  Returns a null SDValue if the pattern does not match.
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
                              SelectionDAG &DAG) {
  EVT VT = N0.getValueType();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);

  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N00.getOperand(1))) {
    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
                                 N00.getOperand(0), N01),
                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
                                 N00.getOperand(1), N01));
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  }

  return SDValue();
}

/// visitADD - Combine integer ADD nodes: constant folding, canonicalization,
/// add/sub reassociation, and add->or/sub strength reductions.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
  if (RADD.getNode() != 0)
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
  }

  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
    if (Result.getNode()) return Result;
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      DebugLoc DL = N->getDebugLoc();
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    DebugLoc DL = N->getDebugLoc();
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  return SDValue();
}

/// visitADDC - Combine ADDC (add producing a carry flag) nodes.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 N->getDebugLoc(), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        N->getDebugLoc(), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   N->getDebugLoc(), MVT::Glue));
  }

  return SDValue();
}

/// visitADDE - Combine ADDE (add with carry-in) nodes.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG, bool LegalOperations) {
  if (!VT.isVector()) {
    return DAG.getConstant(0, VT);
  }
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
    // Produce a vector of zeros.
    SDValue El = DAG.getConstant(0, VT.getVectorElementType());
    std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
                       &Ops[0], Ops.size());
  }
  return SDValue();
}

/// visitSUB - Combine integer SUB nodes: constant folding, sub->add/xor
/// canonicalization, and add/sub reassociation.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // N1C1 is the constant c1 when N1 has the form (add A, c1), else null.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 0 :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  return SDValue();
}

/// visitSUBC - Combine SUBC (sub producing a borrow flag) nodes.
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into a SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  return SDValue();
}

/// visitSUBE - Combine a subtract-with-borrow-in node.  The only combine
/// performed here is discharging a known-clear borrow back into a plain SUBC.
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);

  return SDValue();
}

/// visitMUL - Combine an integer multiply.  The constant-RHS
/// canonicalization near the top must run before the later folds, which only
/// inspect N1C (the RHS constant).
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (mul c1, c2) -> c1*c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mul x, -1) -> 0-x
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                            DAG.getConstant(Log2Val,
                                      getShiftAmountTy(N0.getValueType()))));
  }
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1C && N0.getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    // The shifted constant (c2 << c1) is a new node; queue it so it gets
    // constant-folded on a later combine pass.
    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                             N1, N0.getOperand(1));
    AddToWorkList(C3.getNode());
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    // Sh/Y start out null; Sh is set to whichever operand is a one-use
    // shl-by-constant, Y to the other operand.
    SDValue Sh(0,0), Y(0,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // NOTE(review): the second MUL is tagged with N1's debug location rather
  // than N0's — presumably intentional to spread locations, but confirm.
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
  if (RMUL.getNode() != 0)
    return RMUL;

  return SDValue();
}

/// visitSDIV - Combine a signed integer divide.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
                         N0, N1);
  }
  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                              DAG.getConstant(VT.getSizeInBits()-1,
                                       getShiftAmountTy(N0.getValueType())));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // This biases a negative dividend so the following arithmetic shift
    // rounds toward zero instead of toward negative infinity.
    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                              DAG.getConstant(VT.getSizeInBits() - lg2,
                                       getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorkList(SRA.getNode());
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    // BuildSDIV presumably expands the divide into a cheaper multiply-based
    // sequence; it returns a null SDValue when it cannot.
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUDIV - Combine an unsigned integer divide.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // The shift amount addition happens in the shl's amount type, which
        // may differ from VT.
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                .logBase2(),
                                                  ADDVT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitSREM - Combine a signed integer remainder.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Only rewrites if the speculative divide itself combines into something
  // cheaper (OptimizedDiv differs from the raw Div node).
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUREM - Combine an unsigned integer remainder.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // Adding all-ones is subtracting one: build the (pow2<<y)-1 mask.
        SDValue Add =
          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // As in visitSREM, only rewrites if the speculative divide combines.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitMULHS - Combine a signed multiply-high node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhs x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign bits of x.
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // N0/N1 are reused as scratch here: widen, multiply, then shift the
      // product down by the original width to isolate the high half.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// visitMULHU - Combine an unsigned multiply-high node.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhu x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of the unsigned product x*1 is always zero.
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getConstant(0, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Same scratch-reuse pattern as visitMULHS, but zero-extended.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values. LoOp and HiOp give the opcodes for the two computations
/// that are being performed. Returns the replacement value if a
/// simplification was made, or a null SDValue otherwise.
///
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
2174 if (LoExists) { 2175 SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), 2176 N->op_begin(), N->getNumOperands()); 2177 AddToWorkList(Lo.getNode()); 2178 SDValue LoOpt = combine(Lo.getNode()); 2179 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2180 (!LegalOperations || 2181 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2182 return CombineTo(N, LoOpt, LoOpt); 2183 } 2184 2185 if (HiExists) { 2186 SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), 2187 N->op_begin(), N->getNumOperands()); 2188 AddToWorkList(Hi.getNode()); 2189 SDValue HiOpt = combine(Hi.getNode()); 2190 if (HiOpt.getNode() && HiOpt != Hi && 2191 (!LegalOperations || 2192 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2193 return CombineTo(N, HiOpt, HiOpt); 2194 } 2195 2196 return SDValue(); 2197} 2198 2199SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2200 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2201 if (Res.getNode()) return Res; 2202 2203 EVT VT = N->getValueType(0); 2204 DebugLoc DL = N->getDebugLoc(); 2205 2206 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2207 // plus a shift. 2208 if (VT.isSimple() && !VT.isVector()) { 2209 MVT Simple = VT.getSimpleVT(); 2210 unsigned SimpleSize = Simple.getSizeInBits(); 2211 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2212 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2213 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2214 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2215 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2216 // Compute the high part as N1. 2217 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2218 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2219 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2220 // Compute the low part as N0. 
2221 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2222 return CombineTo(N, Lo, Hi); 2223 } 2224 } 2225 2226 return SDValue(); 2227} 2228 2229SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2230 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2231 if (Res.getNode()) return Res; 2232 2233 EVT VT = N->getValueType(0); 2234 DebugLoc DL = N->getDebugLoc(); 2235 2236 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2237 // plus a shift. 2238 if (VT.isSimple() && !VT.isVector()) { 2239 MVT Simple = VT.getSimpleVT(); 2240 unsigned SimpleSize = Simple.getSizeInBits(); 2241 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2242 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2243 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2244 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2245 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2246 // Compute the high part as N1. 2247 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2248 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2249 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2250 // Compute the low part as N0. 
2251 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2252 return CombineTo(N, Lo, Hi); 2253 } 2254 } 2255 2256 return SDValue(); 2257} 2258 2259SDValue DAGCombiner::visitSMULO(SDNode *N) { 2260 // (smulo x, 2) -> (saddo x, x) 2261 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2262 if (C2->getAPIntValue() == 2) 2263 return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), 2264 N->getOperand(0), N->getOperand(0)); 2265 2266 return SDValue(); 2267} 2268 2269SDValue DAGCombiner::visitUMULO(SDNode *N) { 2270 // (umulo x, 2) -> (uaddo x, x) 2271 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2272 if (C2->getAPIntValue() == 2) 2273 return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), 2274 N->getOperand(0), N->getOperand(0)); 2275 2276 return SDValue(); 2277} 2278 2279SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2280 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2281 if (Res.getNode()) return Res; 2282 2283 return SDValue(); 2284} 2285 2286SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2287 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2288 if (Res.getNode()) return Res; 2289 2290 return SDValue(); 2291} 2292 2293/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with 2294/// two operands of the same opcode, try to simplify it. 2295SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2296 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2297 EVT VT = N0.getValueType(); 2298 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2299 2300 // Bail early if none of these transforms apply. 
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOps.  LegalizeVectorOps promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    DebugLoc DL = N->getDebugLoc();
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorkList(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N0.getOperand(1).getOpcode() == ISD::UNDEF &&
      N1.getOperand(1).getOpcode() == ISD::UNDEF) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    // NOTE(review): this compares N0's first input against N1's *second*
    // input (the operand checked to be UNDEF above); presumably
    // N1.getOperand(0) was intended.  Confirm whether the asymmetry matters
    // before relying on this assert.
    assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
           "Inputs to shuffles are not the same type");

    unsigned NumElts = VT.getVectorNumElements();

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
2391 bool SameMask = true; 2392 for (unsigned i = 0; i != NumElts; ++i) { 2393 int Idx0 = SVN0->getMaskElt(i); 2394 int Idx1 = SVN1->getMaskElt(i); 2395 if (Idx0 != Idx1) { 2396 SameMask = false; 2397 break; 2398 } 2399 } 2400 2401 if (SameMask) { 2402 SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, 2403 N0.getOperand(0), N1.getOperand(0)); 2404 AddToWorkList(Op.getNode()); 2405 return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, 2406 DAG.getUNDEF(VT), &SVN0->getMask()[0]); 2407 } 2408 } 2409 2410 return SDValue(); 2411} 2412 2413SDValue DAGCombiner::visitAND(SDNode *N) { 2414 SDValue N0 = N->getOperand(0); 2415 SDValue N1 = N->getOperand(1); 2416 SDValue LL, LR, RL, RR, CC0, CC1; 2417 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2418 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2419 EVT VT = N1.getValueType(); 2420 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2421 2422 // fold vector ops 2423 if (VT.isVector()) { 2424 SDValue FoldedVOp = SimplifyVBinOp(N); 2425 if (FoldedVOp.getNode()) return FoldedVOp; 2426 } 2427 2428 // fold (and x, undef) -> 0 2429 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2430 return DAG.getConstant(0, VT); 2431 // fold (and c1, c2) -> c1&c2 2432 if (N0C && N1C) 2433 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2434 // canonicalize constant to RHS 2435 if (N0C && !N1C) 2436 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); 2437 // fold (and x, -1) -> x 2438 if (N1C && N1C->isAllOnesValue()) 2439 return N0; 2440 // if (and x, c) is known to be zero, return 0 2441 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2442 APInt::getAllOnesValue(BitWidth))) 2443 return DAG.getConstant(0, VT); 2444 // reassociate and 2445 SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); 2446 if (RAND.getNode() != 0) 2447 return RAND; 2448 // fold (and (or x, C), D) -> D if (C & D) == D 2449 if (N1C && N0.getOpcode() == ISD::OR) 2450 if (ConstantSDNode *ORI = 
dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2451 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2452 return N1; 2453 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2454 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2455 SDValue N0Op0 = N0.getOperand(0); 2456 APInt Mask = ~N1C->getAPIntValue(); 2457 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2458 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2459 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), 2460 N0.getValueType(), N0Op0); 2461 2462 // Replace uses of the AND with uses of the Zero extend node. 2463 CombineTo(N, Zext); 2464 2465 // We actually want to replace all uses of the any_extend with the 2466 // zero_extend, to avoid duplicating things. This will later cause this 2467 // AND to be folded. 2468 CombineTo(N0.getNode(), Zext); 2469 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2470 } 2471 } 2472 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2473 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2474 // already be zero by virtue of the width of the base type of the load. 2475 // 2476 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2477 // more cases. 2478 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2479 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2480 N0.getOpcode() == ISD::LOAD) { 2481 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2482 N0 : N0.getOperand(0) ); 2483 2484 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2485 // This can be a pure constant or a vector splat, in which case we treat the 2486 // vector as a scalar and use the splat value. 
2487 APInt Constant = APInt::getNullValue(1); 2488 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2489 Constant = C->getAPIntValue(); 2490 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2491 APInt SplatValue, SplatUndef; 2492 unsigned SplatBitSize; 2493 bool HasAnyUndefs; 2494 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2495 SplatBitSize, HasAnyUndefs); 2496 if (IsSplat) { 2497 // Undef bits can contribute to a possible optimisation if set, so 2498 // set them. 2499 SplatValue |= SplatUndef; 2500 2501 // The splat value may be something like "0x00FFFFFF", which means 0 for 2502 // the first vector value and FF for the rest, repeating. We need a mask 2503 // that will apply equally to all members of the vector, so AND all the 2504 // lanes of the constant together. 2505 EVT VT = Vector->getValueType(0); 2506 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2507 2508 // If the splat value has been compressed to a bitlength lower 2509 // than the size of the vector lane, we need to re-expand it to 2510 // the lane size. 2511 if (BitWidth > SplatBitSize) 2512 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2513 SplatBitSize < BitWidth; 2514 SplatBitSize = SplatBitSize * 2) 2515 SplatValue |= SplatValue.shl(SplatBitSize); 2516 2517 Constant = APInt::getAllOnesValue(BitWidth); 2518 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 2519 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 2520 } 2521 } 2522 2523 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2524 // actually legal and isn't going to get expanded, else this is a false 2525 // optimisation. 2526 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2527 Load->getMemoryVT()); 2528 2529 // Resize the constant to the same size as the original memory access before 2530 // extension. If it is still the AllOnesValue then this AND is completely 2531 // unneeded. 
2532 Constant = 2533 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2534 2535 bool B; 2536 switch (Load->getExtensionType()) { 2537 default: B = false; break; 2538 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2539 case ISD::ZEXTLOAD: 2540 case ISD::NON_EXTLOAD: B = true; break; 2541 } 2542 2543 if (B && Constant.isAllOnesValue()) { 2544 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2545 // preserve semantics once we get rid of the AND. 2546 SDValue NewLoad(Load, 0); 2547 if (Load->getExtensionType() == ISD::EXTLOAD) { 2548 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2549 Load->getValueType(0), Load->getDebugLoc(), 2550 Load->getChain(), Load->getBasePtr(), 2551 Load->getOffset(), Load->getMemoryVT(), 2552 Load->getMemOperand()); 2553 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2554 if (Load->getNumValues() == 3) { 2555 // PRE/POST_INC loads have 3 values. 2556 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2557 NewLoad.getValue(2) }; 2558 CombineTo(Load, To, 3, true); 2559 } else { 2560 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2561 } 2562 } 2563 2564 // Fold the AND away, taking care not to fold to the old load node if we 2565 // replaced it. 2566 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2567 2568 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2569 } 2570 } 2571 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2572 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2573 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2574 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2575 2576 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2577 LL.getValueType().isInteger()) { 2578 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2579 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2580 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2581 LR.getValueType(), LL, RL); 2582 AddToWorkList(ORNode.getNode()); 2583 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2584 } 2585 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2586 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2587 SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), 2588 LR.getValueType(), LL, RL); 2589 AddToWorkList(ANDNode.getNode()); 2590 return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); 2591 } 2592 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2593 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2594 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2595 LR.getValueType(), LL, RL); 2596 AddToWorkList(ORNode.getNode()); 2597 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2598 } 2599 } 2600 // canonicalize equivalent to ll == rl 2601 if (LL == RR && LR == RL) { 2602 Op1 = ISD::getSetCCSwappedOperands(Op1); 2603 std::swap(RL, RR); 2604 } 2605 if (LL == RL && LR == RR) { 2606 bool isInteger = LL.getValueType().isInteger(); 2607 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2608 if (Result != ISD::SETCC_INVALID && 2609 (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) 2610 return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), 2611 LL, LR, Result); 2612 } 2613 } 2614 2615 // 
Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2616 if (N0.getOpcode() == N1.getOpcode()) { 2617 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2618 if (Tmp.getNode()) return Tmp; 2619 } 2620 2621 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2622 // fold (and (sra)) -> (and (srl)) when possible. 2623 if (!VT.isVector() && 2624 SimplifyDemandedBits(SDValue(N, 0))) 2625 return SDValue(N, 0); 2626 2627 // fold (zext_inreg (extload x)) -> (zextload x) 2628 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2629 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2630 EVT MemVT = LN0->getMemoryVT(); 2631 // If we zero all the possible extended bits, then we can turn this into 2632 // a zextload if we are running before legalize or the operation is legal. 2633 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2634 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2635 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2636 ((!LegalOperations && !LN0->isVolatile()) || 2637 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2638 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2639 LN0->getChain(), LN0->getBasePtr(), 2640 LN0->getPointerInfo(), MemVT, 2641 LN0->isVolatile(), LN0->isNonTemporal(), 2642 LN0->getAlignment()); 2643 AddToWorkList(N); 2644 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2645 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2646 } 2647 } 2648 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2649 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2650 N0.hasOneUse()) { 2651 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2652 EVT MemVT = LN0->getMemoryVT(); 2653 // If we zero all the possible extended bits, then we can turn this into 2654 // a zextload if we are running before legalize or the operation is legal. 
2655 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2656 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2657 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2658 ((!LegalOperations && !LN0->isVolatile()) || 2659 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2660 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2661 LN0->getChain(), 2662 LN0->getBasePtr(), LN0->getPointerInfo(), 2663 MemVT, 2664 LN0->isVolatile(), LN0->isNonTemporal(), 2665 LN0->getAlignment()); 2666 AddToWorkList(N); 2667 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2668 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2669 } 2670 } 2671 2672 // fold (and (load x), 255) -> (zextload x, i8) 2673 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2674 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2675 if (N1C && (N0.getOpcode() == ISD::LOAD || 2676 (N0.getOpcode() == ISD::ANY_EXTEND && 2677 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2678 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2679 LoadSDNode *LN0 = HasAnyExt 2680 ? cast<LoadSDNode>(N0.getOperand(0)) 2681 : cast<LoadSDNode>(N0); 2682 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2683 LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { 2684 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2685 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2686 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2687 EVT LoadedVT = LN0->getMemoryVT(); 2688 2689 if (ExtVT == LoadedVT && 2690 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2691 EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; 2692 2693 SDValue NewLoad = 2694 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2695 LN0->getChain(), LN0->getBasePtr(), 2696 LN0->getPointerInfo(), 2697 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2698 LN0->getAlignment()); 2699 AddToWorkList(N); 2700 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2701 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2702 } 2703 2704 // Do not change the width of a volatile load. 2705 // Do not generate loads of non-round integer types since these can 2706 // be expensive (and would be wrong if the type is not byte sized). 2707 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2708 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2709 EVT PtrType = LN0->getOperand(1).getValueType(); 2710 2711 unsigned Alignment = LN0->getAlignment(); 2712 SDValue NewPtr = LN0->getBasePtr(); 2713 2714 // For big endian targets, we need to add an offset to the pointer 2715 // to load the correct bytes. For little endian systems, we merely 2716 // need to read fewer bytes from the same pointer. 2717 if (TLI.isBigEndian()) { 2718 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 2719 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 2720 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 2721 NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, 2722 NewPtr, DAG.getConstant(PtrOff, PtrType)); 2723 Alignment = MinAlign(Alignment, PtrOff); 2724 } 2725 2726 AddToWorkList(NewPtr.getNode()); 2727 2728 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2729 SDValue Load = 2730 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2731 LN0->getChain(), NewPtr, 2732 LN0->getPointerInfo(), 2733 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2734 Alignment); 2735 AddToWorkList(N); 2736 CombineTo(LN0, Load, Load.getValue(1)); 2737 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2738 } 2739 } 2740 } 2741 } 2742 2743 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2744 VT.getSizeInBits() <= 64) { 2745 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2746 APInt ADDC = ADDI->getAPIntValue(); 2747 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2748 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2749 // immediate for an add, but it is legal if its top c2 bits are set, 2750 // transform the ADD so the immediate doesn't need to be materialized 2751 // in a register. 2752 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2753 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2754 SRLI->getZExtValue()); 2755 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2756 ADDC |= Mask; 2757 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2758 SDValue NewAdd = 2759 DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, 2760 N0.getOperand(0), DAG.getConstant(ADDC, VT)); 2761 CombineTo(N0.getNode(), NewAdd); 2762 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
            }
          }
        }
      }
    }
  }


  // No further AND-specific fold matched.
  return SDValue();
}

/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR node being combined; N0/N1 are its operands.  If
/// DemandHighBits is set, the bits above the low halfword of the source must
/// be provably zero unless both inputs were already masked by explicit ANDs.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize: put an AND-of-SRL on the N1 side and an AND-of-SHL on the
  // N0 side before peeling the masks off.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After unwrapping the ANDs we need exactly (shl ..) on N0 and (srl ..) on
  // N1, each by a constant 8, and both with a single use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 ||
      N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must byte-swap the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword is zero since the SRL 16
  // will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16 &&
      (!LookPassAnd0 || !LookPassAnd1) &&
      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
    return SDValue();

  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
/// On success the node feeding the matched term is recorded in the Parts
/// slot selected by its byte mask; each slot may be claimed only once.
static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Map the byte mask to a Parts slot.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // A slot may only be filled once.
  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  SmallVector<SDNode*,4> Parts(4, (SDNode*)0);
  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (N1.getOpcode() == ISD::OR) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
                              SDValue(Parts[0],0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
}

/// visitOR - Combine an ISD::OR node: constant folding and canonicalization,
/// setcc merging, mask merging, halfword-bswap and rotate matching.  Returns
/// the replacement value, N itself if N was updated in place, or a null
/// SDValue if no combine applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;

  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                                     N0.getOperand(0), N1),
                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // Peel off an optional constant AND mask.
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      // Variable mask: can't reason about which bits survive.
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // Constant amounts must sum to exactly the bit width; otherwise this is
    // not a rotate and we give up entirely.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg,
                           HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, y)) -> (rotl x, (sub 32, y))
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg,
                           HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}

/// visitXOR - Combine an ISD::XOR node.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
N0.getOperand(3), NotCC); 3372 } 3373 } 3374 } 3375 3376 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 3377 if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && 3378 N0.getNode()->hasOneUse() && 3379 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 3380 SDValue V = N0.getOperand(0); 3381 V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, 3382 DAG.getConstant(1, V.getValueType())); 3383 AddToWorkList(V.getNode()); 3384 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); 3385 } 3386 3387 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 3388 if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && 3389 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 3390 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 3391 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 3392 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 3393 LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS 3394 RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS 3395 AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); 3396 return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); 3397 } 3398 } 3399 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 3400 if (N1C && N1C->isAllOnesValue() && 3401 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 3402 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 3403 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 3404 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
///
/// Pulls a binop through a shift: rewrites (shift (binop x, c1), c2) as
/// (binop (shift x, c2), (shift c1, c2)) so that e.g. (and (shift)) is
/// produced instead of (shift (and)).  Returns the replacement node, or a
/// null SDValue when the transform does not apply.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  // Pulling the binop out would duplicate it if it had other users.
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();   // Not a binop we know how to pull through.
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 LHS->getOperand(0).getDebugLoc(),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
}
/// visitSHL - Combine an ISD::SHL node.  Tries a series of folds in order,
/// returning the replacement value for the first one that applies, or a null
/// SDValue when no simplification was possible.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  // Pushing the truncate down exposes the AND mask at the narrow width.
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    // A combined shift of >= the bit width produces zero.
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    if (c2 >= OpSizeInBits - InnerShiftSize) {
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
                                     N0.getOperand(0)->getOperand(0)),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    if (c1 < VT.getSizeInBits()) {
      uint64_t c2 = N1C->getZExtValue();
      // Mask of the bits the srl leaves defined; adjusted below by the
      // residual shift direction.
      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                         VT.getSizeInBits() - c1);
      SDValue Shift;
      if (c2 > c1) {
        Mask = Mask.shl(c2-c1);
        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c2-c1, N1.getValueType()));
      } else {
        Mask = Mask.lshr(c1-c2);
        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c1-c2, N1.getValueType()));
      }
      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
                         DAG.getConstant(Mask, VT));
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                            VT.getSizeInBits() -
                                              N1C->getZExtValue()),
                      VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // Finally, try the transforms shared by SHL/SRA/SRL.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}
/// visitSRA - Combine an ISD::SRA (arithmetic shift right) node.  Tries a
/// series of folds in order and returns the replacement value for the first
/// that applies, or a null SDValue when nothing simplified.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (sra c1, c2) -> (c1 >>s c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // Clamp: an sra amount of size-1 already replicates the sign bit
      // everywhere, and amounts >= size would be undefined.
      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1C->getValueType(0)));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N01C && N1C) {
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT =
        EVT::getIntegerVT(*DAG.getContext(),
                          OpSizeInBits - N1C->getZExtValue());
      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1+c2))
  // if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    EVT LargeVT = N0.getOperand(0).getValueType();
    ConstantSDNode *LargeShiftAmt =
      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));

    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
        LargeShiftAmt->getZExtValue()) {
      SDValue Amt =
        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
                                N0.getOperand(0).getOperand(0), Amt);
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);

  // Finally, try the transforms shared by SHL/SRA/SRL.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}
/// visitSRL - Combine an ISD::SRL (logical shift right) node.  Tries a series
/// of folds in order and returns the replacement value for the first that
/// applies, or a null SDValue when nothing simplified.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // cst2 keeps the low (size - c) bits, i.e. the bits the shl/srl pair
  // would preserve.
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      N0.getValueSizeInBits() <= 64) {
    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(~0ULL >> ShAmt, VT));
  }


  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorkList(SmallShift.getNode());
      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
    APInt KnownZero, KnownOne;
    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorkList(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND
  // into the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorkList(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorkList(Use);
    }
  }

  return SDValue();
}
3971 Use = *Use->use_begin(); 3972 if (Use->getOpcode() == ISD::BRCOND) 3973 AddToWorkList(Use); 3974 } 3975 } 3976 3977 return SDValue(); 3978} 3979 3980SDValue DAGCombiner::visitCTLZ(SDNode *N) { 3981 SDValue N0 = N->getOperand(0); 3982 EVT VT = N->getValueType(0); 3983 3984 // fold (ctlz c1) -> c2 3985 if (isa<ConstantSDNode>(N0)) 3986 return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); 3987 return SDValue(); 3988} 3989 3990SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 3991 SDValue N0 = N->getOperand(0); 3992 EVT VT = N->getValueType(0); 3993 3994 // fold (ctlz_zero_undef c1) -> c2 3995 if (isa<ConstantSDNode>(N0)) 3996 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); 3997 return SDValue(); 3998} 3999 4000SDValue DAGCombiner::visitCTTZ(SDNode *N) { 4001 SDValue N0 = N->getOperand(0); 4002 EVT VT = N->getValueType(0); 4003 4004 // fold (cttz c1) -> c2 4005 if (isa<ConstantSDNode>(N0)) 4006 return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); 4007 return SDValue(); 4008} 4009 4010SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 4011 SDValue N0 = N->getOperand(0); 4012 EVT VT = N->getValueType(0); 4013 4014 // fold (cttz_zero_undef c1) -> c2 4015 if (isa<ConstantSDNode>(N0)) 4016 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); 4017 return SDValue(); 4018} 4019 4020SDValue DAGCombiner::visitCTPOP(SDNode *N) { 4021 SDValue N0 = N->getOperand(0); 4022 EVT VT = N->getValueType(0); 4023 4024 // fold (ctpop c1) -> c2 4025 if (isa<ConstantSDNode>(N0)) 4026 return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); 4027 return SDValue(); 4028} 4029 4030SDValue DAGCombiner::visitSELECT(SDNode *N) { 4031 SDValue N0 = N->getOperand(0); 4032 SDValue N1 = N->getOperand(1); 4033 SDValue N2 = N->getOperand(2); 4034 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4035 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4036 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 4037 EVT VT = N->getValueType(0); 4038 
/// visitSELECT - Combine an ISD::SELECT node.  Folds selections with constant
/// or repeated operands, converts i1 selects into logic ops, and turns a
/// select-of-setcc into SELECT_CC where the target supports it.  Returns the
/// replacement value, or a null SDValue when nothing simplified.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // condition
  SDValue N1 = N->getOperand(1);   // true value
  SDValue N2 = N->getOperand(2);   // false value
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // Only valid when the condition is known to be a 0/1 value (i1, or a
  // target whose booleans are zero-or-one).
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents(false) ==
        TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Result type differs from the condition type: xor at the condition
    // width, then extend or truncate to the result width.
    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // FIXME:
    // Check against MVT::Other for SELECT_CC, which is a workaround for
    // targets having to say they don't support SELECT_CC on every type the
    // DAG knows about, since there is no way to mark an opcode illegal at
    // all value types
    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
  }

  return SDValue();
}
/// visitSELECT_CC - Combine an ISD::SELECT_CC node: fold identical arms,
/// resolve a constant condition, shrink to a simpler SELECT_CC, and finally
/// delegate to SimplifySelectCC for min/max/abs-style patterns.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);   // compare LHS
  SDValue N1 = N->getOperand(1);   // compare RHS
  SDValue N2 = N->getOperand(2);   // true value
  SDValue N3 = N->getOperand(3);   // false value
  SDValue N4 = N->getOperand(4);   // condition code
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, N->getDebugLoc(), false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    if (!SCCC->isNullValue())
      return N2;    // cond always true -> true val
    else
      return N3;    // cond always false -> false val
  }

  // Fold to a simpler select_cc
  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                       SCC.getOperand(2));

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
}

/// visitSETCC - Combine an ISD::SETCC node by delegating to the shared
/// SimplifySetCC logic.
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
                       N->getDebugLoc());
}
4178 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 4179 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 4180 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 4181 // Sign bits will be lost after a zext. 4182 return false; 4183 bool Add = false; 4184 for (unsigned i = 0; i != 2; ++i) { 4185 SDValue UseOp = User->getOperand(i); 4186 if (UseOp == N0) 4187 continue; 4188 if (!isa<ConstantSDNode>(UseOp)) 4189 return false; 4190 Add = true; 4191 } 4192 if (Add) 4193 ExtendNodes.push_back(User); 4194 continue; 4195 } 4196 // If truncates aren't free and there are users we can't 4197 // extend, it isn't worthwhile. 4198 if (!isTruncFree) 4199 return false; 4200 // Remember if this value is live-out. 4201 if (User->getOpcode() == ISD::CopyToReg) 4202 HasCopyToRegUses = true; 4203 } 4204 4205 if (HasCopyToRegUses) { 4206 bool BothLiveOut = false; 4207 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4208 UI != UE; ++UI) { 4209 SDUse &Use = UI.getUse(); 4210 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 4211 BothLiveOut = true; 4212 break; 4213 } 4214 } 4215 if (BothLiveOut) 4216 // Both unextended and extended values are live out. There had better be 4217 // a good reason for the transformation. 4218 return ExtendNodes.size(); 4219 } 4220 return true; 4221} 4222 4223void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 4224 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 4225 ISD::NodeType ExtType) { 4226 // Extend SetCC uses if necessary. 
/// ExtendSetCCUses - Rewrite each SETCC node in SetCCs so that it operates on
/// the extended value: the operand equal to Trunc is replaced by ExtLoad, and
/// every other operand is wrapped in an ExtType extension to ExtLoad's type.
/// The rebuilt SETCC replaces the old one via CombineTo.
///
/// NOTE(review): SetCCs is taken by value, which copies the SmallVector on
/// every call; the declaration lives elsewhere, so changing it to a const
/// reference would need a matching header change — TODO confirm.
void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs,
                                  SDValue Trunc, SDValue ExtLoad, DebugLoc DL,
                                  ISD::NodeType ExtType) {
  // Extend SetCC uses if necessary.
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    // Rebuild the two compared operands at the extended width.
    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    // Keep the original condition code.
    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0),
                                 &Ops[0], Ops.size()));
  }
}
4275 SDValue Op = N0.getOperand(0); 4276 unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); 4277 unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); 4278 unsigned DestBits = VT.getScalarType().getSizeInBits(); 4279 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 4280 4281 if (OpBits == DestBits) { 4282 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 4283 // bits, it is already ready. 4284 if (NumSignBits > DestBits-MidBits) 4285 return Op; 4286 } else if (OpBits < DestBits) { 4287 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 4288 // bits, just sext from i32. 4289 if (NumSignBits > OpBits-MidBits) 4290 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); 4291 } else { 4292 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 4293 // bits, just truncate to i32. 4294 if (NumSignBits > OpBits-MidBits) 4295 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); 4296 } 4297 4298 // fold (sext (truncate x)) -> (sextinreg x). 4299 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 4300 N0.getValueType())) { 4301 if (OpBits < DestBits) 4302 Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); 4303 else if (OpBits > DestBits) 4304 Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); 4305 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, 4306 DAG.getValueType(N0.getValueType())); 4307 } 4308 } 4309 4310 // fold (sext (load x)) -> (sext (truncate (sextload x))) 4311 // None of the supported targets knows how to perform load and sign extend 4312 // on vectors in one instruction. We only perform this transformation on 4313 // scalars. 
4314 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4315 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4316 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { 4317 bool DoXform = true; 4318 SmallVector<SDNode*, 4> SetCCs; 4319 if (!N0.hasOneUse()) 4320 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); 4321 if (DoXform) { 4322 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4323 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4324 LN0->getChain(), 4325 LN0->getBasePtr(), LN0->getPointerInfo(), 4326 N0.getValueType(), 4327 LN0->isVolatile(), LN0->isNonTemporal(), 4328 LN0->getAlignment()); 4329 CombineTo(N, ExtLoad); 4330 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4331 N0.getValueType(), ExtLoad); 4332 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4333 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4334 ISD::SIGN_EXTEND); 4335 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4336 } 4337 } 4338 4339 // fold (sext (sextload x)) -> (sext (truncate (sextload x))) 4340 // fold (sext ( extload x)) -> (sext (truncate (sextload x))) 4341 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 4342 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 4343 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4344 EVT MemVT = LN0->getMemoryVT(); 4345 if ((!LegalOperations && !LN0->isVolatile()) || 4346 TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { 4347 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4348 LN0->getChain(), 4349 LN0->getBasePtr(), LN0->getPointerInfo(), 4350 MemVT, 4351 LN0->isVolatile(), LN0->isNonTemporal(), 4352 LN0->getAlignment()); 4353 CombineTo(N, ExtLoad); 4354 CombineTo(N0.getNode(), 4355 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4356 N0.getValueType(), ExtLoad), 4357 ExtLoad.getValue(1)); 4358 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4359 } 4360 } 4361 4362 // fold (sext (and/or/xor (load x), cst)) -> 4363 // (and/or/xor (sextload x), (sext cst)) 4364 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 4365 N0.getOpcode() == ISD::XOR) && 4366 isa<LoadSDNode>(N0.getOperand(0)) && 4367 N0.getOperand(1).getOpcode() == ISD::Constant && 4368 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && 4369 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 4370 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 4371 if (LN0->getExtensionType() != ISD::ZEXTLOAD) { 4372 bool DoXform = true; 4373 SmallVector<SDNode*, 4> SetCCs; 4374 if (!N0.hasOneUse()) 4375 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, 4376 SetCCs, TLI); 4377 if (DoXform) { 4378 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, 4379 LN0->getChain(), LN0->getBasePtr(), 4380 LN0->getPointerInfo(), 4381 LN0->getMemoryVT(), 4382 LN0->isVolatile(), 4383 LN0->isNonTemporal(), 4384 LN0->getAlignment()); 4385 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4386 Mask = Mask.sext(VT.getSizeInBits()); 4387 SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 4388 ExtLoad, DAG.getConstant(Mask, VT)); 4389 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 4390 N0.getOperand(0).getDebugLoc(), 4391 N0.getOperand(0).getValueType(), ExtLoad); 4392 CombineTo(N, And); 4393 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 4394 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4395 ISD::SIGN_EXTEND); 4396 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4397 } 4398 } 4399 } 4400 4401 if (N0.getOpcode() == ISD::SETCC) { 4402 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 4403 // Only do this before legalize for now. 
4404 if (VT.isVector() && !LegalOperations) { 4405 EVT N0VT = N0.getOperand(0).getValueType(); 4406 // On some architectures (such as SSE/NEON/etc) the SETCC result type is 4407 // of the same size as the compared operands. Only optimize sext(setcc()) 4408 // if this is the case. 4409 EVT SVT = TLI.getSetCCResultType(N0VT); 4410 4411 // We know that the # elements of the results is the same as the 4412 // # elements of the compare (and the # elements of the compare result 4413 // for that matter). Check to see that they are the same size. If so, 4414 // we know that the element size of the sext'd result matches the 4415 // element size of the compare operands. 4416 if (VT.getSizeInBits() == SVT.getSizeInBits()) 4417 return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4418 N0.getOperand(1), 4419 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4420 // If the desired elements are smaller or larger than the source 4421 // elements we can use a matching integer vector type and then 4422 // truncate/sign extend 4423 EVT MatchingElementType = 4424 EVT::getIntegerVT(*DAG.getContext(), 4425 N0VT.getScalarType().getSizeInBits()); 4426 EVT MatchingVectorType = 4427 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4428 N0VT.getVectorNumElements()); 4429 4430 if (SVT == MatchingVectorType) { 4431 SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, 4432 N0.getOperand(0), N0.getOperand(1), 4433 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4434 return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 4435 } 4436 } 4437 4438 // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) 4439 unsigned ElementWidth = VT.getScalarType().getSizeInBits(); 4440 SDValue NegOne = 4441 DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); 4442 SDValue SCC = 4443 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4444 NegOne, DAG.getConstant(0, VT), 4445 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4446 if (SCC.getNode()) return 
SCC; 4447 if (!LegalOperations || 4448 TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) 4449 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4450 DAG.getSetCC(N->getDebugLoc(), 4451 TLI.getSetCCResultType(VT), 4452 N0.getOperand(0), N0.getOperand(1), 4453 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 4454 NegOne, DAG.getConstant(0, VT)); 4455 } 4456 4457 // fold (sext x) -> (zext x) if the sign bit is known zero. 4458 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 4459 DAG.SignBitIsZero(N0)) 4460 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); 4461 4462 return SDValue(); 4463} 4464 4465// isTruncateOf - If N is a truncate of some other value, return true, record 4466// the value being truncated in Op and which of Op's bits are zero in KnownZero. 4467// This function computes KnownZero to avoid a duplicated call to 4468// ComputeMaskedBits in the caller. 4469static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 4470 APInt &KnownZero) { 4471 APInt KnownOne; 4472 if (N->getOpcode() == ISD::TRUNCATE) { 4473 Op = N->getOperand(0); 4474 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 4475 return true; 4476 } 4477 4478 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 4479 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 4480 return false; 4481 4482 SDValue Op0 = N->getOperand(0); 4483 SDValue Op1 = N->getOperand(1); 4484 assert(Op0.getValueType() == Op1.getValueType()); 4485 4486 ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); 4487 ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); 4488 if (COp0 && COp0->isNullValue()) 4489 Op = Op1; 4490 else if (COp1 && COp1->isNullValue()) 4491 Op = Op0; 4492 else 4493 return false; 4494 4495 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 4496 4497 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 4498 return false; 4499 4500 return true; 4501} 4502 4503SDValue DAGCombiner::visitZERO_EXTEND(SDNode 
*N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // TruncatedBits = the bits of Op discarded by the truncate (and not
    // re-covered by the zext); empty when Op and N0 have the same width.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Bring x to the destination width, then mask off everything above the
    // truncated width.
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
    }
    // Zero-extend the constant mask; it already clears the truncated bits.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the other uses can be extended
    // too (see ExtendUsesToFormExtLoad).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // NOTE(review): as in visitSIGN_EXTEND, confirm the '&&' in the
  // '!LegalOperations && isOperationLegal' clause below is intended.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    // A sextload cannot be turned into a zextload without changing the
    // loaded value's high bits.
    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getPointerInfo(),
                                         LN0->getMemoryVT(),
                                         LN0->isVolatile(),
                                         LN0->isNonTemporal(),
                                         LN0->getAlignment());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    N0.getOperand(0).getDebugLoc(),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector()) {
      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT N0VT = N0.getOperand(0).getValueType();
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                           DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                       &OneOps[0], OneOps.size()));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                     &OneOps[0], OneOps.size()));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      // (The inner zext guarantees this many high bits are zero.)
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    DebugLoc DL = N->getDebugLoc();

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

// visitANY_EXTEND - Combine an ANY_EXTEND node: fold through other extends
// and truncates, and turn extended loads into extloads.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
    }
    // Zero-extend the constant mask; it already clears the truncated bits.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // With multiple uses, only profitable if the other uses can be extended
    // too (see ExtendUsesToFormExtLoad).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // Re-create the existing extending load with the wider result type; its
    // extension kind is preserved.
    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
                                     VT, LN0->getChain(), LN0->getBasePtr(),
                                     LN0->getPointerInfo(), MemVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                          N0.getValueType(), ExtLoad),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // aext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      else {
        EVT MatchingElementType =
          EVT::getIntegerVT(*DAG.getContext(),
                            N0VT.getScalarType().getSizeInBits());
        EVT MatchingVectorType =
          EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                           N0VT.getVectorNumElements());
        SDValue VsetCC =
          DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used.  If so, return the
/// simpler operand, otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
  switch (V.getOpcode()) {
  default: break;
  case ISD::Constant: {
    const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
    assert(CV != 0 && "Const value should be ConstSDNode.");
    const APInt &CVal = CV->getAPIntValue();
    // Drop the constant's bits outside the demanded mask.
    APInt NewVal = CVal & Mask;
    if (NewVal != CVal) {
      return DAG.getConstant(NewVal, V.getValueType());
    }
    break;
  }
  case ISD::OR:
  case ISD::XOR:
    // If the LHS or RHS don't contribute bits to the or, drop them.
    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
      return V.getOperand(1);
    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
      return V.getOperand(0);
    break;
  case ISD::SRL:
    // Only look at single-use SRLs.
    if (!V.getNode()->hasOneUse())
      break;
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      // See if we can recursively simplify the LHS.
      unsigned Amt = RHSC->getZExtValue();

      // Watch out for shift count overflow though.
      if (Amt >= Mask.getBitWidth()) break;
      // Demanded bits of the shift input are the demanded result bits shifted
      // back up by the shift amount.
      APInt NewMask = Mask << Amt;
      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
      if (SimplifyLHS.getNode())
        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
                           SimplifyLHS, V.getOperand(1));
    }
  }
  return SDValue();
}

/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
/// bits and then truncated to a narrower type and where N is a multiple
/// of number of bits of the narrower type, transform it to a narrower load
/// from address + N / num of bits of new type. If the result is to be
/// extended, also fold the extension to form a extending load.
/// ReduceLoadWidth - Try to replace N -- a SIGN_EXTEND_INREG, SRL, or (from
/// the callers visitTRUNCATE/visitSIGN_EXTEND_INREG) TRUNCATE whose input
/// chains down to a scalar load -- with a single narrower (possibly extending)
/// load of just the bytes that are actually demanded.  Returns the new loaded
/// value, or a null SDValue if the transformation does not apply.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    // Note that N0 is reset to N itself here so that the SRL handling below
    // picks up the shift amount.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      // If the load was a sextload then the result is a splat of the sign bit
      // of the extended byte.  This is not worth optimizing for.
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
      // Don't change the width of a volatile load.
      cast<LoadSDNode>(N0)->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Byte offset of the narrow load from the original base pointer; the
  // alignment of the new load can only shrink accordingly.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorkList(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign);
  else
    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          NewAlign);

  // Replace the old load's chain with the new load's chain.
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
                         Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

/// visitSIGN_EXTEND_INREG - Combine a SIGN_EXTEND_INREG node: constant fold,
/// drop redundant extensions, convert to zext_in_reg / sra / sextload where
/// the bits allow it, and try to narrow an underlying load.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NB: the local variable 'EVT' (the in-register type being sign extended
  // from) shadows the type name 'EVT'.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);
  }

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    // NOTE(review): N1 is passed as a second operand to a nominally-unary
    // SIGN_EXTEND here -- matches historical behavior; confirm intended.
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                     EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                     EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode() != 0)
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         BSwap, N1);
  }

  return SDValue();
}

/// visitTRUNCATE - Combine a TRUNCATE node: constant fold, collapse
/// truncate-of-extend chains, narrow vector extracts and loads, and simplify
/// based on demanded bits.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse()) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = N0->getOperand(1).getValueType();
      // On big-endian targets the desired narrow piece is the last sub-element
      // of the wide element, hence the (SizeRatio-1) adjustment.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));
      SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         N->getDebugLoc(), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    SDValue Reduced = ReduceLoadWidth(N);
    if (Reduced.getNode())
      return Reduced;
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V);
        AddToWorkList(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT,
                         &Opnds[0], Opnds.size());
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// getBuildPairElt - Return the SDNode producing operand i of a BUILD_PAIR,
/// looking through a MERGE_VALUES wrapper if present.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getPointerInfo().getAddrSpace() !=
         LD2->getPointerInfo().getAddrSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    // The wide load must not require more alignment than the first narrow
    // load provides.
    unsigned NewAlign = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// visitBITCAST - Combine a BITCAST node: constant fold, collapse bitcast
/// chains, fold through loads, fneg/fabs/fcopysign, and consecutive-load
/// build_pairs.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = true;
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
          N0.getOperand(i).getOpcode() != ISD::Constant &&
          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
        isSimple = false;
        break;
      }

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
    if (Res.getNode() != N) {
      if (!LegalOperations ||
          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
        return Res;

      // Folding it resulted in an illegal node, and it's too late to
      // do that. Clean up the old node and forego the transformation.
      // Ideally this won't happen very often, because instcombine
      // and the earlier dagcombine runs (where illegal nodes are
      // permitted) should have folded most of them already.
      DAG.DeleteNode(Res.getNode());
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign);
      AddToWorkList(N);
      // Other users of the old load see it through a bitcast back to the
      // original type.
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                            N0.getValueType(), Load),
                Load.getValue(1));
      return Load;
    }
  }

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() &&
      !VT.isVector() && !N0.getValueType().isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                  N0.getOperand(0));
    AddToWorkList(NewConv.getNode());

    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
                         NewConv, DAG.getConstant(SignBit, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       NewConv, DAG.getConstant(~SignBit, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                              IntXVT, N0.getOperand(1));
      AddToWorkList(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
        AddToWorkList(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
        AddToWorkList(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
        AddToWorkList(X.getNode());
      }

      // Isolate the sign bit of x and the magnitude bits of the constant,
      // then OR them together.
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
                      X, DAG.getConstant(SignBit, VT));
      AddToWorkList(X.getNode());

      SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
                        Cst, DAG.getConstant(~SignBit, VT));
      AddToWorkList(Cst.getNode());

      return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

/// visitBUILD_PAIR - The only combine here is merging the pair of consecutive
/// loads into one wide load.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                DstEltVT, Op));
      AddToWorkList(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first source element is the low part
        // of the wide result, so walk the inputs in reverse.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
5682 if (TLI.isBigEndian()) 5683 std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); 5684 } 5685 5686 return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, 5687 &Ops[0], Ops.size()); 5688} 5689 5690SDValue DAGCombiner::visitFADD(SDNode *N) { 5691 SDValue N0 = N->getOperand(0); 5692 SDValue N1 = N->getOperand(1); 5693 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5694 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5695 EVT VT = N->getValueType(0); 5696 5697 // fold vector ops 5698 if (VT.isVector()) { 5699 SDValue FoldedVOp = SimplifyVBinOp(N); 5700 if (FoldedVOp.getNode()) return FoldedVOp; 5701 } 5702 5703 // fold (fadd c1, c2) -> c1 + c2 5704 if (N0CFP && N1CFP) 5705 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); 5706 // canonicalize constant to RHS 5707 if (N0CFP && !N1CFP) 5708 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); 5709 // fold (fadd A, 0) -> A 5710 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5711 N1CFP->getValueAPF().isZero()) 5712 return N0; 5713 // fold (fadd A, (fneg B)) -> (fsub A, B) 5714 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 5715 isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 5716 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, 5717 GetNegatedExpression(N1, DAG, LegalOperations)); 5718 // fold (fadd (fneg A), B) -> (fsub B, A) 5719 if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && 5720 isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) 5721 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, 5722 GetNegatedExpression(N0, DAG, LegalOperations)); 5723 5724 // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) 5725 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 5726 N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && 5727 isa<ConstantFPSDNode>(N0.getOperand(1))) 5728 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 
N0.getOperand(0), 5729 DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5730 N0.getOperand(1), N1)); 5731 5732 // If allow, fold (fadd (fneg x), x) -> 0.0 5733 if (DAG.getTarget().Options.UnsafeFPMath && 5734 N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { 5735 return DAG.getConstantFP(0.0, VT); 5736 } 5737 5738 // If allow, fold (fadd x, (fneg x)) -> 0.0 5739 if (DAG.getTarget().Options.UnsafeFPMath && 5740 N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { 5741 return DAG.getConstantFP(0.0, VT); 5742 } 5743 5744 // In unsafe math mode, we can fold chains of FADD's of the same value 5745 // into multiplications. This transform is not safe in general because 5746 // we are reducing the number of rounding steps. 5747 if (DAG.getTarget().Options.UnsafeFPMath && 5748 TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && 5749 !N0CFP && !N1CFP) { 5750 if (N0.getOpcode() == ISD::FMUL) { 5751 ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0)); 5752 ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 5753 5754 // (fadd (fmul c, x), x) -> (fmul c+1, x) 5755 if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { 5756 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5757 SDValue(CFP00, 0), 5758 DAG.getConstantFP(1.0, VT)); 5759 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5760 N1, NewCFP); 5761 } 5762 5763 // (fadd (fmul x, c), x) -> (fmul c+1, x) 5764 if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { 5765 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5766 SDValue(CFP01, 0), 5767 DAG.getConstantFP(1.0, VT)); 5768 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5769 N1, NewCFP); 5770 } 5771 5772 // (fadd (fadd x, x), x) -> (fmul 3.0, x) 5773 if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && 5774 N0.getOperand(0) == N1) { 5775 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5776 N1, DAG.getConstantFP(3.0, VT)); 5777 } 5778 5779 // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) 5780 
if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && 5781 N1.getOperand(0) == N1.getOperand(1) && 5782 N0.getOperand(1) == N1.getOperand(0)) { 5783 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5784 SDValue(CFP00, 0), 5785 DAG.getConstantFP(2.0, VT)); 5786 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5787 N0.getOperand(1), NewCFP); 5788 } 5789 5790 // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) 5791 if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && 5792 N1.getOperand(0) == N1.getOperand(1) && 5793 N0.getOperand(0) == N1.getOperand(0)) { 5794 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5795 SDValue(CFP01, 0), 5796 DAG.getConstantFP(2.0, VT)); 5797 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5798 N0.getOperand(0), NewCFP); 5799 } 5800 } 5801 5802 if (N1.getOpcode() == ISD::FMUL) { 5803 ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0)); 5804 ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1)); 5805 5806 // (fadd x, (fmul c, x)) -> (fmul c+1, x) 5807 if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { 5808 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5809 SDValue(CFP10, 0), 5810 DAG.getConstantFP(1.0, VT)); 5811 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5812 N0, NewCFP); 5813 } 5814 5815 // (fadd x, (fmul x, c)) -> (fmul c+1, x) 5816 if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { 5817 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5818 SDValue(CFP11, 0), 5819 DAG.getConstantFP(1.0, VT)); 5820 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5821 N0, NewCFP); 5822 } 5823 5824 // (fadd x, (fadd x, x)) -> (fmul 3.0, x) 5825 if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && 5826 N1.getOperand(0) == N0) { 5827 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5828 N0, DAG.getConstantFP(3.0, VT)); 5829 } 5830 5831 // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) 5832 if (CFP10 && !CFP11 && N1.getOpcode() == 
ISD::FADD && 5833 N1.getOperand(0) == N1.getOperand(1) && 5834 N0.getOperand(1) == N1.getOperand(0)) { 5835 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5836 SDValue(CFP10, 0), 5837 DAG.getConstantFP(2.0, VT)); 5838 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5839 N0.getOperand(1), NewCFP); 5840 } 5841 5842 // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) 5843 if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && 5844 N1.getOperand(0) == N1.getOperand(1) && 5845 N0.getOperand(0) == N1.getOperand(0)) { 5846 SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, 5847 SDValue(CFP11, 0), 5848 DAG.getConstantFP(2.0, VT)); 5849 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5850 N0.getOperand(0), NewCFP); 5851 } 5852 } 5853 5854 // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) 5855 if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && 5856 N0.getOperand(0) == N0.getOperand(1) && 5857 N1.getOperand(0) == N1.getOperand(1) && 5858 N0.getOperand(0) == N1.getOperand(0)) { 5859 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 5860 N0.getOperand(0), 5861 DAG.getConstantFP(4.0, VT)); 5862 } 5863 } 5864 5865 // FADD -> FMA combines: 5866 if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || 5867 DAG.getTarget().Options.UnsafeFPMath) && 5868 DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && 5869 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { 5870 5871 // fold (fadd (fmul x, y), z) -> (fma x, y, z) 5872 if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { 5873 return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, 5874 N0.getOperand(0), N0.getOperand(1), N1); 5875 } 5876 5877 // fold (fadd x, (fmul y, z)) -> (fma y, z, x) 5878 // Note: Commutes FADD operands. 
5879 if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { 5880 return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, 5881 N1.getOperand(0), N1.getOperand(1), N0); 5882 } 5883 } 5884 5885 return SDValue(); 5886} 5887 5888SDValue DAGCombiner::visitFSUB(SDNode *N) { 5889 SDValue N0 = N->getOperand(0); 5890 SDValue N1 = N->getOperand(1); 5891 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5892 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5893 EVT VT = N->getValueType(0); 5894 DebugLoc dl = N->getDebugLoc(); 5895 5896 // fold vector ops 5897 if (VT.isVector()) { 5898 SDValue FoldedVOp = SimplifyVBinOp(N); 5899 if (FoldedVOp.getNode()) return FoldedVOp; 5900 } 5901 5902 // fold (fsub c1, c2) -> c1-c2 5903 if (N0CFP && N1CFP) 5904 return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); 5905 // fold (fsub A, 0) -> A 5906 if (DAG.getTarget().Options.UnsafeFPMath && 5907 N1CFP && N1CFP->getValueAPF().isZero()) 5908 return N0; 5909 // fold (fsub 0, B) -> -B 5910 if (DAG.getTarget().Options.UnsafeFPMath && 5911 N0CFP && N0CFP->getValueAPF().isZero()) { 5912 if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) 5913 return GetNegatedExpression(N1, DAG, LegalOperations); 5914 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 5915 return DAG.getNode(ISD::FNEG, dl, VT, N1); 5916 } 5917 // fold (fsub A, (fneg B)) -> (fadd A, B) 5918 if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) 5919 return DAG.getNode(ISD::FADD, dl, VT, N0, 5920 GetNegatedExpression(N1, DAG, LegalOperations)); 5921 5922 // If 'unsafe math' is enabled, fold 5923 // (fsub x, x) -> 0.0 & 5924 // (fsub x, (fadd x, y)) -> (fneg y) & 5925 // (fsub x, (fadd y, x)) -> (fneg y) 5926 if (DAG.getTarget().Options.UnsafeFPMath) { 5927 if (N0 == N1) 5928 return DAG.getConstantFP(0.0f, VT); 5929 5930 if (N1.getOpcode() == ISD::FADD) { 5931 SDValue N10 = N1->getOperand(0); 5932 SDValue N11 = N1->getOperand(1); 5933 5934 if (N10 == N0 && 
isNegatibleForFree(N11, LegalOperations, TLI, 5935 &DAG.getTarget().Options)) 5936 return GetNegatedExpression(N11, DAG, LegalOperations); 5937 else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, 5938 &DAG.getTarget().Options)) 5939 return GetNegatedExpression(N10, DAG, LegalOperations); 5940 } 5941 } 5942 5943 // FSUB -> FMA combines: 5944 if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || 5945 DAG.getTarget().Options.UnsafeFPMath) && 5946 DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && 5947 TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { 5948 5949 // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) 5950 if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { 5951 return DAG.getNode(ISD::FMA, dl, VT, 5952 N0.getOperand(0), N0.getOperand(1), 5953 DAG.getNode(ISD::FNEG, dl, VT, N1)); 5954 } 5955 5956 // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) 5957 // Note: Commutes FSUB operands. 5958 if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { 5959 return DAG.getNode(ISD::FMA, dl, VT, 5960 DAG.getNode(ISD::FNEG, dl, VT, 5961 N1.getOperand(0)), 5962 N1.getOperand(1), N0); 5963 } 5964 5965 // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) 5966 if (N0.getOpcode() == ISD::FNEG && 5967 N0.getOperand(0).getOpcode() == ISD::FMUL && 5968 N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { 5969 SDValue N00 = N0.getOperand(0).getOperand(0); 5970 SDValue N01 = N0.getOperand(0).getOperand(1); 5971 return DAG.getNode(ISD::FMA, dl, VT, 5972 DAG.getNode(ISD::FNEG, dl, VT, N00), N01, 5973 DAG.getNode(ISD::FNEG, dl, VT, N1)); 5974 } 5975 } 5976 5977 return SDValue(); 5978} 5979 5980SDValue DAGCombiner::visitFMUL(SDNode *N) { 5981 SDValue N0 = N->getOperand(0); 5982 SDValue N1 = N->getOperand(1); 5983 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 5984 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 5985 EVT VT = N->getValueType(0); 5986 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 
5987 5988 // fold vector ops 5989 if (VT.isVector()) { 5990 SDValue FoldedVOp = SimplifyVBinOp(N); 5991 if (FoldedVOp.getNode()) return FoldedVOp; 5992 } 5993 5994 // fold (fmul c1, c2) -> c1*c2 5995 if (N0CFP && N1CFP) 5996 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); 5997 // canonicalize constant to RHS 5998 if (N0CFP && !N1CFP) 5999 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); 6000 // fold (fmul A, 0) -> 0 6001 if (DAG.getTarget().Options.UnsafeFPMath && 6002 N1CFP && N1CFP->getValueAPF().isZero()) 6003 return N1; 6004 // fold (fmul A, 0) -> 0, vector edition. 6005 if (DAG.getTarget().Options.UnsafeFPMath && 6006 ISD::isBuildVectorAllZeros(N1.getNode())) 6007 return N1; 6008 // fold (fmul A, 1.0) -> A 6009 if (N1CFP && N1CFP->isExactlyValue(1.0)) 6010 return N0; 6011 // fold (fmul X, 2.0) -> (fadd X, X) 6012 if (N1CFP && N1CFP->isExactlyValue(+2.0)) 6013 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); 6014 // fold (fmul X, -1.0) -> (fneg X) 6015 if (N1CFP && N1CFP->isExactlyValue(-1.0)) 6016 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 6017 return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); 6018 6019 // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) 6020 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, 6021 &DAG.getTarget().Options)) { 6022 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 6023 &DAG.getTarget().Options)) { 6024 // Both can be negated for free, check to see if at least one is cheaper 6025 // negated. 
6026 if (LHSNeg == 2 || RHSNeg == 2) 6027 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 6028 GetNegatedExpression(N0, DAG, LegalOperations), 6029 GetNegatedExpression(N1, DAG, LegalOperations)); 6030 } 6031 } 6032 6033 // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2)) 6034 if (DAG.getTarget().Options.UnsafeFPMath && 6035 N1CFP && N0.getOpcode() == ISD::FMUL && 6036 N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1))) 6037 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), 6038 DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, 6039 N0.getOperand(1), N1)); 6040 6041 return SDValue(); 6042} 6043 6044SDValue DAGCombiner::visitFMA(SDNode *N) { 6045 SDValue N0 = N->getOperand(0); 6046 SDValue N1 = N->getOperand(1); 6047 SDValue N2 = N->getOperand(2); 6048 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6049 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6050 EVT VT = N->getValueType(0); 6051 DebugLoc dl = N->getDebugLoc(); 6052 6053 if (DAG.getTarget().Options.UnsafeFPMath) { 6054 if (N0CFP && N0CFP->isZero()) 6055 return N2; 6056 if (N1CFP && N1CFP->isZero()) 6057 return N2; 6058 } 6059 if (N0CFP && N0CFP->isExactlyValue(1.0)) 6060 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); 6061 if (N1CFP && N1CFP->isExactlyValue(1.0)) 6062 return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); 6063 6064 // Canonicalize (fma c, x, y) -> (fma x, c, y) 6065 if (N0CFP && !N1CFP) 6066 return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); 6067 6068 // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 6069 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 6070 N2.getOpcode() == ISD::FMUL && 6071 N0 == N2.getOperand(0) && 6072 N2.getOperand(1).getOpcode() == ISD::ConstantFP) { 6073 return DAG.getNode(ISD::FMUL, dl, VT, N0, 6074 DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1))); 6075 } 6076 6077 6078 // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 6079 if 
(DAG.getTarget().Options.UnsafeFPMath && 6080 N0.getOpcode() == ISD::FMUL && N1CFP && 6081 N0.getOperand(1).getOpcode() == ISD::ConstantFP) { 6082 return DAG.getNode(ISD::FMA, dl, VT, 6083 N0.getOperand(0), 6084 DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)), 6085 N2); 6086 } 6087 6088 // (fma x, 1, y) -> (fadd x, y) 6089 // (fma x, -1, y) -> (fadd (fneg x), y) 6090 if (N1CFP) { 6091 if (N1CFP->isExactlyValue(1.0)) 6092 return DAG.getNode(ISD::FADD, dl, VT, N0, N2); 6093 6094 if (N1CFP->isExactlyValue(-1.0) && 6095 (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) { 6096 SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0); 6097 AddToWorkList(RHSNeg.getNode()); 6098 return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg); 6099 } 6100 } 6101 6102 // (fma x, c, x) -> (fmul x, (c+1)) 6103 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { 6104 return DAG.getNode(ISD::FMUL, dl, VT, 6105 N0, 6106 DAG.getNode(ISD::FADD, dl, VT, 6107 N1, DAG.getConstantFP(1.0, VT))); 6108 } 6109 6110 // (fma x, c, (fneg x)) -> (fmul x, (c-1)) 6111 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && 6112 N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { 6113 return DAG.getNode(ISD::FMUL, dl, VT, 6114 N0, 6115 DAG.getNode(ISD::FADD, dl, VT, 6116 N1, DAG.getConstantFP(-1.0, VT))); 6117 } 6118 6119 6120 return SDValue(); 6121} 6122 6123SDValue DAGCombiner::visitFDIV(SDNode *N) { 6124 SDValue N0 = N->getOperand(0); 6125 SDValue N1 = N->getOperand(1); 6126 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6127 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6128 EVT VT = N->getValueType(0); 6129 const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 6130 6131 // fold vector ops 6132 if (VT.isVector()) { 6133 SDValue FoldedVOp = SimplifyVBinOp(N); 6134 if (FoldedVOp.getNode()) return FoldedVOp; 6135 } 6136 6137 // fold (fdiv c1, c2) -> c1/c2 6138 if (N0CFP && N1CFP) 6139 return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); 6140 6141 // 
fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. 6142 if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { 6143 // Compute the reciprocal 1.0 / c2. 6144 APFloat N1APF = N1CFP->getValueAPF(); 6145 APFloat Recip(N1APF.getSemantics(), 1); // 1.0 6146 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); 6147 // Only do the transform if the reciprocal is a legal fp immediate that 6148 // isn't too nasty (eg NaN, denormal, ...). 6149 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty 6150 (!LegalOperations || 6151 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM 6152 // backend)... we should handle this gracefully after Legalize. 6153 // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || 6154 TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || 6155 TLI.isFPImmLegal(Recip, VT))) 6156 return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, 6157 DAG.getConstantFP(Recip, VT)); 6158 } 6159 6160 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 6161 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, 6162 &DAG.getTarget().Options)) { 6163 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 6164 &DAG.getTarget().Options)) { 6165 // Both can be negated for free, check to see if at least one is cheaper 6166 // negated. 
      // Both operands can be negated for free, and at least one negation
      // (code 2) is strictly profitable -- rewrite as a divide of the
      // negated operands.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// visitFREM - Combine an FREM node.  The only fold performed is constant
/// folding when both operands are floating-point constants.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  // Re-emitting the node with two ConstantFP operands lets getNode perform
  // the actual constant folding.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);

  return SDValue();
}

/// visitFCOPYSIGN - Combine an FCOPYSIGN node: constant folding, strength
/// reduction to FABS/FNEG when the sign operand is a constant, and
/// sign-irrelevant wrapper stripping on either operand.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP)  // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);

  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // A constant sign operand fixes the result's sign, so the copysign
    // degenerates to fabs (positive sign) or fneg(fabs) (negative sign).
    // copysign(x, c1) -> fabs(x) iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
    }
  }

  // The magnitude operand's own sign is overwritten by the copysign, so any
  // sign-only wrapper around it can be stripped.
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0.getOperand(0),
N1); 6222 6223 // copysign(x, abs(y)) -> abs(x) 6224 if (N1.getOpcode() == ISD::FABS) 6225 return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); 6226 6227 // copysign(x, copysign(y,z)) -> copysign(x, z) 6228 if (N1.getOpcode() == ISD::FCOPYSIGN) 6229 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 6230 N0, N1.getOperand(1)); 6231 6232 // copysign(x, fp_extend(y)) -> copysign(x, y) 6233 // copysign(x, fp_round(y)) -> copysign(x, y) 6234 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 6235 return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, 6236 N0, N1.getOperand(0)); 6237 6238 return SDValue(); 6239} 6240 6241SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 6242 SDValue N0 = N->getOperand(0); 6243 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 6244 EVT VT = N->getValueType(0); 6245 EVT OpVT = N0.getValueType(); 6246 6247 // fold (sint_to_fp c1) -> c1fp 6248 if (N0C && 6249 // ...but only if the target supports immediate floating-point values 6250 (!LegalOperations || 6251 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 6252 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 6253 6254 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 6255 // but UINT_TO_FP is legal on this target, try to convert. 6256 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 6257 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 6258 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 6259 if (DAG.SignBitIsZero(N0)) 6260 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 6261 } 6262 6263 // The next optimizations are desireable only if SELECT_CC can be lowered. 
6264 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 6265 // having to say they don't support SELECT_CC on every type the DAG knows 6266 // about, since there is no way to mark an opcode illegal at all value types 6267 // (See also visitSELECT) 6268 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { 6269 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 6270 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 6271 !VT.isVector() && 6272 (!LegalOperations || 6273 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6274 SDValue Ops[] = 6275 { N0.getOperand(0), N0.getOperand(1), 6276 DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), 6277 N0.getOperand(2) }; 6278 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6279 } 6280 6281 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 6282 // (select_cc x, y, 1.0, 0.0,, cc) 6283 if (N0.getOpcode() == ISD::ZERO_EXTEND && 6284 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 6285 (!LegalOperations || 6286 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6287 SDValue Ops[] = 6288 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 6289 DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), 6290 N0.getOperand(0).getOperand(2) }; 6291 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6292 } 6293 } 6294 6295 return SDValue(); 6296} 6297 6298SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 6299 SDValue N0 = N->getOperand(0); 6300 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 6301 EVT VT = N->getValueType(0); 6302 EVT OpVT = N0.getValueType(); 6303 6304 // fold (uint_to_fp c1) -> c1fp 6305 if (N0C && 6306 // ...but only if the target supports immediate floating-point values 6307 (!LegalOperations || 6308 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 6309 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 6310 6311 // If the input is a 
legal type, and UINT_TO_FP is not legal on this target, 6312 // but SINT_TO_FP is legal on this target, try to convert. 6313 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 6314 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 6315 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 6316 if (DAG.SignBitIsZero(N0)) 6317 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 6318 } 6319 6320 // The next optimizations are desireable only if SELECT_CC can be lowered. 6321 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 6322 // having to say they don't support SELECT_CC on every type the DAG knows 6323 // about, since there is no way to mark an opcode illegal at all value types 6324 // (See also visitSELECT) 6325 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { 6326 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 6327 6328 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && 6329 (!LegalOperations || 6330 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 6331 SDValue Ops[] = 6332 { N0.getOperand(0), N0.getOperand(1), 6333 DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), 6334 N0.getOperand(2) }; 6335 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); 6336 } 6337 } 6338 6339 return SDValue(); 6340} 6341 6342SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 6343 SDValue N0 = N->getOperand(0); 6344 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6345 EVT VT = N->getValueType(0); 6346 6347 // fold (fp_to_sint c1fp) -> c1 6348 if (N0CFP) 6349 return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); 6350 6351 return SDValue(); 6352} 6353 6354SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 6355 SDValue N0 = N->getOperand(0); 6356 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6357 EVT VT = N->getValueType(0); 6358 6359 // fold (fp_to_uint c1fp) -> c1 6360 if (N0CFP) 6361 return 
DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFP_ROUND - Combine an FP_ROUND node: constant folding, collapsing of
/// round/extend and round/round pairs, and hoisting the round through a
/// copysign's magnitude operand.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);        // The "is-exact" flag operand.
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  // Rounding an extension straight back to the original type is a no-op.
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value preserving truncation if both round's are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // The sign operand Y is unaffected by the rounding of the magnitude.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1);
    AddToWorkList(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// visitFP_ROUND_INREG - Constant-fold an FP_ROUND_INREG node by rounding the
/// constant to the in-register type and extending it back out.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately (if confusingly) shadows the EVT type name;
  // it is the in-register rounding type carried in operand 1.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_EXTEND(SDNode *N)
{ 6417 SDValue N0 = N->getOperand(0); 6418 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6419 EVT VT = N->getValueType(0); 6420 6421 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 6422 if (N->hasOneUse() && 6423 N->use_begin()->getOpcode() == ISD::FP_ROUND) 6424 return SDValue(); 6425 6426 // fold (fp_extend c1fp) -> c1fp 6427 if (N0CFP) 6428 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); 6429 6430 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 6431 // value of X. 6432 if (N0.getOpcode() == ISD::FP_ROUND 6433 && N0.getNode()->getConstantOperandVal(1) == 1) { 6434 SDValue In = N0.getOperand(0); 6435 if (In.getValueType() == VT) return In; 6436 if (VT.bitsLT(In.getValueType())) 6437 return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, 6438 In, N0.getOperand(1)); 6439 return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); 6440 } 6441 6442 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 6443 if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && 6444 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 6445 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 6446 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6447 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, 6448 LN0->getChain(), 6449 LN0->getBasePtr(), LN0->getPointerInfo(), 6450 N0.getValueType(), 6451 LN0->isVolatile(), LN0->isNonTemporal(), 6452 LN0->getAlignment()); 6453 CombineTo(N, ExtLoad); 6454 CombineTo(N0.getNode(), 6455 DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), 6456 N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), 6457 ExtLoad.getValue(1)); 6458 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
  }

  return SDValue();
}

/// visitFNEG - Combine an FNEG node: free negation of the operand, sign-bit
/// flipping through an integer bitcast, and pushing the negation into a
/// constant multiplicand.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // If the operand can be negated at no cost, use the negated expression
  // directly instead of emitting an FNEG.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
      !VT.isVector() &&
      N0.getNode()->hasOneUse() &&
      N0.getOperand(0).getValueType().isInteger()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      // XOR with the sign bit flips the float's sign in the integer domain.
      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                         VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  // Fold the negation into the constant operand of the multiply.
  if (N0.getOpcode() == ISD::FMUL) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
                         N0.getOperand(0),
                         DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
                                     N0.getOperand(1)));
    }
  }

  return SDValue();
}

/// visitFCEIL - Constant-fold an FCEIL node.
SDValue DAGCombiner::visitFCEIL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fceil c1) -> fceil(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFTRUNC - Constant-fold an FTRUNC node.
SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP =
dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ftrunc c1) -> ftrunc(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFFLOOR - Constant-fold an FFLOOR node.
SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (ffloor c1) -> ffloor(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFABS - Combine an FABS node: constant folding, collapsing of nested
/// sign-only operations, and sign-bit clearing through an integer bitcast.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVUnaryOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fabs c1) -> fabs(c1)
  if (N0CFP)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);
  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // fabs discards the sign, so a sign-only wrapper underneath is redundant.
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
  // constant pool values.
6567 if (!TLI.isFAbsFree(VT) && 6568 N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && 6569 N0.getOperand(0).getValueType().isInteger() && 6570 !N0.getOperand(0).getValueType().isVector()) { 6571 SDValue Int = N0.getOperand(0); 6572 EVT IntVT = Int.getValueType(); 6573 if (IntVT.isInteger() && !IntVT.isVector()) { 6574 Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, 6575 DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 6576 AddToWorkList(Int.getNode()); 6577 return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), 6578 N->getValueType(0), Int); 6579 } 6580 } 6581 6582 return SDValue(); 6583} 6584 6585SDValue DAGCombiner::visitBRCOND(SDNode *N) { 6586 SDValue Chain = N->getOperand(0); 6587 SDValue N1 = N->getOperand(1); 6588 SDValue N2 = N->getOperand(2); 6589 6590 // If N is a constant we could fold this into a fallthrough or unconditional 6591 // branch. However that doesn't happen very often in normal code, because 6592 // Instcombine/SimplifyCFG should have handled the available opportunities. 6593 // If we did this folding here, it would be necessary to update the 6594 // MachineBasicBlock CFG, which is awkward. 6595 6596 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 6597 // on the target. 6598 if (N1.getOpcode() == ISD::SETCC && 6599 TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { 6600 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 6601 Chain, N1.getOperand(2), 6602 N1.getOperand(0), N1.getOperand(1), N2); 6603 } 6604 6605 if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || 6606 ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && 6607 (N1.getOperand(0).hasOneUse() && 6608 N1.getOperand(0).getOpcode() == ISD::SRL))) { 6609 SDNode *Trunc = 0; 6610 if (N1.getOpcode() == ISD::TRUNCATE) { 6611 // Look pass the truncate. 
6612 Trunc = N1.getNode(); 6613 N1 = N1.getOperand(0); 6614 } 6615 6616 // Match this pattern so that we can generate simpler code: 6617 // 6618 // %a = ... 6619 // %b = and i32 %a, 2 6620 // %c = srl i32 %b, 1 6621 // brcond i32 %c ... 6622 // 6623 // into 6624 // 6625 // %a = ... 6626 // %b = and i32 %a, 2 6627 // %c = setcc eq %b, 0 6628 // brcond %c ... 6629 // 6630 // This applies only when the AND constant value has one bit set and the 6631 // SRL constant is equal to the log2 of the AND constant. The back-end is 6632 // smart enough to convert the result into a TEST/JMP sequence. 6633 SDValue Op0 = N1.getOperand(0); 6634 SDValue Op1 = N1.getOperand(1); 6635 6636 if (Op0.getOpcode() == ISD::AND && 6637 Op1.getOpcode() == ISD::Constant) { 6638 SDValue AndOp1 = Op0.getOperand(1); 6639 6640 if (AndOp1.getOpcode() == ISD::Constant) { 6641 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 6642 6643 if (AndConst.isPowerOf2() && 6644 cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { 6645 SDValue SetCC = 6646 DAG.getSetCC(N->getDebugLoc(), 6647 TLI.getSetCCResultType(Op0.getValueType()), 6648 Op0, DAG.getConstant(0, Op0.getValueType()), 6649 ISD::SETNE); 6650 6651 SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6652 MVT::Other, Chain, SetCC, N2); 6653 // Don't add the new BRCond into the worklist or else SimplifySelectCC 6654 // will convert it back to (X & C1) >> C2. 6655 CombineTo(N, NewBRCond, false); 6656 // Truncate is dead. 6657 if (Trunc) { 6658 removeFromWorkList(Trunc); 6659 DAG.DeleteNode(Trunc); 6660 } 6661 // Replace the uses of SRL with SETCC 6662 WorkListRemover DeadNodes(*this); 6663 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 6664 removeFromWorkList(N1.getNode()); 6665 DAG.DeleteNode(N1.getNode()); 6666 return SDValue(N, 0); // Return N so it doesn't get rechecked! 6667 } 6668 } 6669 } 6670 6671 if (Trunc) 6672 // Restore N1 if the above transformation doesn't match. 
6673 N1 = N->getOperand(1); 6674 } 6675 6676 // Transform br(xor(x, y)) -> br(x != y) 6677 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 6678 if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { 6679 SDNode *TheXor = N1.getNode(); 6680 SDValue Op0 = TheXor->getOperand(0); 6681 SDValue Op1 = TheXor->getOperand(1); 6682 if (Op0.getOpcode() == Op1.getOpcode()) { 6683 // Avoid missing important xor optimizations. 6684 SDValue Tmp = visitXOR(TheXor); 6685 if (Tmp.getNode() && Tmp.getNode() != TheXor) { 6686 DEBUG(dbgs() << "\nReplacing.8 "; 6687 TheXor->dump(&DAG); 6688 dbgs() << "\nWith: "; 6689 Tmp.getNode()->dump(&DAG); 6690 dbgs() << '\n'); 6691 WorkListRemover DeadNodes(*this); 6692 DAG.ReplaceAllUsesOfValueWith(N1, Tmp); 6693 removeFromWorkList(TheXor); 6694 DAG.DeleteNode(TheXor); 6695 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6696 MVT::Other, Chain, Tmp, N2); 6697 } 6698 } 6699 6700 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 6701 bool Equal = false; 6702 if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) 6703 if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && 6704 Op0.getOpcode() == ISD::XOR) { 6705 TheXor = Op0.getNode(); 6706 Equal = true; 6707 } 6708 6709 EVT SetCCVT = N1.getValueType(); 6710 if (LegalTypes) 6711 SetCCVT = TLI.getSetCCResultType(SetCCVT); 6712 SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), 6713 SetCCVT, 6714 Op0, Op1, 6715 Equal ? ISD::SETEQ : ISD::SETNE); 6716 // Replace the uses of XOR with SETCC 6717 WorkListRemover DeadNodes(*this); 6718 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 6719 removeFromWorkList(N1.getNode()); 6720 DAG.DeleteNode(N1.getNode()); 6721 return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), 6722 MVT::Other, Chain, SetCC, N2); 6723 } 6724 } 6725 6726 return SDValue(); 6727} 6728 6729// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 
6730// 6731SDValue DAGCombiner::visitBR_CC(SDNode *N) { 6732 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 6733 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 6734 6735 // If N is a constant we could fold this into a fallthrough or unconditional 6736 // branch. However that doesn't happen very often in normal code, because 6737 // Instcombine/SimplifyCFG should have handled the available opportunities. 6738 // If we did this folding here, it would be necessary to update the 6739 // MachineBasicBlock CFG, which is awkward. 6740 6741 // Use SimplifySetCC to simplify SETCC's. 6742 SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), 6743 CondLHS, CondRHS, CC->get(), N->getDebugLoc(), 6744 false); 6745 if (Simp.getNode()) AddToWorkList(Simp.getNode()); 6746 6747 // fold to a simpler setcc 6748 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 6749 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 6750 N->getOperand(0), Simp.getOperand(2), 6751 Simp.getOperand(0), Simp.getOperand(1), 6752 N->getOperand(4)); 6753 6754 return SDValue(); 6755} 6756 6757/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that 6758/// uses N as its base pointer and that N may be folded in the load / store 6759/// addressing mode. 
6760static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 6761 SelectionDAG &DAG, 6762 const TargetLowering &TLI) { 6763 EVT VT; 6764 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 6765 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 6766 return false; 6767 VT = Use->getValueType(0); 6768 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 6769 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 6770 return false; 6771 VT = ST->getValue().getValueType(); 6772 } else 6773 return false; 6774 6775 AddrMode AM; 6776 if (N->getOpcode() == ISD::ADD) { 6777 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6778 if (Offset) 6779 // [reg +/- imm] 6780 AM.BaseOffs = Offset->getSExtValue(); 6781 else 6782 // [reg +/- reg] 6783 AM.Scale = 1; 6784 } else if (N->getOpcode() == ISD::SUB) { 6785 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 6786 if (Offset) 6787 // [reg +/- imm] 6788 AM.BaseOffs = -Offset->getSExtValue(); 6789 else 6790 // [reg +/- reg] 6791 AM.Scale = 1; 6792 } else 6793 return false; 6794 6795 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 6796} 6797 6798/// CombineToPreIndexedLoadStore - Try turning a load / store into a 6799/// pre-indexed load / store when the base pointer is an add or subtract 6800/// and it has other uses besides the load / store. After the 6801/// transformation, the new indexed load / store has effectively folded 6802/// the add / subtract in and all of its other uses are redirected to the 6803/// new load / store. 
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Pre-indexing is only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // N must be an unindexed load or store for which the target supports a
  // pre-inc or pre-dec form at the memory VT; remember its base pointer.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;
  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Use = *I;
    if (Use == N)
      continue;
    // Check #3: folding Ptr into N would create a cycle if some other use
    // of Ptr already feeds into N.
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // Check #4: require at least one use that cannot simply fold Ptr.
  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // An indexed load produces (value, writeback, chain); an indexed store
  // produces (writeback, chain).  Rewire N's results accordingly.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Post-indexing is only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  // N must be an unindexed load or store for which the target supports a
  // post-inc or post-dec form at the memory VT; remember its base pointer.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer must have some use besides N (an add/sub to fold).
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look among Ptr's other uses for an add/sub that the target can turn
  // into a post-increment/decrement of this access.
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Op = *I;
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
        SDNode *Use = *II;
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode::use_iterator III = Use->use_begin(),
                 EEE = Use->use_end(); III != EEE; ++III) {
            SDNode *UseUse = *III;
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // An indexed load produces (value, writeback, chain); an indexed
        // store produces (writeback, chain).  Rewire N's results.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
        // Neither the value nor the writeback pointer is used: replace all
        // three results (value, writeback, chain) and delete the load.
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)));
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        // Forward the stored value (operand 1 of the store) to the load.
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align);
    }
  }

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr, LD->getPointerInfo(),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->isInvariant(), LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getPointerInfo(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out.  If so, return the byte size
/// being masked out and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}


/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
/// provides a value as specified by MaskInfo.  If so, replace the specified
/// store with a narrower store of truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return 0;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
/// of the loaded bits, try narrowing the load and store if it would end up
/// being a win for performance or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only scalar, full-width stores whose value feeds nothing else.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The op's other input must be a plain load of the same location that is
  // chained directly to the store (so no intervening memory operations).
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, work with the complement so that "touched bits" are the set
    // bits of Imm in all three cases.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // Grow the candidate width until the target can do the op at that width
    // and considers the narrowing profitable.
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
    // Only narrow if every touched bit fits inside the NewBW-wide window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign);
      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorkList(NewPtr.getNode());
      AddToWorkList(NewLD.getNode());
      AddToWorkList(NewVal.getNode());
      WorkListRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// TransformFPLoadStorePair - For a given floating point load / store pair,
/// if the load value isn't used by any other operations, then consider
/// transforming the pair to integer load / store operations if the target
/// deems the transformation profitable.
7437SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 7438 StoreSDNode *ST = cast<StoreSDNode>(N); 7439 SDValue Chain = ST->getChain(); 7440 SDValue Value = ST->getValue(); 7441 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 7442 Value.hasOneUse() && 7443 Chain == SDValue(Value.getNode(), 1)) { 7444 LoadSDNode *LD = cast<LoadSDNode>(Value); 7445 EVT VT = LD->getMemoryVT(); 7446 if (!VT.isFloatingPoint() || 7447 VT != ST->getMemoryVT() || 7448 LD->isNonTemporal() || 7449 ST->isNonTemporal() || 7450 LD->getPointerInfo().getAddrSpace() != 0 || 7451 ST->getPointerInfo().getAddrSpace() != 0) 7452 return SDValue(); 7453 7454 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 7455 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 7456 !TLI.isOperationLegal(ISD::STORE, IntVT) || 7457 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 7458 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 7459 return SDValue(); 7460 7461 unsigned LDAlign = LD->getAlignment(); 7462 unsigned STAlign = ST->getAlignment(); 7463 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 7464 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 7465 if (LDAlign < ABIAlign || STAlign < ABIAlign) 7466 return SDValue(); 7467 7468 SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), 7469 LD->getChain(), LD->getBasePtr(), 7470 LD->getPointerInfo(), 7471 false, false, false, LDAlign); 7472 7473 SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), 7474 NewLD, ST->getBasePtr(), 7475 ST->getPointerInfo(), 7476 false, false, STAlign); 7477 7478 AddToWorkList(NewLD.getNode()); 7479 AddToWorkList(NewST.getNode()); 7480 WorkListRemover DeadNodes(*this); 7481 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 7482 ++LdStFP2Int; 7483 return NewST; 7484 } 7485 7486 return SDValue(); 7487} 7488 7489/// Returns the base pointer and an integer offset from that object. 
static std::pair<SDValue, int64_t> GetPointerBaseAndOffset(SDValue Ptr) {
  // Peel off a "base + constant" addition; otherwise the pointer itself is
  // the base and the offset is zero.
  if (Ptr->getOpcode() == ISD::ADD && isa<ConstantSDNode>(Ptr->getOperand(1))) {
    int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
    SDValue Base = Ptr->getOperand(0);
    return std::make_pair(Base, Offset);
  }

  return std::make_pair(Ptr, 0);
}

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
  unsigned SequenceNum;
};

/// Sorts store nodes in a link according to their offset from a shared
/// base ptr.
struct ConsecutiveMemoryChainSorter {
  bool operator()(MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase;
  }
};

bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;

  // Don't merge vectors into wider inputs.
  if (MemVT.isVector() || !MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants or loads.
  SDValue StoredVal = St->getValue();
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
      !IsLoadSrc)
    return false;

  // Only look at ends of store sequences.
7539 SDValue Chain = SDValue(St, 1); 7540 if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) 7541 return false; 7542 7543 // This holds the base pointer and the offset in bytes from the base pointer. 7544 std::pair<SDValue, int64_t> BasePtr = 7545 GetPointerBaseAndOffset(St->getBasePtr()); 7546 7547 // We must have a base and an offset. 7548 if (!BasePtr.first.getNode()) 7549 return false; 7550 7551 // Do not handle stores to undef base pointers. 7552 if (BasePtr.first.getOpcode() == ISD::UNDEF) 7553 return false; 7554 7555 SmallVector<MemOpLink, 8> StoreNodes; 7556 // Walk up the chain and look for nodes with offsets from the same 7557 // base pointer. Stop when reaching an instruction with a different kind 7558 // or instruction which has a different base pointer. 7559 unsigned Seq = 0; 7560 StoreSDNode *Index = St; 7561 while (Index) { 7562 // If the chain has more than one use, then we can't reorder the mem ops. 7563 if (Index != St && !SDValue(Index, 1)->hasOneUse()) 7564 break; 7565 7566 // Find the base pointer and offset for this memory node. 7567 std::pair<SDValue, int64_t> Ptr = 7568 GetPointerBaseAndOffset(Index->getBasePtr()); 7569 7570 // Check that the base pointer is the same as the original one. 7571 if (Ptr.first.getNode() != BasePtr.first.getNode()) 7572 break; 7573 7574 // Check that the alignment is the same. 7575 if (Index->getAlignment() != St->getAlignment()) 7576 break; 7577 7578 // The memory operands must not be volatile. 7579 if (Index->isVolatile() || Index->isIndexed()) 7580 break; 7581 7582 // No truncation. 7583 if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index)) 7584 if (St->isTruncatingStore()) 7585 break; 7586 7587 // The stored memory type must be the same. 7588 if (Index->getMemoryVT() != MemVT) 7589 break; 7590 7591 // We do not allow unaligned stores because we want to prevent overriding 7592 // stores. 
7593 if (Index->getAlignment()*8 != MemVT.getSizeInBits()) 7594 break; 7595 7596 // We found a potential memory operand to merge. 7597 StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); 7598 7599 // Move up the chain to the next memory operation. 7600 Index = dyn_cast<StoreSDNode>(Index->getChain().getNode()); 7601 } 7602 7603 // Check if there is anything to merge. 7604 if (StoreNodes.size() < 2) 7605 return false; 7606 7607 // Sort the memory operands according to their distance from the base pointer. 7608 std::sort(StoreNodes.begin(), StoreNodes.end(), 7609 ConsecutiveMemoryChainSorter()); 7610 7611 // Scan the memory operations on the chain and find the first non-consecutive 7612 // store memory address. 7613 unsigned LastConsecutiveStore = 0; 7614 int64_t StartAddress = StoreNodes[0].OffsetFromBase; 7615 for (unsigned i=1; i<StoreNodes.size(); ++i) { 7616 int64_t CurrAddress = StoreNodes[i].OffsetFromBase; 7617 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 7618 break; 7619 7620 // Mark this node as useful. 7621 LastConsecutiveStore = i; 7622 } 7623 7624 // The node with the lowest store address. 7625 LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode; 7626 7627 // Store the constants into memory as one consecutive store. 7628 if (!IsLoadSrc) { 7629 unsigned LastLegalType = 0; 7630 unsigned LastLegalVectorType = 0; 7631 bool NonZero = false; 7632 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 7633 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7634 SDValue StoredVal = St->getValue(); 7635 7636 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) { 7637 NonZero |= !C->isNullValue(); 7638 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) { 7639 NonZero |= !C->getConstantFPValue()->isNullValue(); 7640 } else { 7641 // Non constant. 7642 break; 7643 } 7644 7645 // Find a legal type for the constant store. 
7646 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 7647 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7648 if (TLI.isTypeLegal(StoreTy)) 7649 LastLegalType = i+1; 7650 7651 // Find a legal type for the vector store. 7652 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 7653 if (TLI.isTypeLegal(Ty)) 7654 LastLegalVectorType = i + 1; 7655 } 7656 7657 // We only use vectors if the constant is known to be zero. 7658 if (NonZero) 7659 LastLegalVectorType = 0; 7660 7661 // Check if we found a legal integer type to store. 7662 if (LastLegalType == 0 && LastLegalVectorType == 0) 7663 return false; 7664 7665 bool UseVector = LastLegalVectorType > LastLegalType; 7666 unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; 7667 7668 // Make sure we have something to merge. 7669 if (NumElem < 2) 7670 return false; 7671 7672 unsigned EarliestNodeUsed = 0; 7673 for (unsigned i=0; i < NumElem; ++i) { 7674 // Find a chain for the new wide-store operand. Notice that some 7675 // of the store nodes that we found may not be selected for inclusion 7676 // in the wide store. The chain we use needs to be the chain of the 7677 // earliest store node which is *used* and replaced by the wide store. 7678 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 7679 EarliestNodeUsed = i; 7680 } 7681 7682 // The earliest Node in the DAG. 7683 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 7684 DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc(); 7685 7686 SDValue StoredVal; 7687 if (UseVector) { 7688 // Find a legal type for the vector store. 7689 EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 7690 assert(TLI.isTypeLegal(Ty) && "Illegal vector store"); 7691 StoredVal = DAG.getConstant(0, Ty); 7692 } else { 7693 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 7694 APInt StoreInt(StoreBW, 0); 7695 7696 // Construct a single integer constant which is made of the smaller 7697 // constant inputs. 
7698 bool IsLE = TLI.isLittleEndian(); 7699 for (unsigned i = 0; i < NumElem ; ++i) { 7700 unsigned Idx = IsLE ?(NumElem - 1 - i) : i; 7701 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode); 7702 SDValue Val = St->getValue(); 7703 StoreInt<<=ElementSizeBytes*8; 7704 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) { 7705 StoreInt|=C->getAPIntValue().zext(StoreBW); 7706 } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) { 7707 StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW); 7708 } else { 7709 assert(false && "Invalid constant element type"); 7710 } 7711 } 7712 7713 // Create the new Load and Store operations. 7714 EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7715 StoredVal = DAG.getConstant(StoreInt, StoreTy); 7716 } 7717 7718 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal, 7719 FirstInChain->getBasePtr(), 7720 FirstInChain->getPointerInfo(), 7721 false, false, 7722 FirstInChain->getAlignment()); 7723 7724 // Replace the first store with the new store 7725 CombineTo(EarliestOp, NewStore); 7726 // Erase all other stores. 7727 for (unsigned i = 0; i < NumElem ; ++i) { 7728 if (StoreNodes[i].MemNode == EarliestOp) 7729 continue; 7730 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7731 // ReplaceAllUsesWith will replace all uses that existed when it was 7732 // called, but graph optimizations may cause new ones to appear. For 7733 // example, the case in pr14333 looks like 7734 // 7735 // St's chain -> St -> another store -> X 7736 // 7737 // And the only difference from St to the other store is the chain. 7738 // When we change it's chain to be St's chain they become identical, 7739 // get CSEed and the net result is that X is now a use of St. 7740 // Since we know that St is redundant, just iterate. 
7741 while (!St->use_empty()) 7742 DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain()); 7743 removeFromWorkList(St); 7744 DAG.DeleteNode(St); 7745 } 7746 7747 return true; 7748 } 7749 7750 // Below we handle the case of multiple consecutive stores that 7751 // come from multiple consecutive loads. We merge them into a single 7752 // wide load and a single wide store. 7753 7754 // Look for load nodes which are used by the stored values. 7755 SmallVector<MemOpLink, 8> LoadNodes; 7756 7757 // Find acceptable loads. Loads need to have the same chain (token factor), 7758 // must not be zext, volatile, indexed, and they must be consecutive. 7759 SDValue LdBasePtr; 7760 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 7761 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 7762 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 7763 if (!Ld) break; 7764 7765 // Loads must only have one use. 7766 if (!Ld->hasNUsesOfValue(1, 0)) 7767 break; 7768 7769 // Check that the alignment is the same as the stores. 7770 if (Ld->getAlignment() != St->getAlignment()) 7771 break; 7772 7773 // The memory operands must not be volatile. 7774 if (Ld->isVolatile() || Ld->isIndexed()) 7775 break; 7776 7777 // We do not accept ext loads. 7778 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 7779 break; 7780 7781 // The stored memory type must be the same. 7782 if (Ld->getMemoryVT() != MemVT) 7783 break; 7784 7785 std::pair<SDValue, int64_t> LdPtr = 7786 GetPointerBaseAndOffset(Ld->getBasePtr()); 7787 7788 // If this is not the first ptr that we check. 7789 if (LdBasePtr.getNode()) { 7790 // The base ptr must be the same. 7791 if (LdPtr.first != LdBasePtr) 7792 break; 7793 } else { 7794 // Check that all other base pointers are the same as this one. 7795 LdBasePtr = LdPtr.first; 7796 } 7797 7798 // We found a potential memory operand to merge. 
7799 LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); 7800 } 7801 7802 if (LoadNodes.size() < 2) 7803 return false; 7804 7805 // Scan the memory operations on the chain and find the first non-consecutive 7806 // load memory address. These variables hold the index in the store node 7807 // array. 7808 unsigned LastConsecutiveLoad = 0; 7809 // This variable refers to the size and not index in the array. 7810 unsigned LastLegalVectorType = 0; 7811 unsigned LastLegalIntegerType = 0; 7812 StartAddress = LoadNodes[0].OffsetFromBase; 7813 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 7814 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 7815 // All loads much share the same chain. 7816 if (LoadNodes[i].MemNode->getChain() != FirstChain) 7817 break; 7818 7819 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 7820 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 7821 break; 7822 LastConsecutiveLoad = i; 7823 7824 // Find a legal type for the vector store. 7825 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 7826 if (TLI.isTypeLegal(StoreTy)) 7827 LastLegalVectorType = i + 1; 7828 7829 // Find a legal type for the integer store. 7830 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 7831 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7832 if (TLI.isTypeLegal(StoreTy)) 7833 LastLegalIntegerType = i + 1; 7834 } 7835 7836 // Only use vector types if the vector type is larger than the integer type. 7837 // If they are the same, use integers. 7838 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; 7839 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 7840 7841 // We add +1 here because the LastXXX variables refer to location while 7842 // the NumElem refers to array/index size. 7843 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 7844 NumElem = std::min(LastLegalType, NumElem); 7845 7846 if (NumElem < 2) 7847 return false; 7848 7849 // The earliest Node in the DAG. 
7850 unsigned EarliestNodeUsed = 0; 7851 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 7852 for (unsigned i=1; i<NumElem; ++i) { 7853 // Find a chain for the new wide-store operand. Notice that some 7854 // of the store nodes that we found may not be selected for inclusion 7855 // in the wide store. The chain we use needs to be the chain of the 7856 // earliest store node which is *used* and replaced by the wide store. 7857 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 7858 EarliestNodeUsed = i; 7859 } 7860 7861 // Find if it is better to use vectors or integers to load and store 7862 // to memory. 7863 EVT JointMemOpVT; 7864 if (UseVectorTy) { 7865 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 7866 } else { 7867 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 7868 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 7869 } 7870 7871 DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc(); 7872 DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc(); 7873 7874 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 7875 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 7876 FirstLoad->getChain(), 7877 FirstLoad->getBasePtr(), 7878 FirstLoad->getPointerInfo(), 7879 false, false, false, 7880 FirstLoad->getAlignment()); 7881 7882 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 7883 FirstInChain->getBasePtr(), 7884 FirstInChain->getPointerInfo(), false, false, 7885 FirstInChain->getAlignment()); 7886 7887 // Replace one of the loads with the new load. 7888 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 7889 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 7890 SDValue(NewLoad.getNode(), 1)); 7891 7892 // Remove the rest of the load chains. 7893 for (unsigned i = 1; i < NumElem ; ++i) { 7894 // Replace all chain users of the old load nodes with the chain of the new 7895 // load node. 
    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
  }

  // Replace the first store with the new store.
  CombineTo(EarliestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumElem ; ++i) {
    // Remove all Store nodes.
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Redirect the chain users of each dead store to the store's own input
    // chain, then drop the node itself.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    removeFromWorkList(St);
    DAG.DeleteNode(St);
  }

  return true;
}

/// visitSTORE - Perform DAG combines on a STORE node.  Tries, in order:
/// stripping a bitcast off the stored value, deleting stores of undef,
/// rewriting FP-immediate stores as integer stores, improving alignment,
/// FP->int load/store pairing, alias-analysis chain improvement, pre/post
/// indexed forms, truncstore narrowing, dead-store elimination after a load
/// of the same location, folding FP_ROUND/TRUNCATE into a truncating store,
/// merging consecutive stores, and finally ReduceLoadOpStoreWidth.
/// Returns the replacement value, SDValue(N, 0) for in-place updates, or a
/// null SDValue when no combine applies.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign);
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not increase
    // the number of stores.  For example, on x86-32 an f64 can be stored in one
    // processor operation but an i64 (which is not legal) requires two.  So the
    // transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                              ST->isNonTemporal(), ST->getAlignment());
        }
        break;
      case MVT::f64:
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
                              ST->isNonTemporal(), ST->getAlignment());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();

          // Both half-stores take the original input chain and are later
          // joined by a TokenFactor: they write disjoint 4-byte halves, so
          // they need not be ordered with respect to each other.
          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment());
          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          // The high half sits 4 bytes in, so its alignment may be smaller.
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment);
          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        // getTruncStore with the store's own memory VT re-emits the store
        // (truncating or not) with the improved alignment.
        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align);
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                      ST->getPointerInfo(),
                                      ST->getMemoryVT(), ST->isVolatile(),
                                      ST->isNonTemporal(), ST->getAlignment());
      } else {
        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
                                 ST->getPointerInfo(),
                                 ST->isVolatile(), ST->isNonTemporal(),
                                 ST->getAlignment());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorkList(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                               ST->isVolatile(), ST->isNonTemporal(),
                               ST->getAlignment());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                             ST->isVolatile(), ST->isNonTemporal(),
                             ST->getAlignment());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes && MergeConsecutiveStores(ST))
    return SDValue(N, 0);

  return ReduceLoadOpStoreWidth(N);
}

/// visitINSERT_VECTOR_ELT - Fold an insert of a known element into a
/// BUILD_VECTOR (or UNDEF) operand by rebuilding the BUILD_VECTOR with the
/// element replaced.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  DebugLoc dl = N->getDebugLoc();

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element.  An out-of-range index leaves the vector untouched.
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl,
                     VT, &Ops[0], Ops.size());
}

/// visitEXTRACT_VECTOR_ELT - Combine an element extract: look through
/// SCALAR_TO_VECTOR, fold extracts of shuffles to an extract of the
/// selected input, and (after legalization) turn an extract of a loaded
/// vector into a narrow scalar load at the element's address.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
      && ConstEltNo && !LegalOperations) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.  Mask entries in
    // [NumElem, 2*NumElem) refer to the second shuffle input.
    if (OrigElt < NumElem) {
      InVec = InVec->getOperand(0);
    } else {
      InVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    EVT IndexTy = N->getOperand(1).getValueType();
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
                       InVec, DAG.getConstant(OrigElt, IndexTy));
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    bool NewLoad = false;
    bool BCNumEltsChanged = false;
    EVT ExtVT = VT.getVectorElementType();
    EVT LVT = ExtVT;

    // If the result of load has to be truncated, then it's not necessarily
    // profitable.
    if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
      return SDValue();

    if (InVec.getOpcode() == ISD::BITCAST) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      EVT BCVT = InVec.getOperand(0).getValueType();
      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
        return SDValue();
      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
        BCNumEltsChanged = true;
      InVec = InVec.getOperand(0);
      ExtVT = BCVT.getVectorElementType();
      NewLoad = true;
    }

    LoadSDNode *LN0 = NULL;
    const ShuffleVectorSDNode *SVN = NULL;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      unsigned NumElems = VT.getVectorNumElements();
      // NOTE(review): valid lane indices are 0..NumElems-1, so this guard
      // looks like it should be '>=' rather than '>'; with Elt == NumElems
      // getMaskElt(Elt) would be queried out of range — confirm upstream.
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    unsigned Align = LN0->getAlignment();
    if (NewLoad) {
      // Check the resultant load doesn't need a higher alignment than the
      // original load.
      unsigned NewAlign =
        TLI.getDataLayout()
            ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));

      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
        return SDValue();

      Align = NewAlign;
    }

    SDValue NewPtr = LN0->getBasePtr();
    unsigned PtrOff = 0;

    if (Elt) {
      PtrOff = LVT.getSizeInBits() * Elt / 8;
      EVT PtrType = NewPtr.getValueType();
      // On big-endian targets element 0 lives at the high end of the vector,
      // so the byte offset is measured from the other end.
      if (TLI.isBigEndian())
        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
                           DAG.getConstant(PtrOff, PtrType));
    }

    // The replacement we need to do here is a little tricky: we need to
    // replace an extractelement of a load with a load.
    // Use ReplaceAllUsesOfValuesWith to do the replacement.
    // Note that this replacement assumes that the extractvalue is the only
    // use of the load; that's okay because we don't want to perform this
    // transformation in other cases anyway.
    SDValue Load;
    SDValue Chain;
    if (NVT.bitsGT(LVT)) {
      // If the result type of vextract is wider than the load, then issue an
      // extending load instead.
      ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT)
        ?
          ISD::ZEXTLOAD : ISD::EXTLOAD;
      Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(),
                            NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff),
                            LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align);
      Chain = Load.getValue(1);
    } else {
      Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                         LN0->getPointerInfo().getWithOffset(PtrOff),
                         LN0->isVolatile(), LN0->isNonTemporal(),
                         LN0->isInvariant(), Align);
      Chain = Load.getValue(1);
      if (NVT.bitsLT(LVT))
        Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load);
      else
        Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load);
    }
    // Replace both the extract (value 0 of N) and the old load's chain in one
    // shot so no intermediate invalid state is visible.
    WorkListRemover DeadNodes(*this);
    SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) };
    SDValue To[] = { Load, Chain };
    DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
    // Since we're explicitly calling ReplaceAllUses, add the new node to the
    // worklist explicitly as well.
    AddToWorkList(Load.getNode());
    AddUsersToWorkList(Load.getNode()); // Add users too
    // Make sure to revisit this node to clean it up; it will usually be dead.
    AddToWorkList(N);
    return SDValue(N, 0);
  }

  return SDValue();
}

// Simplify (build_vec (ext )) to (bitcast (build_vec ))
/// reduceBuildVecExtToExtBuildVec - If every defined operand of the
/// BUILD_VECTOR is an ANY_EXTEND or ZERO_EXTEND from the same source type,
/// build a wider BUILD_VECTOR of the un-extended values (padded with undef
/// or zero) and bitcast it to the requested type.
SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
  // We perform this optimization post type-legalization because
  // the type-legalizer often scalarizes integer-promoted vectors.
  // Performing this optimization before may create bit-casts which
  // will be type-legalized to complex code sequences.
  // We perform this optimization only before the operation legalizer because we
  // may introduce illegal operations.
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                 isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                 isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // Lanes not covered by a source value are undef when every input was an
  // ANY_EXTEND; zero-extends require explicit zero padding.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // Place the source value in the low (LE) or high (BE) sub-lane of the
    // wide element it came from.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, &Ops[0], Ops.size());

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorkList(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

/// reduceBuildVecConvertToConvertBuildVec - If every defined element of this
/// BUILD_VECTOR is produced by the same [su]int_to_fp conversion, pull the
/// conversion out of the build_vector:
///   (build_vector (Xint_to_fp a), (Xint_to_fp b), ...)
///     -> (Xint_to_fp (build_vector a, b, ...))
/// Returns the new node, or an empty SDValue if the transform does not apply.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE is used here as a sentinel meaning "no conversion
  // opcode seen yet".
  unsigned Opcode = ISD::DELETED_NODE;
  // Number of non-undef elements; tracked so we can bail out when the fold
  // would only vectorize a single conversion.
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
      // If not supported by target, bail out.
      if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal &&
          TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom)
        return SDValue();
    }
    // All defined elements must use the same conversion opcode.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  // Build the pre-conversion integer vector; undef inputs stay undef.
  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT,
                           &Opnds[0], Opnds.size());
  AddToWorkList(BV.getNode());

  return DAG.getNode(Opcode, dl, VT, BV);
}

SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  DebugLoc dl = N->getDebugLoc();
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations &&
      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  SDValue VecIn1, VecIn2;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    // Ignore undef inputs.
    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;

    // If this input is something other than a EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
      // Clearing both inputs signals "no shuffle possible" to the code below.
      VecIn1 = VecIn2 = SDValue(0, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (VecIn1.getNode() == 0) {
      VecIn1 = ExtractedFromVec;
    } else if (VecIn2.getNode() == 0) {
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(0, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
  if (VecIn1.getNode()) {
    SmallVector<int, 8> Mask;
    for (unsigned i = 0; i != NumInScalars; ++i) {
      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // If extracting from the first vector, just use the index directly.
      SDValue Extract = N->getOperand(i);
      SDValue ExtVal = Extract.getOperand(1);
      if (Extract.getOperand(0) == VecIn1) {
        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
        // NOTE(review): this uses '>' rather than '>=', so an extract at
        // index == NumElements (out of range, i.e. an undef result) is kept
        // and its mask entry then selects element 0 of the second operand —
        // a legal refinement of undef.  Confirm this was intentional.
        if (ExtIndex > VT.getVectorNumElements())
          return SDValue();

        Mask.push_back(ExtIndex);
        continue;
      }

      // Otherwise, use InIdx + VecSize
      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
      Mask.push_back(Idx+NumInScalars);
    }

    // We can't generate a shuffle node with mismatched input and output types.
    // Attempt to transform a single input vector to the correct type.
    if ((VT != VecIn1.getValueType())) {
      // We don't support shuffeling between TWO values of different types.
      if (VecIn2.getNode() != 0)
        return SDValue();

      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
        return SDValue();

      // If the input vector type has a different base type to the output
      // vector type, bail out.
      if (VecIn1.getValueType().getVectorElementType() !=
          VT.getVectorElementType())
        return SDValue();

      // Widen the input vector by adding undef values.
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                           VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
    }

    // If VecIn2 is unused then change it to undef.
    VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
      return SDValue();

    // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
    if (!isTypeLegal(VT))
      return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
  // inputs come from at most two distinct vectors, turn this into a shuffle
  // node.

  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
8700 if (ISD::allOperandsUndef(N)) 8701 return DAG.getUNDEF(N->getValueType(0)); 8702 8703 return SDValue(); 8704} 8705 8706SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 8707 EVT NVT = N->getValueType(0); 8708 SDValue V = N->getOperand(0); 8709 8710 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 8711 // Handle only simple case where vector being inserted and vector 8712 // being extracted are of same type, and are half size of larger vectors. 8713 EVT BigVT = V->getOperand(0).getValueType(); 8714 EVT SmallVT = V->getOperand(1).getValueType(); 8715 if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 8716 return SDValue(); 8717 8718 // Only handle cases where both indexes are constants with the same type. 8719 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 8720 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 8721 8722 if (InsIdx && ExtIdx && 8723 InsIdx->getValueType(0).getSizeInBits() <= 64 && 8724 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 8725 // Combine: 8726 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 8727 // Into: 8728 // indices are equal => V1 8729 // otherwise => (extract_subvec V1, ExtIdx) 8730 if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) 8731 return V->getOperand(1); 8732 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, 8733 V->getOperand(0), N->getOperand(1)); 8734 } 8735 } 8736 8737 if (V->getOpcode() == ISD::CONCAT_VECTORS) { 8738 // Combine: 8739 // (extract_subvec (concat V1, V2, ...), i) 8740 // Into: 8741 // Vi if possible 8742 // Only operand 0 is checked as 'concat' assumes all inputs of the same type. 
8743 if (V->getOperand(0).getValueType() != NVT) 8744 return SDValue(); 8745 unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 8746 unsigned NumElems = NVT.getVectorNumElements(); 8747 assert((Idx % NumElems) == 0 && 8748 "IDX in concat is not a multiple of the result vector length."); 8749 return V->getOperand(Idx / NumElems); 8750 } 8751 8752 return SDValue(); 8753} 8754 8755SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 8756 EVT VT = N->getValueType(0); 8757 unsigned NumElts = VT.getVectorNumElements(); 8758 8759 SDValue N0 = N->getOperand(0); 8760 SDValue N1 = N->getOperand(1); 8761 8762 assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG"); 8763 8764 // Canonicalize shuffle undef, undef -> undef 8765 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 8766 return DAG.getUNDEF(VT); 8767 8768 ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N); 8769 8770 // Canonicalize shuffle v, v -> v, undef 8771 if (N0 == N1) { 8772 SmallVector<int, 8> NewMask; 8773 for (unsigned i = 0; i != NumElts; ++i) { 8774 int Idx = SVN->getMaskElt(i); 8775 if (Idx >= (int)NumElts) Idx -= NumElts; 8776 NewMask.push_back(Idx); 8777 } 8778 return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT), 8779 &NewMask[0]); 8780 } 8781 8782 // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask. 
  if (N0.getOpcode() == ISD::UNDEF) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Swap the two halves of the mask (negative entries stay undef).
      if (Idx >= 0) {
        if (Idx < (int)NumElts)
          Idx += NumElts;
        else
          Idx -= NumElts;
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef
  if (N1.getOpcode() == ISD::UNDEF) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Any mask entry pointing into the undef rhs is itself undef.
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector with all scalar elements the same.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef operand; it is the candidate splat value.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;
    }
  }

  // If this shuffle node is simply a swizzle of another shuffle node,
  // and it reverses the swizzle of the previous shuffle then we can
  // optimize shuffle(shuffle(x, undef), undef) -> x.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N1.getOpcode() == ISD::UNDEF) {

    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Shuffle nodes can only reverse shuffles with a single non-undef value.
    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      assert(Idx < (int)NumElts && "Index references undef operand");
      // Next, this index comes from the first value, which is the incoming
      // shuffle. Adopt the incoming index.
      if (Idx >= 0)
        Idx = OtherSV->getMaskElt(Idx);

      // The combined shuffle must map each index to itself.
      if (Idx >= 0 && (unsigned)Idx != i)
        return SDValue();
    }

    // The composition is the identity, so the inner shuffle's input passes
    // through unchanged.
    return OtherSV->getOperand(0);
  }

  return SDValue();
}

/// visitMEMBARRIER - Fold a memory barrier that immediately follows an atomic
/// operation into the atomic itself, when the target says fences may be
/// folded (TLI.getShouldFoldAtomicFences).  The barrier's chain operand is
/// rewired so the atomic uses the fence's incoming chain directly.
SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
  if (!TLI.getShouldFoldAtomicFences())
    return SDValue();

  SDValue atomic = N->getOperand(0);
  // Only the read-modify-write atomics below are candidates for folding.
  switch (atomic.getOpcode()) {
    case ISD::ATOMIC_CMP_SWAP:
    case ISD::ATOMIC_SWAP:
    case ISD::ATOMIC_LOAD_ADD:
    case ISD::ATOMIC_LOAD_SUB:
    case ISD::ATOMIC_LOAD_AND:
    case ISD::ATOMIC_LOAD_OR:
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_NAND:
    case ISD::ATOMIC_LOAD_MIN:
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
      break;
    default:
      return SDValue();
  }

  SDValue fence = atomic.getOperand(0);
  if (fence.getOpcode() != ISD::MEMBARRIER)
    return SDValue();

  switch (atomic.getOpcode()) {
    // ATOMIC_CMP_SWAP carries one more operand than the other atomics.
    case ISD::ATOMIC_CMP_SWAP:
      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
                                            fence.getOperand(0),
                                            atomic.getOperand(1), atomic.getOperand(2),
                                            atomic.getOperand(3)), atomic.getResNo());
    case ISD::ATOMIC_SWAP:
    case ISD::ATOMIC_LOAD_ADD:
    case ISD::ATOMIC_LOAD_SUB:
    case ISD::ATOMIC_LOAD_AND:
    case ISD::ATOMIC_LOAD_OR:
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_NAND:
    case ISD::ATOMIC_LOAD_MIN:
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
                                            fence.getOperand(0),
                                            atomic.getOperand(1), atomic.getOperand(2)),
                     atomic.getResNo());
    default:
      return SDValue();
  }
}

/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (N->getOpcode() == ISD::AND) {
    if (RHS.getOpcode() == ISD::BITCAST)
      RHS = RHS.getOperand(0);
    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
      SmallVector<int, 8> Indices;
      unsigned NumElts = RHS.getNumOperands();
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue Elt = RHS.getOperand(i);
        if (!isa<ConstantSDNode>(Elt))
          return SDValue();

        // All-ones mask element keeps the LHS element; zero mask element
        // selects the corresponding element of the zero vector (index
        // NumElts).  Anything else defeats the transform.
        if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
          Indices.push_back(i);
        else if (cast<ConstantSDNode>(Elt)->isNullValue())
          Indices.push_back(NumElts);
        else
          return SDValue();
      }

      // Let's see if the target supports this vector_shuffle.
      EVT RVT = RHS.getValueType();
      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
        return SDValue();

      // Return the new VECTOR_SHUFFLE node.
      EVT EltVT = RVT.getVectorElementType();
      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                     DAG.getConstant(0, EltVT));
      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                 RVT, &ZeroOps[0], ZeroOps.size());
      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
    }
  }

  return SDValue();
}

/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  // After legalize, the target may be depending on adds and other
  // binary ops to provide legal ways to construct constants or other
  // things. Simplifying them may result in a loss of legality.
  if (LegalOperations) return SDValue();

  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Shuffle = XformToShuffleWithZero(N);
  if (Shuffle.getNode()) return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);
      // If these two elements can't be folded, bail out.
      if ((LHSOp.getOpcode() != ISD::UNDEF &&
           LHSOp.getOpcode() != ISD::Constant &&
           LHSOp.getOpcode() != ISD::ConstantFP) ||
          (RHSOp.getOpcode() != ISD::UNDEF &&
           RHSOp.getOpcode() != ISD::Constant &&
           RHSOp.getOpcode() != ISD::ConstantFP))
        break;

      // Can't fold divide by zero.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if ((RHSOp.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
            (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
          break;
      }

      EVT VT = LHSOp.getValueType();
      EVT RVT = RHSOp.getValueType();
      if (RVT != VT) {
        // Integer BUILD_VECTOR operands may have types larger than the element
        // size (e.g., when the element type is not legal).  Prior to type
        // legalization, the types may not match between the two BUILD_VECTORS.
        // Truncate one of the operands to make them match.
        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
          RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
        } else {
          LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
          VT = RVT;
        }
      }
      // getNode constant-folds operations on constant operands; if the result
      // is not itself a constant (or undef), folding failed for this element.
      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
                                   LHSOp, RHSOp);
      if (FoldOp.getOpcode() != ISD::UNDEF &&
          FoldOp.getOpcode() != ISD::Constant &&
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
      AddToWorkList(FoldOp.getNode());
    }

    // Only rebuild the vector if every element folded successfully.
    if (Ops.size() == LHS.getNumOperands())
      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                         LHS.getValueType(), &Ops[0], Ops.size());
  }

  return SDValue();
}

/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
  // After legalize, the target may be depending on adds and other
  // binary ops to provide legal ways to construct constants or other
  // things. Simplifying them may result in a loss of legality.
  if (LegalOperations) return SDValue();

  assert(N->getValueType(0).isVector() &&
         "SimplifyVUnaryOp only works on vectors!");

  SDValue N0 = N->getOperand(0);

  if (N0.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
    SDValue Op = N0.getOperand(i);
    // Only FP-constant (or undef) elements can be folded here.
    if (Op.getOpcode() != ISD::UNDEF &&
        Op.getOpcode() != ISD::ConstantFP)
      break;
    EVT EltVT = Op.getValueType();
    SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op);
    if (FoldOp.getOpcode() != ISD::UNDEF &&
        FoldOp.getOpcode() != ISD::ConstantFP)
      break;
    Ops.push_back(FoldOp);
    AddToWorkList(FoldOp.getNode());
  }

  // Bail out unless every element was folded.
  if (Ops.size() != N0.getNumOperands())
    return SDValue();

  return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                     N0.getValueType(), &Ops[0], Ops.size());
}

/// SimplifySelect - Try to simplify (select (setcc ...), N1, N2) via
/// SimplifySelectCC; if that returns a select_cc, re-expand it into a SETCC
/// plus SELECT pair since the caller supplied a SELECT node.
SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorkList(SETCC.getNode());
      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
    }

    return SCC;
  }
  return SDValue();
}

/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select.  Callers of this should assume that TheSelect is deleted if this
/// returns true.  As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0)
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      // Only a load whose chain result is used can feed the condition and
      // create a cycle; hence the hasAnyUseOfValue(1) guard.
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0), LLD->getBasePtr(),
                         RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         TheSelect->getDebugLoc(),
                         // FIXME: Discards pointer info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         LLD->isInvariant(), LLD->getAlignment());
    } else {
      // If one load is anyext, use the other load's extension kind (the
      // earlier checks guarantee they otherwise match).
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            TheSelect->getDebugLoc(),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), LLD->getAlignment());
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());
  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());

  // fold select_cc true, x, y -> x
  if (SCCC && !SCCC->isNullValue())
    return N2;
  // fold select_cc false, x, y -> y
  if (SCCC && SCCC->isNullValue())
    return N3;

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->getValueAPF().isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn
"(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 9291 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 9292 // in it. This is a win when the constant is not otherwise available because 9293 // it replaces two constant pool loads with one. We only do this if the FP 9294 // type is known to be legal, because if it isn't, then we are before legalize 9295 // types an we want the other legalization to happen first (e.g. to avoid 9296 // messing with soft float) and if the ConstantFP is not legal, because if 9297 // it is legal, we may not need to store the FP constant in a constant pool. 9298 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 9299 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 9300 if (TLI.isTypeLegal(N2.getValueType()) && 9301 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 9302 TargetLowering::Legal) && 9303 // If both constants have multiple uses, then we won't need to do an 9304 // extra load, they are likely around in registers for other users. 9305 (TV->hasOneUse() || FV->hasOneUse())) { 9306 Constant *Elts[] = { 9307 const_cast<ConstantFP*>(FV->getConstantFPValue()), 9308 const_cast<ConstantFP*>(TV->getConstantFPValue()) 9309 }; 9310 Type *FPTy = Elts[0]->getType(); 9311 const DataLayout &TD = *TLI.getDataLayout(); 9312 9313 // Create a ConstantArray of the two constants. 9314 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 9315 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), 9316 TD.getPrefTypeAlignment(FPTy)); 9317 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 9318 9319 // Get the offsets to the 0 and 1 element of the array so that we can 9320 // select between them. 
9321 SDValue Zero = DAG.getIntPtrConstant(0); 9322 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 9323 SDValue One = DAG.getIntPtrConstant(EltSize); 9324 9325 SDValue Cond = DAG.getSetCC(DL, 9326 TLI.getSetCCResultType(N0.getValueType()), 9327 N0, N1, CC); 9328 AddToWorkList(Cond.getNode()); 9329 SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(), 9330 Cond, One, Zero); 9331 AddToWorkList(CstOffset.getNode()); 9332 CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx, 9333 CstOffset); 9334 AddToWorkList(CPIdx.getNode()); 9335 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 9336 MachinePointerInfo::getConstantPool(), false, 9337 false, false, Alignment); 9338 9339 } 9340 } 9341 9342 // Check to see if we can perform the "gzip trick", transforming 9343 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 9344 if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && 9345 (N1C->isNullValue() || // (a < 0) ? b : 0 9346 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 9347 EVT XType = N0.getValueType(); 9348 EVT AType = N2.getValueType(); 9349 if (XType.bitsGE(AType)) { 9350 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 9351 // single-bit constant. 
9352 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { 9353 unsigned ShCtV = N2C->getAPIntValue().logBase2(); 9354 ShCtV = XType.getSizeInBits()-ShCtV-1; 9355 SDValue ShCt = DAG.getConstant(ShCtV, 9356 getShiftAmountTy(N0.getValueType())); 9357 SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), 9358 XType, N0, ShCt); 9359 AddToWorkList(Shift.getNode()); 9360 9361 if (XType.bitsGT(AType)) { 9362 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 9363 AddToWorkList(Shift.getNode()); 9364 } 9365 9366 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 9367 } 9368 9369 SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), 9370 XType, N0, 9371 DAG.getConstant(XType.getSizeInBits()-1, 9372 getShiftAmountTy(N0.getValueType()))); 9373 AddToWorkList(Shift.getNode()); 9374 9375 if (XType.bitsGT(AType)) { 9376 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 9377 AddToWorkList(Shift.getNode()); 9378 } 9379 9380 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 9381 } 9382 } 9383 9384 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 9385 // where y is has a single bit set. 9386 // A plaintext description would be, we can turn the SELECT_CC into an AND 9387 // when the condition can be materialized as an all-ones register. Any 9388 // single bit-test can be materialized as an all-ones register with 9389 // shift-left and shift-right-arith. 9390 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 9391 N0->getValueType(0) == VT && 9392 N1C && N1C->isNullValue() && 9393 N2C && N2C->isNullValue()) { 9394 SDValue AndLHS = N0->getOperand(0); 9395 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 9396 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 9397 // Shift the tested bit over the sign bit. 
9398 APInt AndMask = ConstAndRHS->getAPIntValue(); 9399 SDValue ShlAmt = 9400 DAG.getConstant(AndMask.countLeadingZeros(), 9401 getShiftAmountTy(AndLHS.getValueType())); 9402 SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt); 9403 9404 // Now arithmetic right shift it all the way over, so the result is either 9405 // all-ones, or zero. 9406 SDValue ShrAmt = 9407 DAG.getConstant(AndMask.getBitWidth()-1, 9408 getShiftAmountTy(Shl.getValueType())); 9409 SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt); 9410 9411 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 9412 } 9413 } 9414 9415 // fold select C, 16, 0 -> shl C, 4 9416 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && 9417 TLI.getBooleanContents(N0.getValueType().isVector()) == 9418 TargetLowering::ZeroOrOneBooleanContent) { 9419 9420 // If the caller doesn't want us to simplify this into a zext of a compare, 9421 // don't do it. 9422 if (NotExtCompare && N2C->getAPIntValue() == 1) 9423 return SDValue(); 9424 9425 // Get a SetCC of the condition 9426 // NOTE: Don't create a SETCC if it's not legal on this target. 9427 if (!LegalOperations || 9428 TLI.isOperationLegal(ISD::SETCC, 9429 LegalTypes ? 
TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) { 9430 SDValue Temp, SCC; 9431 // cast from setcc result type to select result type 9432 if (LegalTypes) { 9433 SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), 9434 N0, N1, CC); 9435 if (N2.getValueType().bitsLT(SCC.getValueType())) 9436 Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), 9437 N2.getValueType()); 9438 else 9439 Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), 9440 N2.getValueType(), SCC); 9441 } else { 9442 SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); 9443 Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), 9444 N2.getValueType(), SCC); 9445 } 9446 9447 AddToWorkList(SCC.getNode()); 9448 AddToWorkList(Temp.getNode()); 9449 9450 if (N2C->getAPIntValue() == 1) 9451 return Temp; 9452 9453 // shl setcc result by log2 n2c 9454 return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, 9455 DAG.getConstant(N2C->getAPIntValue().logBase2(), 9456 getShiftAmountTy(Temp.getValueType()))); 9457 } 9458 } 9459 9460 // Check to see if this is the equivalent of setcc 9461 // FIXME: Turn all of these into setcc if setcc if setcc is legal 9462 // otherwise, go ahead with the folds. 
9463 if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { 9464 EVT XType = N0.getValueType(); 9465 if (!LegalOperations || 9466 TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { 9467 SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); 9468 if (Res.getValueType() != VT) 9469 Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); 9470 return Res; 9471 } 9472 9473 // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) 9474 if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && 9475 (!LegalOperations || 9476 TLI.isOperationLegal(ISD::CTLZ, XType))) { 9477 SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); 9478 return DAG.getNode(ISD::SRL, DL, XType, Ctlz, 9479 DAG.getConstant(Log2_32(XType.getSizeInBits()), 9480 getShiftAmountTy(Ctlz.getValueType()))); 9481 } 9482 // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) 9483 if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { 9484 SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), 9485 XType, DAG.getConstant(0, XType), N0); 9486 SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); 9487 return DAG.getNode(ISD::SRL, DL, XType, 9488 DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), 9489 DAG.getConstant(XType.getSizeInBits()-1, 9490 getShiftAmountTy(XType))); 9491 } 9492 // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) 9493 if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { 9494 SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, 9495 DAG.getConstant(XType.getSizeInBits()-1, 9496 getShiftAmountTy(N0.getValueType()))); 9497 return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); 9498 } 9499 } 9500 9501 // Check to see if this is an integer abs. 
9502 // select_cc setg[te] X, 0, X, -X -> 9503 // select_cc setgt X, -1, X, -X -> 9504 // select_cc setl[te] X, 0, -X, X -> 9505 // select_cc setlt X, 1, -X, X -> 9506 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 9507 if (N1C) { 9508 ConstantSDNode *SubC = NULL; 9509 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 9510 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 9511 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 9512 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 9513 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 9514 (N1C->isOne() && CC == ISD::SETLT)) && 9515 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 9516 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 9517 9518 EVT XType = N0.getValueType(); 9519 if (SubC && SubC->isNullValue() && XType.isInteger()) { 9520 SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, 9521 N0, 9522 DAG.getConstant(XType.getSizeInBits()-1, 9523 getShiftAmountTy(N0.getValueType()))); 9524 SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), 9525 XType, N0, Shift); 9526 AddToWorkList(Shift.getNode()); 9527 AddToWorkList(Add.getNode()); 9528 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 9529 } 9530 } 9531 9532 return SDValue(); 9533} 9534 9535/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 9536SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 9537 SDValue N1, ISD::CondCode Cond, 9538 DebugLoc DL, bool foldBooleans) { 9539 TargetLowering::DAGCombinerInfo 9540 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 9541 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 9542} 9543 9544/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 9545/// return a DAG expression to select that will generate the same value by 9546/// multiplying by a magic number. 
/// See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);

  // Queue every node the target created for the expansion so each one gets a
  // chance to be combined further.
  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);

  // Queue every node the target created for the expansion so each one gets a
  // chance to be combined further.
  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}

/// FindBaseOffset - Return true if base is a frame index, which is known not
/// to alias with anything but itself.  Provides the base object (and, when
/// identifiable, the underlying GlobalValue or constant-pool value) plus the
/// accumulated byte offset as results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, const void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = 0; CV = 0;

  // If it's adding a simple constant then integrate the offset.
  // NOTE(review): only one level of ADD is peeled here; a nested
  // (add (add x, c1), c2) keeps the inner ADD as the base.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getZExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets (so node identity alone is not
  // enough to prove non-aliasing; the caller compares GV instead).
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets (the caller compares CV instead).
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
                                         : (const void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }
  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}

/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.  Conservative: returns true whenever aliasing cannot be ruled out.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                          const Value *SrcValue1, int SrcValueOffset1,
                          unsigned SrcValueAlign1,
                          const MDNode *TBAAInfo1,
                          SDValue Ptr2, int64_t Size2,
                          const Value *SrcValue2, int SrcValueOffset2,
                          unsigned SrcValueAlign2,
                          const MDNode *TBAAInfo2) const {
  // If they are the same then they must be aliases.
  if (Ptr1 == Ptr2) return true;

  // Gather base node and offset information.
  SDValue Base1, Base2;
  int64_t Offset1, Offset2;
  const GlobalValue *GV1, *GV2;
  const void *CV1, *CV2;
  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);

  // If they have the same base address then check to see if they overlap.
  // Overlap iff neither access ends at or before the other begins.
  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias.  This check is conservative for now to catch cases created by
  // splitting vector types.
  if ((SrcValueAlign1 == SrcValueAlign2) &&
      (SrcValueOffset1 != SrcValueOffset2) &&
      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
    // The guard above requires SrcValueAlign1 == SrcValueAlign2, so using
    // SrcValueAlign1 for both moduli is intentional.
    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;

    // There is no overlap between these relatively aligned accesses of similar
    // size, return no alias.
    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
      return false;
  }

  if (CombinerGlobalAA) {
    // Use alias analysis information.  The query range for each location is
    // widened to start at the smaller of the two source-value offsets so both
    // accesses are measured from a common origin.
    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
    AliasAnalysis::AliasResult AAResult =
      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
    if (AAResult == AliasAnalysis::NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// FindAliasInfo - Extracts the relevant alias information (pointer, size in
/// bytes, IR source value/offset, alignment, TBAA metadata) from the memory
/// node.  Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
                                SDValue &Ptr, int64_t &Size,
                                const Value *&SrcValue,
                                int &SrcValueOffset,
                                unsigned &SrcValueAlign,
                                const MDNode *&TBAAInfo) const {
  // Caller guarantees N is a load or store (LSBaseSDNode); cast<> asserts it.
  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);

  Ptr = LS->getBasePtr();
  // Memory VT size is in bits; convert to bytes.
  Size = LS->getMemoryVT().getSizeInBits() >> 3;
  SrcValue = LS->getSrcValue();
  SrcValueOffset = LS->getSrcValueOffset();
  SrcValueAlign = LS->getOriginalAlignment();
  TBAAInfo = LS->getTBAAInfo();
  return isa<LoadSDNode>(LS);
}

/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVector<SDValue, 8> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit (work stack).
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  SDValue Ptr;
  int64_t Size;
  const Value *SrcValue;
  int SrcValueOffset;
  unsigned SrcValueAlign;
  const MDNode *SrcTBAAInfo;
  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
                              SrcValueAlign, SrcTBAAInfo);

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases.  If we've seen two aliases, assume we'll
    // find more and revert to original chain since the xform is unlikely to be
    // profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      break;
    }

    // Don't bother if we've been before.  (insert returns false if the node
    // was already present.)
    if (!Visited.insert(Chain.getNode()))
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      SDValue OpPtr;
      int64_t OpSize;
      const Value *OpSrcValue;
      int OpSrcValueOffset;
      unsigned OpSrcValueAlign;
      const MDNode *OpSrcTBAAInfo;
      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                    OpSrcValue, OpSrcValueOffset,
                                    OpSrcValueAlign,
                                    OpSrcTBAAInfo);

      // If chain is alias then stop here.  Two loads never alias for chain
      // purposes, so only a load/store or store/store pair can conflict.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
                  SrcTBAAInfo,
                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
                  OpSrcValueAlign, OpSrcTBAAInfo)) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node.)  Returns the entry token when nothing
/// aliases, the single aliasing chain when there is exactly one, or a fresh
/// TokenFactor tying together all aliasing chains otherwise.
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                     &Aliases[0], Aliases.size());
}

// SelectionDAG::Combine - This is the entry point for the file.
//
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  // Construct a combiner for this DAG and run it over every node at the
  // requested combine level.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}