DAGCombiner.cpp revision 3a2a7d379409ccf8f5cefeb23c5c4feaf59652fd
1//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11// both before and after the DAG is legalized. 12// 13// This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14// primarily intended to handle simplification opportunities that are implicit 15// in the LLVM IR and exposed by the various codegen lowering phases. 16// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "dagcombine" 20#include "llvm/CodeGen/SelectionDAG.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/LLVMContext.h" 23#include "llvm/CodeGen/MachineFunction.h" 24#include "llvm/CodeGen/MachineFrameInfo.h" 25#include "llvm/CodeGen/PseudoSourceValue.h" 26#include "llvm/Analysis/AliasAnalysis.h" 27#include "llvm/Target/TargetData.h" 28#include "llvm/Target/TargetFrameInfo.h" 29#include "llvm/Target/TargetLowering.h" 30#include "llvm/Target/TargetMachine.h" 31#include "llvm/Target/TargetOptions.h" 32#include "llvm/ADT/SmallPtrSet.h" 33#include "llvm/ADT/Statistic.h" 34#include "llvm/Support/CommandLine.h" 35#include "llvm/Support/Debug.h" 36#include "llvm/Support/ErrorHandling.h" 37#include "llvm/Support/MathExtras.h" 38#include "llvm/Support/raw_ostream.h" 39#include <algorithm> 40using namespace llvm; 41 42STATISTIC(NodesCombined , "Number of dag nodes combined"); 43STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 44STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 45STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 46 47namespace { 48 static cl::opt<bool> 49 CombinerAA("combiner-alias-analysis", cl::Hidden, 50 
cl::desc("Turn on alias analysis during testing")); 51 52 static cl::opt<bool> 53 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 54 cl::desc("Include global information in alias analysis")); 55 56//------------------------------ DAGCombiner ---------------------------------// 57 58 class DAGCombiner { 59 SelectionDAG &DAG; 60 const TargetLowering &TLI; 61 CombineLevel Level; 62 CodeGenOpt::Level OptLevel; 63 bool LegalOperations; 64 bool LegalTypes; 65 66 // Worklist of all of the nodes that need to be simplified. 67 std::vector<SDNode*> WorkList; 68 69 // AA - Used for DAG load/store alias analysis. 70 AliasAnalysis &AA; 71 72 /// AddUsersToWorkList - When an instruction is simplified, add all users of 73 /// the instruction to the work lists because they might get more simplified 74 /// now. 75 /// 76 void AddUsersToWorkList(SDNode *N) { 77 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 78 UI != UE; ++UI) 79 AddToWorkList(*UI); 80 } 81 82 /// visit - call the node-specific routine that knows how to fold each 83 /// particular type of node. 84 SDValue visit(SDNode *N); 85 86 public: 87 /// AddToWorkList - Add to the work list making sure it's instance is at the 88 /// the back (next to be processed.) 89 void AddToWorkList(SDNode *N) { 90 removeFromWorkList(N); 91 WorkList.push_back(N); 92 } 93 94 /// removeFromWorkList - remove all instances of N from the worklist. 
95 /// 96 void removeFromWorkList(SDNode *N) { 97 WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N), 98 WorkList.end()); 99 } 100 101 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 102 bool AddTo = true); 103 104 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 105 return CombineTo(N, &Res, 1, AddTo); 106 } 107 108 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 109 bool AddTo = true) { 110 SDValue To[] = { Res0, Res1 }; 111 return CombineTo(N, To, 2, AddTo); 112 } 113 114 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 115 116 private: 117 118 /// SimplifyDemandedBits - Check the specified integer node value to see if 119 /// it can be simplified or if things it uses can be simplified by bit 120 /// propagation. If so, return true. 121 bool SimplifyDemandedBits(SDValue Op) { 122 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 123 APInt Demanded = APInt::getAllOnesValue(BitWidth); 124 return SimplifyDemandedBits(Op, Demanded); 125 } 126 127 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 128 129 bool CombineToPreIndexedLoadStore(SDNode *N); 130 bool CombineToPostIndexedLoadStore(SDNode *N); 131 132 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 133 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 134 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 135 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 136 SDValue PromoteIntBinOp(SDValue Op); 137 SDValue PromoteIntShiftOp(SDValue Op); 138 SDValue PromoteExtend(SDValue Op); 139 bool PromoteLoad(SDValue Op); 140 141 /// combine - call the node-specific routine that knows how to fold each 142 /// particular type of node. If that doesn't do anything, try the 143 /// target-specific DAG combines. 144 SDValue combine(SDNode *N); 145 146 // Visitation implementation - Implement dag node combining for different 147 // node types. 
The semantics are as follows: 148 // Return Value: 149 // SDValue.getNode() == 0 - No change was made 150 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 151 // otherwise - N should be replaced by the returned Operand. 152 // 153 SDValue visitTokenFactor(SDNode *N); 154 SDValue visitMERGE_VALUES(SDNode *N); 155 SDValue visitADD(SDNode *N); 156 SDValue visitSUB(SDNode *N); 157 SDValue visitADDC(SDNode *N); 158 SDValue visitADDE(SDNode *N); 159 SDValue visitMUL(SDNode *N); 160 SDValue visitSDIV(SDNode *N); 161 SDValue visitUDIV(SDNode *N); 162 SDValue visitSREM(SDNode *N); 163 SDValue visitUREM(SDNode *N); 164 SDValue visitMULHU(SDNode *N); 165 SDValue visitMULHS(SDNode *N); 166 SDValue visitSMUL_LOHI(SDNode *N); 167 SDValue visitUMUL_LOHI(SDNode *N); 168 SDValue visitSDIVREM(SDNode *N); 169 SDValue visitUDIVREM(SDNode *N); 170 SDValue visitAND(SDNode *N); 171 SDValue visitOR(SDNode *N); 172 SDValue visitXOR(SDNode *N); 173 SDValue SimplifyVBinOp(SDNode *N); 174 SDValue visitSHL(SDNode *N); 175 SDValue visitSRA(SDNode *N); 176 SDValue visitSRL(SDNode *N); 177 SDValue visitCTLZ(SDNode *N); 178 SDValue visitCTTZ(SDNode *N); 179 SDValue visitCTPOP(SDNode *N); 180 SDValue visitSELECT(SDNode *N); 181 SDValue visitSELECT_CC(SDNode *N); 182 SDValue visitSETCC(SDNode *N); 183 SDValue visitSIGN_EXTEND(SDNode *N); 184 SDValue visitZERO_EXTEND(SDNode *N); 185 SDValue visitANY_EXTEND(SDNode *N); 186 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 187 SDValue visitTRUNCATE(SDNode *N); 188 SDValue visitBIT_CONVERT(SDNode *N); 189 SDValue visitBUILD_PAIR(SDNode *N); 190 SDValue visitFADD(SDNode *N); 191 SDValue visitFSUB(SDNode *N); 192 SDValue visitFMUL(SDNode *N); 193 SDValue visitFDIV(SDNode *N); 194 SDValue visitFREM(SDNode *N); 195 SDValue visitFCOPYSIGN(SDNode *N); 196 SDValue visitSINT_TO_FP(SDNode *N); 197 SDValue visitUINT_TO_FP(SDNode *N); 198 SDValue visitFP_TO_SINT(SDNode *N); 199 SDValue visitFP_TO_UINT(SDNode *N); 200 SDValue visitFP_ROUND(SDNode 
*N); 201 SDValue visitFP_ROUND_INREG(SDNode *N); 202 SDValue visitFP_EXTEND(SDNode *N); 203 SDValue visitFNEG(SDNode *N); 204 SDValue visitFABS(SDNode *N); 205 SDValue visitBRCOND(SDNode *N); 206 SDValue visitBR_CC(SDNode *N); 207 SDValue visitLOAD(SDNode *N); 208 SDValue visitSTORE(SDNode *N); 209 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 210 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 211 SDValue visitBUILD_VECTOR(SDNode *N); 212 SDValue visitCONCAT_VECTORS(SDNode *N); 213 SDValue visitVECTOR_SHUFFLE(SDNode *N); 214 215 SDValue XformToShuffleWithZero(SDNode *N); 216 SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); 217 218 SDValue visitShiftByConstant(SDNode *N, unsigned Amt); 219 220 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 221 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 222 SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); 223 SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, 224 SDValue N3, ISD::CondCode CC, 225 bool NotExtCompare = false); 226 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 227 DebugLoc DL, bool foldBooleans = true); 228 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 229 unsigned HiOp); 230 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 231 SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT); 232 SDValue BuildSDIV(SDNode *N); 233 SDValue BuildUDIV(SDNode *N); 234 SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); 235 SDValue ReduceLoadWidth(SDNode *N); 236 SDValue ReduceLoadOpStoreWidth(SDNode *N); 237 238 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 239 240 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 241 /// looking for aliasing nodes and adding them to the Aliases vector. 
242 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 243 SmallVector<SDValue, 8> &Aliases); 244 245 /// isAlias - Return true if there is any possibility that the two addresses 246 /// overlap. 247 bool isAlias(SDValue Ptr1, int64_t Size1, 248 const Value *SrcValue1, int SrcValueOffset1, 249 unsigned SrcValueAlign1, 250 SDValue Ptr2, int64_t Size2, 251 const Value *SrcValue2, int SrcValueOffset2, 252 unsigned SrcValueAlign2) const; 253 254 /// FindAliasInfo - Extracts the relevant alias information from the memory 255 /// node. Returns true if the operand was a load. 256 bool FindAliasInfo(SDNode *N, 257 SDValue &Ptr, int64_t &Size, 258 const Value *&SrcValue, int &SrcValueOffset, 259 unsigned &SrcValueAlignment) const; 260 261 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, 262 /// looking for a better chain (aliasing node.) 263 SDValue FindBetterChain(SDNode *N, SDValue Chain); 264 265 public: 266 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 267 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), 268 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} 269 270 /// Run - runs the dag combiner on all nodes in the work list 271 void Run(CombineLevel AtLevel); 272 273 SelectionDAG &getDAG() const { return DAG; } 274 275 /// getShiftAmountTy - Returns a type large enough to hold any valid 276 /// shift amount - before type legalization these can be huge. 277 EVT getShiftAmountTy() { 278 return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy(); 279 } 280 281 /// isTypeLegal - This method returns true if we are running before type 282 /// legalization or if the specified VT is legal. 283 bool isTypeLegal(const EVT &VT) { 284 if (!LegalTypes) return true; 285 return TLI.isTypeLegal(VT); 286 } 287 }; 288} 289 290 291namespace { 292/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted 293/// nodes from the worklist. 
294class WorkListRemover : public SelectionDAG::DAGUpdateListener { 295 DAGCombiner &DC; 296public: 297 explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {} 298 299 virtual void NodeDeleted(SDNode *N, SDNode *E) { 300 DC.removeFromWorkList(N); 301 } 302 303 virtual void NodeUpdated(SDNode *N) { 304 // Ignore updates. 305 } 306}; 307} 308 309//===----------------------------------------------------------------------===// 310// TargetLowering::DAGCombinerInfo implementation 311//===----------------------------------------------------------------------===// 312 313void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 314 ((DAGCombiner*)DC)->AddToWorkList(N); 315} 316 317SDValue TargetLowering::DAGCombinerInfo:: 318CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 319 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 320} 321 322SDValue TargetLowering::DAGCombinerInfo:: 323CombineTo(SDNode *N, SDValue Res, bool AddTo) { 324 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 325} 326 327 328SDValue TargetLowering::DAGCombinerInfo:: 329CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 330 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 331} 332 333void TargetLowering::DAGCombinerInfo:: 334CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 335 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 336} 337 338//===----------------------------------------------------------------------===// 339// Helper Functions 340//===----------------------------------------------------------------------===// 341 342/// isNegatibleForFree - Return 1 if we can compute the negated form of the 343/// specified expression for the same cost as the expression itself, or 2 if we 344/// can compute the negated form more cheaply than the expression itself. 345static char isNegatibleForFree(SDValue Op, bool LegalOperations, 346 unsigned Depth = 0) { 347 // No compile time optimizations on this type. 
348 if (Op.getValueType() == MVT::ppcf128) 349 return 0; 350 351 // fneg is removable even if it has multiple uses. 352 if (Op.getOpcode() == ISD::FNEG) return 2; 353 354 // Don't allow anything with multiple uses. 355 if (!Op.hasOneUse()) return 0; 356 357 // Don't recurse exponentially. 358 if (Depth > 6) return 0; 359 360 switch (Op.getOpcode()) { 361 default: return false; 362 case ISD::ConstantFP: 363 // Don't invert constant FP values after legalize. The negated constant 364 // isn't necessarily legal. 365 return LegalOperations ? 0 : 1; 366 case ISD::FADD: 367 // FIXME: determine better conditions for this xform. 368 if (!UnsafeFPMath) return 0; 369 370 // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) 371 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) 372 return V; 373 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 374 return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); 375 case ISD::FSUB: 376 // We can't turn -(A-B) into B-A when we honor signed zeros. 377 if (!UnsafeFPMath) return 0; 378 379 // fold (fneg (fsub A, B)) -> (fsub B, A) 380 return 1; 381 382 case ISD::FMUL: 383 case ISD::FDIV: 384 if (HonorSignDependentRoundingFPMath()) return 0; 385 386 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 387 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) 388 return V; 389 390 return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1); 391 392 case ISD::FP_EXTEND: 393 case ISD::FP_ROUND: 394 case ISD::FSIN: 395 return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1); 396 } 397} 398 399/// GetNegatedExpression - If isNegatibleForFree returns true, this function 400/// returns the newly negated expression. 401static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 402 bool LegalOperations, unsigned Depth = 0) { 403 // fneg is removable even if it has multiple uses. 
404 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 405 406 // Don't allow anything with multiple uses. 407 assert(Op.hasOneUse() && "Unknown reuse!"); 408 409 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 410 switch (Op.getOpcode()) { 411 default: llvm_unreachable("Unknown code"); 412 case ISD::ConstantFP: { 413 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 414 V.changeSign(); 415 return DAG.getConstantFP(V, Op.getValueType()); 416 } 417 case ISD::FADD: 418 // FIXME: determine better conditions for this xform. 419 assert(UnsafeFPMath); 420 421 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 422 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) 423 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 424 GetNegatedExpression(Op.getOperand(0), DAG, 425 LegalOperations, Depth+1), 426 Op.getOperand(1)); 427 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 428 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 429 GetNegatedExpression(Op.getOperand(1), DAG, 430 LegalOperations, Depth+1), 431 Op.getOperand(0)); 432 case ISD::FSUB: 433 // We can't turn -(A-B) into B-A when we honor signed zeros. 
434 assert(UnsafeFPMath); 435 436 // fold (fneg (fsub 0, B)) -> B 437 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 438 if (N0CFP->getValueAPF().isZero()) 439 return Op.getOperand(1); 440 441 // fold (fneg (fsub A, B)) -> (fsub B, A) 442 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 443 Op.getOperand(1), Op.getOperand(0)); 444 445 case ISD::FMUL: 446 case ISD::FDIV: 447 assert(!HonorSignDependentRoundingFPMath()); 448 449 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 450 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1)) 451 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 452 GetNegatedExpression(Op.getOperand(0), DAG, 453 LegalOperations, Depth+1), 454 Op.getOperand(1)); 455 456 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 457 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 458 Op.getOperand(0), 459 GetNegatedExpression(Op.getOperand(1), DAG, 460 LegalOperations, Depth+1)); 461 462 case ISD::FP_EXTEND: 463 case ISD::FSIN: 464 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 465 GetNegatedExpression(Op.getOperand(0), DAG, 466 LegalOperations, Depth+1)); 467 case ISD::FP_ROUND: 468 return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), 469 GetNegatedExpression(Op.getOperand(0), DAG, 470 LegalOperations, Depth+1), 471 Op.getOperand(1)); 472 } 473} 474 475 476// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 477// that selects between the values 1 and 0, making it equivalent to a setcc. 478// Also, set the incoming LHS, RHS, and CC references to the appropriate 479// nodes based on the type of node we are checking. This simplifies life a 480// bit for the callers. 
481static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 482 SDValue &CC) { 483 if (N.getOpcode() == ISD::SETCC) { 484 LHS = N.getOperand(0); 485 RHS = N.getOperand(1); 486 CC = N.getOperand(2); 487 return true; 488 } 489 if (N.getOpcode() == ISD::SELECT_CC && 490 N.getOperand(2).getOpcode() == ISD::Constant && 491 N.getOperand(3).getOpcode() == ISD::Constant && 492 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 493 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 494 LHS = N.getOperand(0); 495 RHS = N.getOperand(1); 496 CC = N.getOperand(4); 497 return true; 498 } 499 return false; 500} 501 502// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 503// one use. If this is true, it allows the users to invert the operation for 504// free when it is profitable to do so. 505static bool isOneUseSetCC(SDValue N) { 506 SDValue N0, N1, N2; 507 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 508 return true; 509 return false; 510} 511 512SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, 513 SDValue N0, SDValue N1) { 514 EVT VT = N0.getValueType(); 515 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 516 if (isa<ConstantSDNode>(N1)) { 517 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) 518 SDValue OpNode = 519 DAG.FoldConstantArithmetic(Opc, VT, 520 cast<ConstantSDNode>(N0.getOperand(1)), 521 cast<ConstantSDNode>(N1)); 522 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 523 } else if (N0.hasOneUse()) { 524 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 525 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 526 N0.getOperand(0), N1); 527 AddToWorkList(OpNode.getNode()); 528 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 529 } 530 } 531 532 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 533 if (isa<ConstantSDNode>(N0)) { 534 // reassoc. 
(op c2, (op x, c1)) -> (op x, (op c1, c2)) 535 SDValue OpNode = 536 DAG.FoldConstantArithmetic(Opc, VT, 537 cast<ConstantSDNode>(N1.getOperand(1)), 538 cast<ConstantSDNode>(N0)); 539 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 540 } else if (N1.hasOneUse()) { 541 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 542 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 543 N1.getOperand(0), N0); 544 AddToWorkList(OpNode.getNode()); 545 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 546 } 547 } 548 549 return SDValue(); 550} 551 552SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 553 bool AddTo) { 554 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 555 ++NodesCombined; 556 DEBUG(dbgs() << "\nReplacing.1 "; 557 N->dump(&DAG); 558 dbgs() << "\nWith: "; 559 To[0].getNode()->dump(&DAG); 560 dbgs() << " and " << NumTo-1 << " other values\n"; 561 for (unsigned i = 0, e = NumTo; i != e; ++i) 562 assert((!To[i].getNode() || 563 N->getValueType(i) == To[i].getValueType()) && 564 "Cannot combine value to value of different type!")); 565 WorkListRemover DeadNodes(*this); 566 DAG.ReplaceAllUsesWith(N, To, &DeadNodes); 567 568 if (AddTo) { 569 // Push the new nodes and any users onto the worklist 570 for (unsigned i = 0, e = NumTo; i != e; ++i) { 571 if (To[i].getNode()) { 572 AddToWorkList(To[i].getNode()); 573 AddUsersToWorkList(To[i].getNode()); 574 } 575 } 576 } 577 578 // Finally, if the node is now dead, remove it from the graph. The node 579 // may not be dead if the replacement process recursively simplified to 580 // something else needing this node. 581 if (N->use_empty()) { 582 // Nodes can be reintroduced into the worklist. Make sure we do not 583 // process a node that has been replaced. 584 removeFromWorkList(N); 585 586 // Finally, since the node is now dead, remove it from the graph. 
587 DAG.DeleteNode(N); 588 } 589 return SDValue(N, 0); 590} 591 592void DAGCombiner:: 593CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 594 // Replace all uses. If any nodes become isomorphic to other nodes and 595 // are deleted, make sure to remove them from our worklist. 596 WorkListRemover DeadNodes(*this); 597 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); 598 599 // Push the new node and any (possibly new) users onto the worklist. 600 AddToWorkList(TLO.New.getNode()); 601 AddUsersToWorkList(TLO.New.getNode()); 602 603 // Finally, if the node is now dead, remove it from the graph. The node 604 // may not be dead if the replacement process recursively simplified to 605 // something else needing this node. 606 if (TLO.Old.getNode()->use_empty()) { 607 removeFromWorkList(TLO.Old.getNode()); 608 609 // If the operands of this node are only used by the node, they will now 610 // be dead. Make sure to visit them first to delete dead nodes early. 611 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 612 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 613 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 614 615 DAG.DeleteNode(TLO.Old.getNode()); 616 } 617} 618 619/// SimplifyDemandedBits - Check the specified integer node value to see if 620/// it can be simplified or if things it uses can be simplified by bit 621/// propagation. If so, return true. 622bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 623 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 624 APInt KnownZero, KnownOne; 625 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 626 return false; 627 628 // Revisit the node. 629 AddToWorkList(Op.getNode()); 630 631 // Replace the old value with the new one. 
632 ++NodesCombined; 633 DEBUG(dbgs() << "\nReplacing.2 "; 634 TLO.Old.getNode()->dump(&DAG); 635 dbgs() << "\nWith: "; 636 TLO.New.getNode()->dump(&DAG); 637 dbgs() << '\n'); 638 639 CommitTargetLoweringOpt(TLO); 640 return true; 641} 642 643void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 644 DebugLoc dl = Load->getDebugLoc(); 645 EVT VT = Load->getValueType(0); 646 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 647 648 DEBUG(dbgs() << "\nReplacing.9 "; 649 Load->dump(&DAG); 650 dbgs() << "\nWith: "; 651 Trunc.getNode()->dump(&DAG); 652 dbgs() << '\n'); 653 WorkListRemover DeadNodes(*this); 654 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes); 655 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1), 656 &DeadNodes); 657 removeFromWorkList(Load); 658 DAG.DeleteNode(Load); 659 AddToWorkList(Trunc.getNode()); 660} 661 662SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 663 Replace = false; 664 DebugLoc dl = Op.getDebugLoc(); 665 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 666 EVT MemVT = LD->getMemoryVT(); 667 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 668 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) 669 : LD->getExtensionType(); 670 Replace = true; 671 return DAG.getExtLoad(ExtType, dl, PVT, 672 LD->getChain(), LD->getBasePtr(), 673 LD->getSrcValue(), LD->getSrcValueOffset(), 674 MemVT, LD->isVolatile(), 675 LD->isNonTemporal(), LD->getAlignment()); 676 } 677 678 unsigned Opc = Op.getOpcode(); 679 switch (Opc) { 680 default: break; 681 case ISD::AssertSext: 682 return DAG.getNode(ISD::AssertSext, dl, PVT, 683 SExtPromoteOperand(Op.getOperand(0), PVT), 684 Op.getOperand(1)); 685 case ISD::AssertZext: 686 return DAG.getNode(ISD::AssertZext, dl, PVT, 687 ZExtPromoteOperand(Op.getOperand(0), PVT), 688 Op.getOperand(1)); 689 case ISD::Constant: { 690 unsigned ExtOpc = 691 Op.getValueType().isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 692 return DAG.getNode(ExtOpc, dl, PVT, Op); 693 } 694 } 695 696 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 697 return SDValue(); 698 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 699} 700 701SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 702 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 703 return SDValue(); 704 EVT OldVT = Op.getValueType(); 705 DebugLoc dl = Op.getDebugLoc(); 706 bool Replace = false; 707 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 708 if (NewOp.getNode() == 0) 709 return SDValue(); 710 AddToWorkList(NewOp.getNode()); 711 712 if (Replace) 713 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 714 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 715 DAG.getValueType(OldVT)); 716} 717 718SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 719 EVT OldVT = Op.getValueType(); 720 DebugLoc dl = Op.getDebugLoc(); 721 bool Replace = false; 722 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 723 if (NewOp.getNode() == 0) 724 return SDValue(); 725 AddToWorkList(NewOp.getNode()); 726 727 if (Replace) 728 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 729 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 730} 731 732/// PromoteIntBinOp - Promote the specified integer binary operation if the 733/// target indicates it is beneficial. e.g. On x86, it's usually better to 734/// promote i16 operations to i32 since i16 instructions are longer. 735SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 736 if (!LegalOperations) 737 return SDValue(); 738 739 EVT VT = Op.getValueType(); 740 if (VT.isVector() || !VT.isInteger()) 741 return SDValue(); 742 743 // If operation type is 'undesirable', e.g. i16 on x86, consider 744 // promoting it. 
745 unsigned Opc = Op.getOpcode(); 746 if (TLI.isTypeDesirableForOp(Opc, VT)) 747 return SDValue(); 748 749 EVT PVT = VT; 750 // Consult target whether it is a good idea to promote this operation and 751 // what's the right type to promote it to. 752 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 753 assert(PVT != VT && "Don't know what type to promote to!"); 754 755 bool Replace0 = false; 756 SDValue N0 = Op.getOperand(0); 757 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 758 if (NN0.getNode() == 0) 759 return SDValue(); 760 761 bool Replace1 = false; 762 SDValue N1 = Op.getOperand(1); 763 SDValue NN1 = PromoteOperand(N1, PVT, Replace1); 764 if (NN1.getNode() == 0) 765 return SDValue(); 766 767 AddToWorkList(NN0.getNode()); 768 AddToWorkList(NN1.getNode()); 769 770 if (Replace0) 771 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 772 if (Replace1) 773 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 774 775 DEBUG(dbgs() << "\nPromoting "; 776 Op.getNode()->dump(&DAG)); 777 DebugLoc dl = Op.getDebugLoc(); 778 return DAG.getNode(ISD::TRUNCATE, dl, VT, 779 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 780 } 781 return SDValue(); 782} 783 784/// PromoteIntShiftOp - Promote the specified integer shift operation if the 785/// target indicates it is beneficial. e.g. On x86, it's usually better to 786/// promote i16 operations to i32 since i16 instructions are longer. 787SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 788 if (!LegalOperations) 789 return SDValue(); 790 791 EVT VT = Op.getValueType(); 792 if (VT.isVector() || !VT.isInteger()) 793 return SDValue(); 794 795 // If operation type is 'undesirable', e.g. i16 on x86, consider 796 // promoting it. 797 unsigned Opc = Op.getOpcode(); 798 if (TLI.isTypeDesirableForOp(Opc, VT)) 799 return SDValue(); 800 801 EVT PVT = VT; 802 // Consult target whether it is a good idea to promote this operation and 803 // what's the right type to promote it to. 
804 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 805 assert(PVT != VT && "Don't know what type to promote to!"); 806 807 bool Replace = false; 808 SDValue N0 = Op.getOperand(0); 809 if (Opc == ISD::SRA) 810 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 811 else if (Opc == ISD::SRL) 812 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 813 else 814 N0 = PromoteOperand(N0, PVT, Replace); 815 if (N0.getNode() == 0) 816 return SDValue(); 817 818 AddToWorkList(N0.getNode()); 819 if (Replace) 820 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 821 822 DEBUG(dbgs() << "\nPromoting "; 823 Op.getNode()->dump(&DAG)); 824 DebugLoc dl = Op.getDebugLoc(); 825 return DAG.getNode(ISD::TRUNCATE, dl, VT, 826 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 827 } 828 return SDValue(); 829} 830 831SDValue DAGCombiner::PromoteExtend(SDValue Op) { 832 if (!LegalOperations) 833 return SDValue(); 834 835 EVT VT = Op.getValueType(); 836 if (VT.isVector() || !VT.isInteger()) 837 return SDValue(); 838 839 // If operation type is 'undesirable', e.g. i16 on x86, consider 840 // promoting it. 841 unsigned Opc = Op.getOpcode(); 842 if (TLI.isTypeDesirableForOp(Opc, VT)) 843 return SDValue(); 844 845 EVT PVT = VT; 846 // Consult target whether it is a good idea to promote this operation and 847 // what's the right type to promote it to. 
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    // Re-emit the same extension opcode directly on the original operand.
    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// PromoteLoad - If Op is a load of a type the target marks as undesirable,
/// replace it with an extending load of the target's preferred wider type
/// followed by a truncate back to the original type.  Rewrites all uses of
/// both the value and chain results and deletes the old load.  Returns true
/// if the load was promoted.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  // Only scalar integer loads are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DebugLoc dl = Op.getDebugLoc();
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A plain (non-extending) load becomes a zero-extending load when the
    // target supports it, otherwise an any-extending load; an existing
    // ext-load keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ?
         ISD::ZEXTLOAD : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   LD->getSrcValue(), LD->getSrcValueOffset(),
                                   MemVT, LD->isVolatile(),
                                   LD->isNonTemporal(), LD->getAlignment());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    // Splice the truncated value and the new load's chain in place of the
    // old load's two results, then delete the old node.
    WorkListRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    AddToWorkList(Result.getNode());
    return true;
  }
  return false;
}


//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Run - The top-level driver: seed the worklist with every node in the DAG
/// and repeatedly combine nodes until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= NoIllegalOperations;
  LegalTypes = Level >= NoIllegalTypes;

  // Add all the dag nodes to the worklist.
  WorkList.reserve(DAG.allnodes_size());
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    WorkList.push_back(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // while the worklist isn't empty, inspect the node on the end of it and
  // try and combine it.
  while (!WorkList.empty()) {
    SDNode *N = WorkList.back();
    WorkList.pop_back();

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    // Null return value means no combine fired for this node.
    if (RV.getNode() == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');
    // Replace all of N's results with the replacement node's results.  The
    // single-value form is used when RV has a different number of results
    // (only legal when N itself produces exactly one value).
    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them.  They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist.  Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
}

/// visit - Dispatch to the per-opcode combine routine for N.  Returns the
/// replacement value, or a null SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch(N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  }
  // Opcode with no combine routine: nothing to do.
  return SDValue();
}

/// combine - Try the generic visit routine for N, then fall back to the
/// target-specific combiner and finally to commuted-operand CSE.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (RV.getNode() == 0) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target nodes (>= BUILTIN_OP_END) and opcodes the target registered for
    // are handed to the target's combiner hook.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (RV.getNode() == 0 &&
      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      // If the swapped-operand form already exists in the DAG, reuse it.
      SDValue Ops[] = { N1, N0 };
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
                                            Ops, 2);
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.  Checks the common first/last operand
/// positions before scanning the middle operands.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// visitTokenFactor - Simplify a TokenFactor by dropping redundant chains and
/// flattening nested single-use token factors.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs list grows when new token factors
  // are encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list.  They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorkList(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()))
          Ops.push_back(Op);
        else
          Changed = true;   // Duplicate operand dropped.
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                           MVT::Other, &Ops[0], Ops.size());
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorkListRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  do {
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
                                    &DeadNodes);
  } while (!N->use_empty());
  removeFromWorkList(N);
  DAG.DeleteNode(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// combineShlAddConstant - Helper for visitADD: N0 is a SHL; if its operand
/// is (add x, c1) with constant shift amount, distribute the shift over the
/// inner add so constants can fold.
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
                              SelectionDAG &DAG) {
  EVT VT = N0.getValueType();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);

  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N00.getOperand(1))) {
    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
                                 N00.getOperand(0), N01),
                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
                                 N00.getOperand(1), N01));
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  }

  return SDValue();
}

/// visitADD - Canonicalize and simplify an integer ADD.  The folds below are
/// tried in priority order; if none fires, fall back to integer promotion.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
  if (RADD.getNode() != 0)
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
  }

  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);

    // Only bother computing the RHS's known bits when the LHS has some
    // known-zero bits at all.
    if (LHSZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
    if (Result.getNode()) return Result;
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // Same fold with the shl on the LHS: (add shl(0 - y, n), x).
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  // No fold applied; as a last resort try promoting the add to a wider type.
  return PromoteIntBinOp(SDValue(N, 0));
}

/// visitADDC - Simplify an add-with-carry-out node; when the carry result is
/// unused this degenerates to a plain ADD.
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (N->hasNUsesOfValue(0, 1))
    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 N->getDebugLoc(), MVT::Flag));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        N->getDebugLoc(), MVT::Flag));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
      return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   N->getDebugLoc(), MVT::Flag));
  }

  return SDValue();
}

/// visitADDE - Simplify an add-with-carry-in node; a known-false carry-in
/// lowers it to ADDC.
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);

  return SDValue();
}

/// visitSUB - Canonicalize and simplify an integer SUB; falls back to
/// integer promotion when no fold applies.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sub x, x) -> 0
  if (N0 == N1)
    return DAG.getConstant(0, N->getValueType(0));
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // No fold applied; as a last resort try promoting to a wider type.
  return PromoteIntBinOp(SDValue(N, 0));
}

/// visitMUL - Canonicalize and simplify an integer MUL, strength-reducing
/// power-of-two multiplies to shifts; falls back to integer promotion.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (mul c1, c2) -> c1*c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mul x, -1) -> 0-x
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // fold (mul x, (1 << c)) -> x << c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy()));
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT),
                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                            DAG.getConstant(Log2Val, getShiftAmountTy())));
  }
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1C && N0.getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                             N1, N0.getOperand(1));
    AddToWorkList(C3.getNode());
    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(0,0), Y(0,0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
  if (RMUL.getNode() != 0)
    return RMUL;

  // No fold applied; as a last resort try promoting to a wider type.
  return PromoteIntBinOp(SDValue(N, 0));
}

/// visitSDIV - Simplify a signed divide, strength-reducing division by a
/// power of two into a shift sequence when profitable for the target.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getSExtValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
                         N0, N1);
  }
  // fold (sdiv X, pow2) -> simple ops after legalize
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
      (isPowerOf2_64(N1C->getSExtValue()) ||
       isPowerOf2_64(-N1C->getSExtValue()))) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

    int64_t pow2 = N1C->getSExtValue();
    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
    unsigned lg2 = Log2_64(abs2);

    // Splat the sign bit into the register
    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                              DAG.getConstant(VT.getSizeInBits()-1,
                                              getShiftAmountTy()));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                              DAG.getConstant(VT.getSizeInBits() - lg2,
                                              getShiftAmountTy()));
    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
                              DAG.getConstant(lg2, getShiftAmountTy()));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (pow2 > 0)
      return SRA;

    AddToWorkList(SRA.getNode());
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  // Divisors other than 0/1/-1 may be replaceable by a multiply sequence.
  if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
      !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUDIV - Simplify an unsigned divide, strength-reducing division by a
/// power of two (even a shifted one) into a right shift.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy()));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  ADDVT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
1754 if (N0.getOpcode() == ISD::UNDEF) 1755 return DAG.getConstant(0, VT); 1756 // X / undef -> undef 1757 if (N1.getOpcode() == ISD::UNDEF) 1758 return N1; 1759 1760 return SDValue(); 1761} 1762 1763SDValue DAGCombiner::visitSREM(SDNode *N) { 1764 SDValue N0 = N->getOperand(0); 1765 SDValue N1 = N->getOperand(1); 1766 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1767 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1768 EVT VT = N->getValueType(0); 1769 1770 // fold (srem c1, c2) -> c1%c2 1771 if (N0C && N1C && !N1C->isNullValue()) 1772 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 1773 // If we know the sign bits of both operands are zero, strength reduce to a 1774 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 1775 if (!VT.isVector()) { 1776 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 1777 return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); 1778 } 1779 1780 // If X/C can be simplified by the division-by-constant logic, lower 1781 // X%C to the equivalent of X-X/C*C. 
  // Only profitable when combine() can actually improve the speculative
  // sdiv; otherwise we would be introducing a real divide plus a multiply
  // and subtract for no gain.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUREM - Combine an ISD::UREM node.  Returns the replacement value, or
/// a null SDValue if no simplification applied.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // (shl pow2, y) - 1 is the mask; adding all-ones is subtracting 1.
        SDValue Add =
          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.  As in visitSREM, only do this when
  // combine() actually improves the speculative udiv.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitMULHS - Combine an ISD::MULHS (signed multiply-high) node.
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (mulhs x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high part of x*1 is just the sign bit of x replicated.
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       getShiftAmountTy()));
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  return SDValue();
}

/// visitMULHU - Combine an ISD::MULHU (unsigned multiply-high) node.
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (mulhu x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhu x, 1) -> 0 (the high part of an unsigned x*1 is zero)
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getConstant(0, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  return SDValue();
}

/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values.  LoOp and HiOp give the opcodes for the two
/// computations that are being performed.  Returns the combined replacement
/// value, or a null SDValue if no simplification was made.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
                             N->op_begin(), N->getNumOperands());
    AddToWorkList(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
                             N->op_begin(), N->getNumOperands());
    AddToWorkList(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  if (Res.getNode()) return Res;

  return SDValue();
}

SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  if (Res.getNode()) return Res;

  return SDValue();
}

SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      // Both hands must extend/truncate from the same source type.
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                       ORNode, N0.getOperand(1));
  }

  return SDValue();
}

/// visitAND - Combine an ISD::AND node.  Returns the replacement value, or
/// a null SDValue if no simplification applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();
  unsigned BitWidth = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (and c1, c2) -> c1&c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
  // fold (and x, -1) -> x
  if (N1C && N1C->isAllOnesValue())
    return N0;
  // if (and x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, VT);
  // reassociate and
  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
  if (RAND.getNode() != 0)
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    // Mask of the bits the AND clears, truncated to the pre-extend width.
    APInt Mask = ~N1C->getAPIntValue();
    Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
        SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
      // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }
  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    // NB: this local BitWidth shadows the function-level one; both are the
    // scalar bit width of VT since VT is N1's type.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      AddToWorkList(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      AddToWorkList(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
      // The mask must be a contiguous run of low set bits.
      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
        EVT LoadedVT = LN0->getMemoryVT();

        if (ExtVT == LoadedVT &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;

          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(),
                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->getAlignment());
          AddToWorkList(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }

        // Do not change the width of a volatile load.
        // Do not generate loads of non-round integer types since these can
        // be expensive (and would be wrong if the type is not byte sized).
        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes.  For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (TLI.isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorkList(NewPtr.getNode());

          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           Alignment);
          AddToWorkList(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  return PromoteIntBinOp(SDValue(N, 0));
}

/// visitOR - Combine an ISD::OR node.  Returns the replacement value, or
/// a null SDValue if no simplification applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (or x, undef) -> -1
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;
  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                                     N0.getOperand(0), N1),
                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
    return SDValue(Rot, 0);

  return PromoteIntBinOp(SDValue(N, 0));
}

/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On success, Shift is set to the shl/srl node and Mask to the AND constant
/// operand (if one was present); returns false if Op is not of this shape.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // Strip an optional outer AND-with-constant, remembering the mask.
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // Only a rotate if the two shift amounts cover the full width exactly.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    SDValue Rot;
    if (HasROTL)
      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
    else
      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTL)
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTR)
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}

/// visitXOR - Combine an ISD::XOR node.  Returns the replacement value, or
/// a null SDValue if no simplification applied.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
2590 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 2591 return DAG.getConstant(0, VT); 2592 // fold (xor x, undef) -> undef 2593 if (N0.getOpcode() == ISD::UNDEF) 2594 return N0; 2595 if (N1.getOpcode() == ISD::UNDEF) 2596 return N1; 2597 // fold (xor c1, c2) -> c1^c2 2598 if (N0C && N1C) 2599 return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); 2600 // canonicalize constant to RHS 2601 if (N0C && !N1C) 2602 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); 2603 // fold (xor x, 0) -> x 2604 if (N1C && N1C->isNullValue()) 2605 return N0; 2606 // reassociate xor 2607 SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); 2608 if (RXOR.getNode() != 0) 2609 return RXOR; 2610 2611 // fold !(x cc y) -> (x !cc y) 2612 if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) { 2613 bool isInt = LHS.getValueType().isInteger(); 2614 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 2615 isInt); 2616 2617 if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { 2618 switch (N0.getOpcode()) { 2619 default: 2620 llvm_unreachable("Unhandled SetCC Equivalent!"); 2621 case ISD::SETCC: 2622 return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); 2623 case ISD::SELECT_CC: 2624 return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), 2625 N0.getOperand(3), NotCC); 2626 } 2627 } 2628 } 2629 2630 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 2631 if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && 2632 N0.getNode()->hasOneUse() && 2633 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 2634 SDValue V = N0.getOperand(0); 2635 V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, 2636 DAG.getConstant(1, V.getValueType())); 2637 AddToWorkList(V.getNode()); 2638 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); 2639 } 2640 2641 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 
2642 if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && 2643 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 2644 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 2645 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 2646 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 2647 LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS 2648 RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS 2649 AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); 2650 return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); 2651 } 2652 } 2653 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 2654 if (N1C && N1C->isAllOnesValue() && 2655 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 2656 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 2657 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 2658 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 2659 LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS 2660 RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS 2661 AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); 2662 return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); 2663 } 2664 } 2665 // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) 2666 if (N1C && N0.getOpcode() == ISD::XOR) { 2667 ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 2668 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2669 if (N00C) 2670 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), 2671 DAG.getConstant(N1C->getAPIntValue() ^ 2672 N00C->getAPIntValue(), VT)); 2673 if (N01C) 2674 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), 2675 DAG.getConstant(N1C->getAPIntValue() ^ 2676 N01C->getAPIntValue(), VT)); 2677 } 2678 // fold (xor x, x) -> 0 2679 if (N0 == N1) { 2680 if (!VT.isVector()) { 2681 return 
DAG.getConstant(0, VT); 2682 } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){ 2683 // Produce a vector of zeros. 2684 SDValue El = DAG.getConstant(0, VT.getVectorElementType()); 2685 std::vector<SDValue> Ops(VT.getVectorNumElements(), El); 2686 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, 2687 &Ops[0], Ops.size()); 2688 } 2689 } 2690 2691 // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) 2692 if (N0.getOpcode() == N1.getOpcode()) { 2693 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2694 if (Tmp.getNode()) return Tmp; 2695 } 2696 2697 // Simplify the expression using non-local knowledge. 2698 if (!VT.isVector() && 2699 SimplifyDemandedBits(SDValue(N, 0))) 2700 return SDValue(N, 0); 2701 2702 return PromoteIntBinOp(SDValue(N, 0)); 2703} 2704 2705/// visitShiftByConstant - Handle transforms common to the three shifts, when 2706/// the shift amount is a constant. 2707SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { 2708 SDNode *LHS = N->getOperand(0).getNode(); 2709 if (!LHS->hasOneUse()) return SDValue(); 2710 2711 // We want to pull some binops through shifts, so that we have (and (shift)) 2712 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 2713 // thing happens with address calculations, so it's important to canonicalize 2714 // it. 2715 bool HighBitSet = false; // Can we transform this if the high bit is set? 2716 2717 switch (LHS->getOpcode()) { 2718 default: return SDValue(); 2719 case ISD::OR: 2720 case ISD::XOR: 2721 HighBitSet = false; // We can only transform sra if the high bit is clear. 2722 break; 2723 case ISD::AND: 2724 HighBitSet = true; // We can only transform sra if the high bit is set. 2725 break; 2726 case ISD::ADD: 2727 if (N->getOpcode() != ISD::SHL) 2728 return SDValue(); // only shl(add) not sr[al](add). 2729 HighBitSet = false; // We can only transform sra if the high bit is clear. 
2730 break; 2731 } 2732 2733 // We require the RHS of the binop to be a constant as well. 2734 ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); 2735 if (!BinOpCst) return SDValue(); 2736 2737 // FIXME: disable this unless the input to the binop is a shift by a constant. 2738 // If it is not a shift, it pessimizes some common cases like: 2739 // 2740 // void foo(int *X, int i) { X[i & 1235] = 1; } 2741 // int bar(int *X, int i) { return X[i & 255]; } 2742 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 2743 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 2744 BinOpLHSVal->getOpcode() != ISD::SRA && 2745 BinOpLHSVal->getOpcode() != ISD::SRL) || 2746 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 2747 return SDValue(); 2748 2749 EVT VT = N->getValueType(0); 2750 2751 // If this is a signed shift right, and the high bit is modified by the 2752 // logical operation, do not perform the transformation. The highBitSet 2753 // boolean indicates the value of the high bit of the constant which would 2754 // cause it to be modified for this operation. 2755 if (N->getOpcode() == ISD::SRA) { 2756 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 2757 if (BinOpRHSSignSet != HighBitSet) 2758 return SDValue(); 2759 } 2760 2761 // Fold the constants, shifting the binop RHS by the shift amount. 2762 SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), 2763 N->getValueType(0), 2764 LHS->getOperand(1), N->getOperand(1)); 2765 2766 // Create the new shift. 2767 SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(), 2768 VT, LHS->getOperand(0), N->getOperand(1)); 2769 2770 // Create the new binop. 
2771 return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); 2772} 2773 2774SDValue DAGCombiner::visitSHL(SDNode *N) { 2775 SDValue N0 = N->getOperand(0); 2776 SDValue N1 = N->getOperand(1); 2777 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2778 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2779 EVT VT = N0.getValueType(); 2780 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 2781 2782 // fold (shl c1, c2) -> c1<<c2 2783 if (N0C && N1C) 2784 return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); 2785 // fold (shl 0, x) -> 0 2786 if (N0C && N0C->isNullValue()) 2787 return N0; 2788 // fold (shl x, c >= size(x)) -> undef 2789 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 2790 return DAG.getUNDEF(VT); 2791 // fold (shl x, 0) -> x 2792 if (N1C && N1C->isNullValue()) 2793 return N0; 2794 // if (shl x, c) is known to be zero, return 0 2795 if (DAG.MaskedValueIsZero(SDValue(N, 0), 2796 APInt::getAllOnesValue(OpSizeInBits))) 2797 return DAG.getConstant(0, VT); 2798 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 
2799 if (N1.getOpcode() == ISD::TRUNCATE && 2800 N1.getOperand(0).getOpcode() == ISD::AND && 2801 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 2802 SDValue N101 = N1.getOperand(0).getOperand(1); 2803 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 2804 EVT TruncVT = N1.getValueType(); 2805 SDValue N100 = N1.getOperand(0).getOperand(0); 2806 APInt TruncC = N101C->getAPIntValue(); 2807 TruncC.trunc(TruncVT.getSizeInBits()); 2808 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, 2809 DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, 2810 DAG.getNode(ISD::TRUNCATE, 2811 N->getDebugLoc(), 2812 TruncVT, N100), 2813 DAG.getConstant(TruncC, TruncVT))); 2814 } 2815 } 2816 2817 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 2818 return SDValue(N, 0); 2819 2820 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 2821 if (N1C && N0.getOpcode() == ISD::SHL && 2822 N0.getOperand(1).getOpcode() == ISD::Constant) { 2823 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 2824 uint64_t c2 = N1C->getZExtValue(); 2825 if (c1 + c2 > OpSizeInBits) 2826 return DAG.getConstant(0, VT); 2827 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), 2828 DAG.getConstant(c1 + c2, N1.getValueType())); 2829 } 2830 // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or 2831 // (srl (and x, (shl -1, c1)), (sub c1, c2)) 2832 if (N1C && N0.getOpcode() == ISD::SRL && 2833 N0.getOperand(1).getOpcode() == ISD::Constant) { 2834 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 2835 if (c1 < VT.getSizeInBits()) { 2836 uint64_t c2 = N1C->getZExtValue(); 2837 SDValue HiBitsMask = 2838 DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), 2839 VT.getSizeInBits() - c1), 2840 VT); 2841 SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, 2842 N0.getOperand(0), 2843 HiBitsMask); 2844 if (c2 > c1) 2845 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask, 2846 DAG.getConstant(c2-c1, 
N1.getValueType())); 2847 else 2848 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask, 2849 DAG.getConstant(c1-c2, N1.getValueType())); 2850 } 2851 } 2852 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 2853 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 2854 SDValue HiBitsMask = 2855 DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(), 2856 VT.getSizeInBits() - 2857 N1C->getZExtValue()), 2858 VT); 2859 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), 2860 HiBitsMask); 2861 } 2862 2863 if (N1C) { 2864 SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); 2865 if (NewSHL.getNode()) 2866 return NewSHL; 2867 } 2868 2869 return PromoteIntShiftOp(SDValue(N, 0)); 2870} 2871 2872SDValue DAGCombiner::visitSRA(SDNode *N) { 2873 SDValue N0 = N->getOperand(0); 2874 SDValue N1 = N->getOperand(1); 2875 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2876 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2877 EVT VT = N0.getValueType(); 2878 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 2879 2880 // fold (sra c1, c2) -> (sra c1, c2) 2881 if (N0C && N1C) 2882 return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); 2883 // fold (sra 0, x) -> 0 2884 if (N0C && N0C->isNullValue()) 2885 return N0; 2886 // fold (sra -1, x) -> -1 2887 if (N0C && N0C->isAllOnesValue()) 2888 return N0; 2889 // fold (sra x, (setge c, size(x))) -> undef 2890 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 2891 return DAG.getUNDEF(VT); 2892 // fold (sra x, 0) -> x 2893 if (N1C && N1C->isNullValue()) 2894 return N0; 2895 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 2896 // sext_inreg. 
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    // After (shl x, c1) the original value occupies the low
    // OpSizeInBits - c1 bits, so shifting right by the same amount is a
    // sign-extension from that narrower width.
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // sra fills with copies of the sign bit, so any amount of
      // OpSizeInBits-1 or more is equivalent; clamp to keep the shift
      // amount in range.
      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1C->getValueType(0)));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N01C && N1C) {
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT =
        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the type truncated to is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
2938 if ((ShiftAmt > 0) && 2939 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 2940 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 2941 TLI.isTruncateFree(VT, TruncVT)) { 2942 2943 SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy()); 2944 SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, 2945 N0.getOperand(0), Amt); 2946 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, 2947 Shift); 2948 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), 2949 N->getValueType(0), Trunc); 2950 } 2951 } 2952 } 2953 2954 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 2955 if (N1.getOpcode() == ISD::TRUNCATE && 2956 N1.getOperand(0).getOpcode() == ISD::AND && 2957 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 2958 SDValue N101 = N1.getOperand(0).getOperand(1); 2959 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 2960 EVT TruncVT = N1.getValueType(); 2961 SDValue N100 = N1.getOperand(0).getOperand(0); 2962 APInt TruncC = N101C->getAPIntValue(); 2963 TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); 2964 return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, 2965 DAG.getNode(ISD::AND, N->getDebugLoc(), 2966 TruncVT, 2967 DAG.getNode(ISD::TRUNCATE, 2968 N->getDebugLoc(), 2969 TruncVT, N100), 2970 DAG.getConstant(TruncC, TruncVT))); 2971 } 2972 } 2973 2974 // Simplify, based on bits shifted out of the LHS. 2975 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 2976 return SDValue(N, 0); 2977 2978 2979 // If the sign bit is known to be zero, switch this to a SRL. 
2980 if (DAG.SignBitIsZero(N0)) 2981 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); 2982 2983 if (N1C) { 2984 SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); 2985 if (NewSRA.getNode()) 2986 return NewSRA; 2987 } 2988 2989 return PromoteIntShiftOp(SDValue(N, 0)); 2990} 2991 2992SDValue DAGCombiner::visitSRL(SDNode *N) { 2993 SDValue N0 = N->getOperand(0); 2994 SDValue N1 = N->getOperand(1); 2995 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2996 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2997 EVT VT = N0.getValueType(); 2998 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 2999 3000 // fold (srl c1, c2) -> c1 >>u c2 3001 if (N0C && N1C) 3002 return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); 3003 // fold (srl 0, x) -> 0 3004 if (N0C && N0C->isNullValue()) 3005 return N0; 3006 // fold (srl x, c >= size(x)) -> undef 3007 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 3008 return DAG.getUNDEF(VT); 3009 // fold (srl x, 0) -> x 3010 if (N1C && N1C->isNullValue()) 3011 return N0; 3012 // if (srl x, c) is known to be zero, return 0 3013 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 3014 APInt::getAllOnesValue(OpSizeInBits))) 3015 return DAG.getConstant(0, VT); 3016 3017 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 3018 if (N1C && N0.getOpcode() == ISD::SRL && 3019 N0.getOperand(1).getOpcode() == ISD::Constant) { 3020 uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); 3021 uint64_t c2 = N1C->getZExtValue(); 3022 if (c1 + c2 > OpSizeInBits) 3023 return DAG.getConstant(0, VT); 3024 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), 3025 DAG.getConstant(c1 + c2, N1.getValueType())); 3026 } 3027 3028 // fold (srl (shl x, c), c) -> (and x, cst2) 3029 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && 3030 N0.getValueSizeInBits() <= 64) { 3031 uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); 3032 return DAG.getNode(ISD::AND, 
N->getDebugLoc(), VT, N0.getOperand(0), 3033 DAG.getConstant(~0ULL >> ShAmt, VT)); 3034 } 3035 3036 3037 // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) 3038 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 3039 // Shifting in all undef bits? 3040 EVT SmallVT = N0.getOperand(0).getValueType(); 3041 if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) 3042 return DAG.getUNDEF(VT); 3043 3044 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 3045 SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, 3046 N0.getOperand(0), N1); 3047 AddToWorkList(SmallShift.getNode()); 3048 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); 3049 } 3050 } 3051 3052 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 3053 // bit, which is unmodified by sra. 3054 if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { 3055 if (N0.getOpcode() == ISD::SRA) 3056 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); 3057 } 3058 3059 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 3060 if (N1C && N0.getOpcode() == ISD::CTLZ && 3061 N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) { 3062 APInt KnownZero, KnownOne; 3063 APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()); 3064 DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne); 3065 3066 // If any of the input bits are KnownOne, then the input couldn't be all 3067 // zeros, thus the result of the srl will always be zero. 3068 if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); 3069 3070 // If all of the bits input the to ctlz node are known to be zero, then 3071 // the result of the ctlz is "32" and the result of the shift is one. 3072 APInt UnknownBits = ~KnownZero & Mask; 3073 if (UnknownBits == 0) return DAG.getConstant(1, VT); 3074 3075 // Otherwise, check to see if there is exactly one bit input to the ctlz. 
3076 if ((UnknownBits & (UnknownBits - 1)) == 0) { 3077 // Okay, we know that only that the single bit specified by UnknownBits 3078 // could be set on input to the CTLZ node. If this bit is set, the SRL 3079 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 3080 // to an SRL/XOR pair, which is likely to simplify more. 3081 unsigned ShAmt = UnknownBits.countTrailingZeros(); 3082 SDValue Op = N0.getOperand(0); 3083 3084 if (ShAmt) { 3085 Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, 3086 DAG.getConstant(ShAmt, getShiftAmountTy())); 3087 AddToWorkList(Op.getNode()); 3088 } 3089 3090 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, 3091 Op, DAG.getConstant(1, VT)); 3092 } 3093 } 3094 3095 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 3096 if (N1.getOpcode() == ISD::TRUNCATE && 3097 N1.getOperand(0).getOpcode() == ISD::AND && 3098 N1.hasOneUse() && N1.getOperand(0).hasOneUse()) { 3099 SDValue N101 = N1.getOperand(0).getOperand(1); 3100 if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) { 3101 EVT TruncVT = N1.getValueType(); 3102 SDValue N100 = N1.getOperand(0).getOperand(0); 3103 APInt TruncC = N101C->getAPIntValue(); 3104 TruncC.trunc(TruncVT.getSizeInBits()); 3105 return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, 3106 DAG.getNode(ISD::AND, N->getDebugLoc(), 3107 TruncVT, 3108 DAG.getNode(ISD::TRUNCATE, 3109 N->getDebugLoc(), 3110 TruncVT, N100), 3111 DAG.getConstant(TruncC, TruncVT))); 3112 } 3113 } 3114 3115 // fold operands of srl based on knowledge that the low bits are not 3116 // demanded. 3117 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 3118 return SDValue(N, 0); 3119 3120 if (N1C) { 3121 SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue()); 3122 if (NewSRL.getNode()) 3123 return NewSRL; 3124 } 3125 3126 // Here is a common situation. We want to optimize: 3127 // 3128 // %a = ... 3129 // %b = and i32 %a, 2 3130 // %c = srl i32 %b, 1 3131 // brcond i32 %c ... 
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the
  // SRL itself may not be optimized further. Look for it and add the BRCOND
  // into the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorkList(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look past the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorkList(Use);
    }
  }

  return PromoteIntShiftOp(SDValue(N, 0));
}

// visitCTLZ - With a constant operand, re-issue the node so the DAG's
// normal constant folding can evaluate it.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
  return SDValue();
}

// visitCTTZ - Same constant-folding pattern as visitCTLZ.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
  return SDValue();
}

// visitCTPOP - Same constant-folding pattern as visitCTLZ.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
  return SDValue();
}

// visitSELECT - Simplify (select Cond, TrueVal, FalseVal): fold constant
// conditions, rewrite i1 selects into AND/OR/XOR logic, and hand the rest to
// the select/setcc helpers below.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // The condition type may differ from the result type; the xor is built in
  // the condition's type and then zero-extended or truncated to match.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
                         N0, DAG.getConstant(1, VT0));
    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C &&
N1C->getAPIntValue() == 1))) 3245 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); 3246 // fold (select X, Y, X) -> (and X, Y) 3247 // fold (select X, Y, 0) -> (and X, Y) 3248 if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) 3249 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); 3250 3251 // If we can fold this based on the true/false value, do so. 3252 if (SimplifySelectOps(N, N1, N2)) 3253 return SDValue(N, 0); // Don't revisit N. 3254 3255 // fold selects based on a setcc into other things, such as min/max/abs 3256 if (N0.getOpcode() == ISD::SETCC) { 3257 // FIXME: 3258 // Check against MVT::Other for SELECT_CC, which is a workaround for targets 3259 // having to say they don't support SELECT_CC on every type the DAG knows 3260 // about, since there is no way to mark an opcode illegal at all value types 3261 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && 3262 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) 3263 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, 3264 N0.getOperand(0), N0.getOperand(1), 3265 N1, N2, N0.getOperand(2)); 3266 return SimplifySelect(N->getDebugLoc(), N0, N1, N2); 3267 } 3268 3269 return SDValue(); 3270} 3271 3272SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 3273 SDValue N0 = N->getOperand(0); 3274 SDValue N1 = N->getOperand(1); 3275 SDValue N2 = N->getOperand(2); 3276 SDValue N3 = N->getOperand(3); 3277 SDValue N4 = N->getOperand(4); 3278 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 3279 3280 // fold select_cc lhs, rhs, x, x, cc -> x 3281 if (N2 == N3) 3282 return N2; 3283 3284 // Determine if the condition we're dealing with is constant 3285 SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()), 3286 N0, N1, CC, N->getDebugLoc(), false); 3287 if (SCC.getNode()) AddToWorkList(SCC.getNode()); 3288 3289 if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) { 3290 if (!SCCC->isNullValue()) 3291 return N2; // cond always true 
-> true val 3292 else 3293 return N3; // cond always false -> false val 3294 } 3295 3296 // Fold to a simpler select_cc 3297 if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC) 3298 return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(), 3299 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 3300 SCC.getOperand(2)); 3301 3302 // If we can fold this based on the true/false value, do so. 3303 if (SimplifySelectOps(N, N2, N3)) 3304 return SDValue(N, 0); // Don't revisit N. 3305 3306 // fold select_cc into other things, such as min/max/abs 3307 return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); 3308} 3309 3310SDValue DAGCombiner::visitSETCC(SDNode *N) { 3311 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 3312 cast<CondCodeSDNode>(N->getOperand(2))->get(), 3313 N->getDebugLoc()); 3314} 3315 3316// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 3317// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 3318// transformation. Returns true if extension are possible and the above 3319// mentioned transformation is profitable. 3320static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 3321 unsigned ExtOpc, 3322 SmallVector<SDNode*, 4> &ExtendNodes, 3323 const TargetLowering &TLI) { 3324 bool HasCopyToRegUses = false; 3325 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 3326 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 3327 UE = N0.getNode()->use_end(); 3328 UI != UE; ++UI) { 3329 SDNode *User = *UI; 3330 if (User == N) 3331 continue; 3332 if (UI.getUse().getResNo() != N0.getResNo()) 3333 continue; 3334 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 3335 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 3336 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 3337 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 3338 // Sign bits will be lost after a zext. 
3339 return false; 3340 bool Add = false; 3341 for (unsigned i = 0; i != 2; ++i) { 3342 SDValue UseOp = User->getOperand(i); 3343 if (UseOp == N0) 3344 continue; 3345 if (!isa<ConstantSDNode>(UseOp)) 3346 return false; 3347 Add = true; 3348 } 3349 if (Add) 3350 ExtendNodes.push_back(User); 3351 continue; 3352 } 3353 // If truncates aren't free and there are users we can't 3354 // extend, it isn't worthwhile. 3355 if (!isTruncFree) 3356 return false; 3357 // Remember if this value is live-out. 3358 if (User->getOpcode() == ISD::CopyToReg) 3359 HasCopyToRegUses = true; 3360 } 3361 3362 if (HasCopyToRegUses) { 3363 bool BothLiveOut = false; 3364 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 3365 UI != UE; ++UI) { 3366 SDUse &Use = UI.getUse(); 3367 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 3368 BothLiveOut = true; 3369 break; 3370 } 3371 } 3372 if (BothLiveOut) 3373 // Both unextended and extended values are live out. There had better be 3374 // good a reason for the transformation. 
      return ExtendNodes.size();
  }
  return true;
}

// visitSIGN_EXTEND - Combine an ISD::SIGN_EXTEND node.  Each transformation
// attempted is described by the inline "fold (...)" comments below.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (sext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      if (NarrowLoad.getNode() != N0.getNode())
        CombineTo(N0.getNode(), NarrowLoad);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  if (ISD::isNON_EXTLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // sign-extended too (checked by ExtendUsesToFormExtLoad).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      // Extend SetCC uses if necessary.
      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
        SDNode *SetCC = SetCCs[i];
        SmallVector<SDValue, 4> Ops;

        for (unsigned j = 0; j != 2; ++j) {
          SDValue SOp = SetCC->getOperand(j);
          if (SOp == Trunc)
            Ops.push_back(ExtLoad);
          else
            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
                                      N->getDebugLoc(), VT, SOp));
        }

        Ops.push_back(SetCC->getOperand(2));
        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
                                     SetCC->getValueType(0),
                                     &Ops[0], Ops.size()));
      }

      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    if (VT.isVector() &&
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&

        // Only do this before legalize for now.
        !LegalOperations) {
      return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                           N0.getOperand(1),
                           cast<CondCodeSDNode>(N0.getOperand(2))->get());
    }

    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
                         DAG.getSetCC(N->getDebugLoc(),
                                      TLI.getSetCCResultType(VT),
                                      N0.getOperand(0), N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                         NegOne, DAG.getConstant(0, VT));
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);

  return PromoteExtend(SDValue(N, 0));
}

// visitZERO_EXTEND - Combine an ISD::ZERO_EXTEND node.  Each transformation
// attempted is described by the inline "fold (...)" comments below.
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      if (NarrowLoad.getNode() != N0.getNode())
        CombineTo(N0.getNode(), NarrowLoad);
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
    }
    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
    }
    // Widen the AND mask to the destination type; the zext semantics are
    // preserved because the mask's high bits are zero after zext.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  if (ISD::isNON_EXTLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // zero-extended too (checked by ExtendUsesToFormExtLoad).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      // Extend SetCC uses if necessary.
      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
        SDNode *SetCC = SetCCs[i];
        SmallVector<SDValue, 4> Ops;

        for (unsigned j = 0; j != 2; ++j) {
          SDValue SOp = SetCC->getOperand(j);
          if (SOp == Trunc)
            Ops.push_back(ExtLoad);
          else
            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
                                      N->getDebugLoc(), VT, SOp));
        }

        Ops.push_back(SetCC->getOperand(2));
        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
                                     SetCC->getValueType(0),
                                     &Ops[0], Ops.size()));
      }

      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(), MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    if (N0.getOpcode() == ISD::SHL) {
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
      // The inner zext guarantees this many known-zero high bits.
      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
      if (ShAmt > KnownZeroBits)
        return SDValue();
    }
    DebugLoc dl = N->getDebugLoc();
    return DAG.getNode(N0.getOpcode(), dl, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
                       DAG.getNode(ISD::ZERO_EXTEND, dl,
                                   N0.getOperand(1).getValueType(),
                                   N0.getOperand(1)));
  }

  return PromoteExtend(SDValue(N, 0));
}

// visitANY_EXTEND - Combine an ISD::ANY_EXTEND node.  Each transformation
// attempted is described by the inline "fold (...)" comments below.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (aext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      if (NarrowLoad.getNode() != N0.getNode())
        CombineTo(N0.getNode(), NarrowLoad);
      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
    }
    // Widen the mask to the destination type; the AND fixes the bits the
    // any_extend would otherwise leave undefined.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  if (ISD::isNON_EXTLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // any-extended too (checked by ExtendUsesToFormExtLoad).
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getSrcValue(),
                                       LN0->getSrcValueOffset(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      // Extend SetCC uses if necessary.
      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
        SDNode *SetCC = SetCCs[i];
        SmallVector<SDValue, 4> Ops;

        for (unsigned j = 0; j != 2; ++j) {
          SDValue SOp = SetCC->getOperand(j);
          if (SOp == Trunc)
            Ops.push_back(ExtLoad);
          else
            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
                                      N->getDebugLoc(), VT, SOp));
        }

        Ops.push_back(SetCC->getOperand(2));
        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
                                     SetCC->getValueType(0),
                                     &Ops[0], Ops.size()));
      }

      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // Keep the original extension type (sext/zext/ext) of the load.
    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
                                     VT, LN0->getChain(), LN0->getBasePtr(),
                                     LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(), MemVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                          N0.getValueType(), ExtLoad),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return PromoteExtend(SDValue(N, 0));
}

/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used.  If so, return the
/// simpler operand, otherwise return a null SDValue.
SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
  switch (V.getOpcode()) {
  default: break;
  case ISD::OR:
  case ISD::XOR:
    // If the LHS or RHS don't contribute bits to the or, drop them.
    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
      return V.getOperand(1);
    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
      return V.getOperand(0);
    break;
  case ISD::SRL:
    // Only look at single-use SRLs.
    if (!V.getNode()->hasOneUse())
      break;
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
      // See if we can recursively simplify the LHS.
      unsigned Amt = RHSC->getZExtValue();

      // Watch out for shift count overflow though.
      if (Amt >= Mask.getBitWidth()) break;
      APInt NewMask = Mask << Amt;
      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
      if (SimplifyLHS.getNode())
        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
                           SimplifyLHS, V.getOperand(1));
    }
  }
  return SDValue();
}

/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
/// bits and then truncated to a narrower type and where N is a multiple
/// of number of bits of the narrower type, transform it to a narrower load
/// from address + N / num of bits of new type. If the result is to be
/// extended, also fold the extension to form a extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
      return SDValue();
  }

  unsigned EVTBits = ExtVT.getSizeInBits();
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        // Look through the SRL at the load itself.
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }
    }
  }

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT PtrType = N0.getOperand(1).getValueType();

    // For big endian targets, we need to adjust the offset to the pointer to
    // load the correct bytes.
    if (TLI.isBigEndian()) {
      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
    }

    // Convert the bit offset into a byte offset from the base pointer.
    uint64_t PtrOff =  ShAmt / 8;
    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
                                 PtrType, LN0->getBasePtr(),
                                 DAG.getConstant(PtrOff, PtrType));
    AddToWorkList(NewPtr.getNode());

    SDValue Load = (ExtType == ISD::NON_EXTLOAD)
      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
      : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                       NewAlign);

    // Replace the old load's chain with the new load's chain.
    WorkListRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
                                  &DeadNodes);

    // Return the new loaded value.
    return Load;
  }

  return SDValue();
}

// visitSIGN_EXTEND_INREG - Combine an ISD::SIGN_EXTEND_INREG node.  Each
// transformation attempted is described by the inline "fold (...)" comments.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // Note: this local variable named 'EVT' shadows the EVT type for the rest
  // of this function; it holds the type being sign-extended *from*.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);
  }

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      // NOTE(review): N1 (the VT operand of the sext_in_reg) is passed as an
      // extra operand to this SIGN_EXTEND; confirm this is intended, since a
      // sign extension normally takes a single value operand.
      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(), EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(), EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  return SDValue();
}

// visitTRUNCATE - Combine an ISD::TRUNCATE node.  Each transformation
// attempted is described by the inline "fold (...)" comments below.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                         N0.getOperand(0));
    else if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
    else
      // if the source and dest are the same type, we can drop both the extend
      // and the truncate.
      return N0.getOperand(0);
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> trunc y
  SDValue Shorter =
    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                             VT.getSizeInBits()));
  if (Shorter.getNode())
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);

  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
    return ReduceLoadWidth(N);
  return SDValue();
}

// getBuildPairElt - Return the i'th operand of the BUILD_PAIR, looking
// through an intervening MERGE_VALUES node if present.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    // Only merge if the wider load's required ABI alignment is already
    // satisfied by the first load's alignment.
    unsigned NewAlign = TLI.getTargetData()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getSrcValue(),
                         LD1->getSrcValueOffset(), false, false, Align);
  }

  return SDValue();
}

// visitBIT_CONVERT - Combine an ISD::BIT_CONVERT node.  Each transformation
// attempted is described by the inline comments below.
SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = true;
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
          N0.getOperand(i).getOpcode() != ISD::Constant &&
          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
        isSimple = false;
        break;
      }

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
4187 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 4188 SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0); 4189 if (Res.getNode() != N) { 4190 if (!LegalOperations || 4191 TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) 4192 return Res; 4193 4194 // Folding it resulted in an illegal node, and it's too late to 4195 // do that. Clean up the old node and forego the transformation. 4196 // Ideally this won't happen very often, because instcombine 4197 // and the earlier dagcombine runs (where illegal nodes are 4198 // permitted) should have folded most of them already. 4199 DAG.DeleteNode(Res.getNode()); 4200 } 4201 } 4202 4203 // (conv (conv x, t1), t2) -> (conv x, t2) 4204 if (N0.getOpcode() == ISD::BIT_CONVERT) 4205 return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, 4206 N0.getOperand(0)); 4207 4208 // fold (conv (load x)) -> (load (conv*)x) 4209 // If the resultant load doesn't need a higher alignment than the original! 4210 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 4211 // Do not change the width of a volatile load. 
4212 !cast<LoadSDNode>(N0)->isVolatile() && 4213 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { 4214 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4215 unsigned Align = TLI.getTargetData()-> 4216 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 4217 unsigned OrigAlign = LN0->getAlignment(); 4218 4219 if (Align <= OrigAlign) { 4220 SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), 4221 LN0->getBasePtr(), 4222 LN0->getSrcValue(), LN0->getSrcValueOffset(), 4223 LN0->isVolatile(), LN0->isNonTemporal(), 4224 OrigAlign); 4225 AddToWorkList(N); 4226 CombineTo(N0.getNode(), 4227 DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), 4228 N0.getValueType(), Load), 4229 Load.getValue(1)); 4230 return Load; 4231 } 4232 } 4233 4234 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 4235 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 4236 // This often reduces constant pool loads. 4237 if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) && 4238 N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) { 4239 SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT, 4240 N0.getOperand(0)); 4241 AddToWorkList(NewConv.getNode()); 4242 4243 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 4244 if (N0.getOpcode() == ISD::FNEG) 4245 return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, 4246 NewConv, DAG.getConstant(SignBit, VT)); 4247 assert(N0.getOpcode() == ISD::FABS); 4248 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4249 NewConv, DAG.getConstant(~SignBit, VT)); 4250 } 4251 4252 // fold (bitconvert (fcopysign cst, x)) -> 4253 // (or (and (bitconvert x), sign), (and cst, (not sign))) 4254 // Note that we don't handle (copysign x, cst) because this can always be 4255 // folded to an fneg or fabs. 
4256 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 4257 isa<ConstantFPSDNode>(N0.getOperand(0)) && 4258 VT.isInteger() && !VT.isVector()) { 4259 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 4260 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 4261 if (isTypeLegal(IntXVT)) { 4262 SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), 4263 IntXVT, N0.getOperand(1)); 4264 AddToWorkList(X.getNode()); 4265 4266 // If X has a different width than the result/lhs, sext it or truncate it. 4267 unsigned VTWidth = VT.getSizeInBits(); 4268 if (OrigXWidth < VTWidth) { 4269 X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); 4270 AddToWorkList(X.getNode()); 4271 } else if (OrigXWidth > VTWidth) { 4272 // To get the sign bit in the right place, we have to shift it right 4273 // before truncating. 4274 X = DAG.getNode(ISD::SRL, X.getDebugLoc(), 4275 X.getValueType(), X, 4276 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 4277 AddToWorkList(X.getNode()); 4278 X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); 4279 AddToWorkList(X.getNode()); 4280 } 4281 4282 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 4283 X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, 4284 X, DAG.getConstant(SignBit, VT)); 4285 AddToWorkList(X.getNode()); 4286 4287 SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), 4288 VT, N0.getOperand(0)); 4289 Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, 4290 Cst, DAG.getConstant(~SignBit, VT)); 4291 AddToWorkList(Cst.getNode()); 4292 4293 return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); 4294 } 4295 } 4296 4297 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 
  // Fold bitconvert(build_pair(ld, ld)) -> ld when the two loads are from
  // consecutive locations (handled by CombineConsecutiveLoads).
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

/// visitBUILD_PAIR - The only combine for BUILD_PAIR is merging two
/// consecutive loads of the halves into a single wider load.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
/// destination element value type.  Returns a BUILD_VECTOR (or
/// SCALAR_TO_VECTOR) whose elements are the source bits reinterpreted as
/// DstEltVT, handling same-size, growing and shrinking element conversions.
SDValue DAGCombiner::
ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
                                DstEltVT, Op));
      AddToWorkList(Ops.back().getNode());
    }
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    // Recurse to do the FP->int reinterpretation first, then fall through to
    // the integer grow/shrink logic below with the int-typed vector.
    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first input element is the
        // least-significant chunk, so walk the inputs in reverse so the
        // shift-accumulate above places each chunk correctly.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every input chunk was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      // One undef input yields NumOutputsPerInput undef outputs.
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
                        getAPIntValue()).zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      // Peel the low DstBitSize bits off as the next output element.
      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                     &Ops[0], Ops.size());
}

/// visitFADD - Combines for floating-point addition.  Several folds are
/// gated on UnsafeFPMath because they are not IEEE-exact.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1+c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
  // fold (fadd A, 0) -> A
  // Unsafe-only: e.g. -0.0 + +0.0 is +0.0, not A, under IEEE rules.
  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // == 2 means the negation is strictly profitable, not merely possible.
  if (isNegatibleForFree(N1, LegalOperations) == 2)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));
  // fold (fadd (fneg A), B) -> (fsub B, A)
  if (isNegatibleForFree(N0, LegalOperations) == 2)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
  // (reassociation is unsafe in strict FP math).
  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  return SDValue();
}

/// visitFSUB - Combines for floating-point subtraction.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
  // fold (fsub A, 0) -> A  (unsafe-only: A may be -0.0)
  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fsub 0, B) -> -B  (unsafe-only: 0-0.0 is +0.0, while -(0.0) is -0.0)
  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
    if (isNegatibleForFree(N1, LegalOperations))
      return GetNegatedExpression(N1, DAG, LegalOperations);
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
  }
  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  return SDValue();
}

/// visitFMUL - Combines for floating-point multiplication.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
  // fold (fmul A, 0) -> 0
  // Unsafe-only: wrong for A = NaN/Inf and for sign of zero.
  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
    return N1;
  // fold (fmul A, 0) -> 0, vector edition.
  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
    return N1;
  // fold (fmul X, 2.0) -> (fadd X, X)  (exact, safe in strict math)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  return SDValue();
}

/// visitFDIV - Combines for floating-point division.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// visitFREM - Constant-fold floating-point remainder.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);

  return SDValue();
}

/// visitFCOPYSIGN - Combines for FCOPYSIGN(magnitude, sign-source).
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);

  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x) iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
    }
  }

  // The magnitude operand's sign is irrelevant, so look through any
  // sign-only operation on it:
  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  // (rounding/extension preserves the sign bit)
  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0, N1.getOperand(0));

  return SDValue();
}

/// visitSINT_TO_FP - Combines for signed integer -> FP conversion.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (N0C && OpVT != MVT::ppcf128)
    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
4681 if (DAG.SignBitIsZero(N0)) 4682 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 4683 } 4684 4685 return SDValue(); 4686} 4687 4688SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 4689 SDValue N0 = N->getOperand(0); 4690 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4691 EVT VT = N->getValueType(0); 4692 EVT OpVT = N0.getValueType(); 4693 4694 // fold (uint_to_fp c1) -> c1fp 4695 if (N0C && OpVT != MVT::ppcf128) 4696 return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); 4697 4698 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 4699 // but SINT_TO_FP is legal on this target, try to convert. 4700 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 4701 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 4702 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 4703 if (DAG.SignBitIsZero(N0)) 4704 return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); 4705 } 4706 4707 return SDValue(); 4708} 4709 4710SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 4711 SDValue N0 = N->getOperand(0); 4712 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 4713 EVT VT = N->getValueType(0); 4714 4715 // fold (fp_to_sint c1fp) -> c1 4716 if (N0CFP) 4717 return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); 4718 4719 return SDValue(); 4720} 4721 4722SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 4723 SDValue N0 = N->getOperand(0); 4724 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 4725 EVT VT = N->getValueType(0); 4726 4727 // fold (fp_to_uint c1fp) -> c1 4728 if (N0CFP && VT != MVT::ppcf128) 4729 return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); 4730 4731 return SDValue(); 4732} 4733 4734SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { 4735 SDValue N0 = N->getOperand(0); 4736 SDValue N1 = N->getOperand(1); 4737 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 4738 EVT VT = N->getValueType(0); 4739 4740 // fold (fp_round c1fp) -> c1fp 
  if (N0CFP && N0.getValueType() != MVT::ppcf128)
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value preserving truncation if both round's are.
    // (operand 1 of FP_ROUND is the "trunc is value preserving" flag)
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1);
    AddToWorkList(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// visitFP_ROUND_INREG - Constant-fold FP_ROUND_INREG (round in place to the
/// VT carried in operand 1, keeping the original value type).
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NB: historical quirk — this local shadows the EVT type name.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
  }

  return SDValue();
}

/// visitFP_EXTEND - Combines for FP precision extension.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    // Re-round if extending to something narrower than X, otherwise
    // re-extend directly from X.
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getSrcValue(),
                                     LN0->getSrcValueOffset(),
                                     N0.getValueType(),
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    // Other users of the original load see a round of the extending load
    // (value-preserving, hence the "1" trunc flag), on the load's new chain.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

/// visitFNEG - Combines for floating-point negation.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the operand can be cheaply negated, just use that expression.
  if (isNegatibleForFree(N0, LegalOperations))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
  // constant pool values.
  if (N0.getOpcode() == ISD::BIT_CONVERT &&
      !VT.isVector() &&
      N0.getNode()->hasOneUse() &&
      N0.getOperand(0).getValueType().isInteger()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      // Flip only the sign bit of the integer representation.
      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
                         VT, Int);
    }
  }

  return SDValue();
}

/// visitFABS - Combines for floating-point absolute value.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);
  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // (fabs discards the sign, so sign-only ops below it are dead)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
  // constant pool values.
  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
      N0.getOperand(0).getValueType().isInteger() &&
      !N0.getOperand(0).getValueType().isVector()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      // Clear only the sign bit of the integer representation.
      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
                         N->getValueType(0), Int);
    }
  }

  return SDValue();
}

/// visitBRCOND - Combines for conditional branches: form BR_CC from a SETCC
/// condition, and simplify AND/SRL and XOR based conditions into SETCC.
SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  SDNode *Trunc = 0;
  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
    // Look past truncate.  Remember it so it can be deleted if the
    // underlying condition gets rewritten below.
    Trunc = N1.getNode();
    N1 = N1.getOperand(0);
  }

  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(N->getDebugLoc(),
                         TLI.getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc) {
            removeFromWorkList(Trunc);
            DAG.DeleteNode(Trunc);
          }
          // Replace the uses of SRL with SETCC
          WorkListRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
          removeFromWorkList(N1.getNode());
          DAG.DeleteNode(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode() && Tmp.getNode() != TheXor) {
        DEBUG(dbgs() << "\nReplacing.8 ";
              TheXor->dump(&DAG);
              dbgs() << "\nWith: ";
              Tmp.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
        removeFromWorkList(TheXor);
        DAG.DeleteNode(TheXor);
        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                           MVT::Other, Chain, Tmp, N2);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // Detect the xor-with-1 (boolean not) form, which inverts the compare.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;

      EVT SetCCVT = NodeToReplace.getValueType();
      if (LegalTypes)
        SetCCVT = TLI.getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorkListRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
      removeFromWorkList(NodeToReplace.getNode());
      DAG.DeleteNode(NodeToReplace.getNode());
      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
//
SDValue DAGCombiner::visitBR_CC(SDNode *N) {
  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // Use SimplifySetCC to simplify SETCC's.
  SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
                               CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
                               false);
  if (Simp.getNode()) AddToWorkList(Simp.getNode());

  // fold to a simpler setcc
  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
                       N->getOperand(0), Simp.getOperand(2),
                       Simp.getOperand(0), Simp.getOperand(1),
                       N->getOperand(4));

  return SDValue();
}

/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
/// transformation, the new indexed load / store has effectively folded
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  if (!LegalOperations)
    return false;

  // Classify N as a load or store and make sure the target supports some
  // pre-indexed form for its memory type; remember the base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;
  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  Folding the add into the store would make the stored value
  // depend on the store's own result.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Now check for #3 and #4.  RealUse becomes true only if some use of Ptr is
  // NOT merely a load/store base pointer; otherwise nothing is gained by
  // pre-indexing.
  bool RealUse = false;
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Use = *I;
    if (Use == N)
      continue;
    if (Use->isPredecessorOf(N))
      return false;

    if (!((Use->getOpcode() == ISD::LOAD &&
           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
          (Use->getOpcode() == ISD::STORE &&
           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // Result numbering: an indexed load produces (value, new base, chain);
  // an indexed store produces (new base, chain).
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
                                  &DeadNodes);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
                                  &DeadNodes);
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
                                  &DeadNodes);
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
                                &DeadNodes);
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (!LegalOperations)
    return false;

  // Classify N as a load or store and make sure the target supports some
  // post-indexed form for its memory type; remember the base pointer.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look through the other uses of Ptr for an add/sub we could fold in as
  // the post-increment / post-decrement of the new indexed node.
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Op = *I;
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // For a commutative add the target may have returned the operands
      // swapped; canonicalize so BasePtr is the memory operand's pointer.
      if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
        std::swap(BasePtr, Offset);
      if (Ptr != BasePtr)
        continue;
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr.
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
        SDNode *Use = *II;
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode::use_iterator III = Use->use_begin(),
                 EEE = Use->use_end(); III != EEE; ++III) {
            SDNode *UseUse = *III;
            if (!((UseUse->getOpcode() == ISD::LOAD &&
                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
                  (UseUse->getOpcode() == ISD::STORE &&
                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // Result numbering: an indexed load produces (value, new base,
        // chain); an indexed store produces (new base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
                                        &DeadNodes);
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
                                        &DeadNodes);
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
                                        &DeadNodes);
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0),
                                      &DeadNodes);
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

/// visitLOAD - Combine a LOAD node: delete dead loads, forward stored values,
/// improve alignment, pick a better chain, and try indexed / promoted forms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (N->hasNUsesOfValue(0, 0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);

        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
        // Both the value and updated-base results are dead: replace them with
        // undef and forward the chain, then delete the load.
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)),
                                      &DeadNodes);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
      !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getSrcValue(),
                              LD->getSrcValueOffset(), LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align);
    }
  }

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr,
                               LD->getSrcValue(), LD->getSrcValueOffset(),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getSrcValue(),
                                  LD->getSrcValueOffset(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  if (PromoteLoad(SDValue(N, 0)))
    return SDValue(N, 0);
  return SDValue();
}

/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out.  If so, return the byte size
/// being masked out and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // Returns (0, 0) on failure; (MaskedBytes, ByteOffset) on success.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}


/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
/// provides a value as specified by MaskInfo.  If so, replace the specified
/// store with a narrower store of truncated IVal.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return 0;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
                      false, false, NewAlign).getNode();
}


/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
/// of the loaded bits, try narrowing the load and store if it would end up
/// being a win for performance or code size.
5594SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 5595 StoreSDNode *ST = cast<StoreSDNode>(N); 5596 if (ST->isVolatile()) 5597 return SDValue(); 5598 5599 SDValue Chain = ST->getChain(); 5600 SDValue Value = ST->getValue(); 5601 SDValue Ptr = ST->getBasePtr(); 5602 EVT VT = Value.getValueType(); 5603 5604 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 5605 return SDValue(); 5606 5607 unsigned Opc = Value.getOpcode(); 5608 5609 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 5610 // is a byte mask indicating a consecutive number of bytes, check to see if 5611 // Y is known to provide just those bytes. If so, we try to replace the 5612 // load + replace + store sequence with a single (narrower) store, which makes 5613 // the load dead. 5614 if (Opc == ISD::OR) { 5615 std::pair<unsigned, unsigned> MaskedLoad; 5616 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 5617 if (MaskedLoad.first) 5618 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 5619 Value.getOperand(1), ST,this)) 5620 return SDValue(NewST, 0); 5621 5622 // Or is commutative, so try swapping X and Y. 5623 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 5624 if (MaskedLoad.first) 5625 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 5626 Value.getOperand(0), ST,this)) 5627 return SDValue(NewST, 0); 5628 } 5629 5630 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 5631 Value.getOperand(1).getOpcode() != ISD::Constant) 5632 return SDValue(); 5633 5634 SDValue N0 = Value.getOperand(0); 5635 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) { 5636 LoadSDNode *LD = cast<LoadSDNode>(N0); 5637 if (LD->getBasePtr() != Ptr) 5638 return SDValue(); 5639 5640 // Find the type to narrow it the load / op / store to. 
5641 SDValue N1 = Value.getOperand(1); 5642 unsigned BitWidth = N1.getValueSizeInBits(); 5643 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 5644 if (Opc == ISD::AND) 5645 Imm ^= APInt::getAllOnesValue(BitWidth); 5646 if (Imm == 0 || Imm.isAllOnesValue()) 5647 return SDValue(); 5648 unsigned ShAmt = Imm.countTrailingZeros(); 5649 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 5650 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 5651 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 5652 while (NewBW < BitWidth && 5653 !(TLI.isOperationLegalOrCustom(Opc, NewVT) && 5654 TLI.isNarrowingProfitable(VT, NewVT))) { 5655 NewBW = NextPowerOf2(NewBW); 5656 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 5657 } 5658 if (NewBW >= BitWidth) 5659 return SDValue(); 5660 5661 // If the lsb changed does not start at the type bitwidth boundary, 5662 // start at the previous one. 5663 if (ShAmt % NewBW) 5664 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 5665 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); 5666 if ((Imm & Mask) == Imm) { 5667 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 5668 if (Opc == ISD::AND) 5669 NewImm ^= APInt::getAllOnesValue(NewBW); 5670 uint64_t PtrOff = ShAmt / 8; 5671 // For big endian targets, we need to adjust the offset to the pointer to 5672 // load the correct bytes. 
5673 if (TLI.isBigEndian()) 5674 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 5675 5676 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 5677 const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 5678 if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) 5679 return SDValue(); 5680 5681 SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), 5682 Ptr.getValueType(), Ptr, 5683 DAG.getConstant(PtrOff, Ptr.getValueType())); 5684 SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), 5685 LD->getChain(), NewPtr, 5686 LD->getSrcValue(), LD->getSrcValueOffset(), 5687 LD->isVolatile(), LD->isNonTemporal(), 5688 NewAlign); 5689 SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, 5690 DAG.getConstant(NewImm, NewVT)); 5691 SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), 5692 NewVal, NewPtr, 5693 ST->getSrcValue(), ST->getSrcValueOffset(), 5694 false, false, NewAlign); 5695 5696 AddToWorkList(NewPtr.getNode()); 5697 AddToWorkList(NewLD.getNode()); 5698 AddToWorkList(NewVal.getNode()); 5699 WorkListRemover DeadNodes(*this); 5700 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1), 5701 &DeadNodes); 5702 ++OpsNarrowed; 5703 return NewST; 5704 } 5705 } 5706 5707 return SDValue(); 5708} 5709 5710SDValue DAGCombiner::visitSTORE(SDNode *N) { 5711 StoreSDNode *ST = cast<StoreSDNode>(N); 5712 SDValue Chain = ST->getChain(); 5713 SDValue Value = ST->getValue(); 5714 SDValue Ptr = ST->getBasePtr(); 5715 5716 // If this is a store of a bit convert, store the input value if the 5717 // resultant store does not need a higher alignment than the original. 
5718 if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() && 5719 ST->isUnindexed()) { 5720 unsigned OrigAlign = ST->getAlignment(); 5721 EVT SVT = Value.getOperand(0).getValueType(); 5722 unsigned Align = TLI.getTargetData()-> 5723 getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); 5724 if (Align <= OrigAlign && 5725 ((!LegalOperations && !ST->isVolatile()) || 5726 TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) 5727 return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), 5728 Ptr, ST->getSrcValue(), 5729 ST->getSrcValueOffset(), ST->isVolatile(), 5730 ST->isNonTemporal(), OrigAlign); 5731 } 5732 5733 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' 5734 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { 5735 // NOTE: If the original store is volatile, this transform must not increase 5736 // the number of stores. For example, on x86-32 an f64 can be stored in one 5737 // processor operation but an i64 (which is not legal) requires two. So the 5738 // transform should not be done in this case. 5739 if (Value.getOpcode() != ISD::TargetConstantFP) { 5740 SDValue Tmp; 5741 switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { 5742 default: llvm_unreachable("Unknown FP type"); 5743 case MVT::f80: // We don't do this for these yet. 5744 case MVT::f128: 5745 case MVT::ppcf128: 5746 break; 5747 case MVT::f32: 5748 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 5749 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 5750 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
5751 bitcastToAPInt().getZExtValue(), MVT::i32); 5752 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 5753 Ptr, ST->getSrcValue(), 5754 ST->getSrcValueOffset(), ST->isVolatile(), 5755 ST->isNonTemporal(), ST->getAlignment()); 5756 } 5757 break; 5758 case MVT::f64: 5759 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 5760 !ST->isVolatile()) || 5761 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 5762 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 5763 getZExtValue(), MVT::i64); 5764 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 5765 Ptr, ST->getSrcValue(), 5766 ST->getSrcValueOffset(), ST->isVolatile(), 5767 ST->isNonTemporal(), ST->getAlignment()); 5768 } else if (!ST->isVolatile() && 5769 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 5770 // Many FP stores are not made apparent until after legalize, e.g. for 5771 // argument passing. Since this is so common, custom legalize the 5772 // 64-bit integer store into two 32-bit stores. 5773 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 5774 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); 5775 SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); 5776 if (TLI.isBigEndian()) std::swap(Lo, Hi); 5777 5778 int SVOffset = ST->getSrcValueOffset(); 5779 unsigned Alignment = ST->getAlignment(); 5780 bool isVolatile = ST->isVolatile(); 5781 bool isNonTemporal = ST->isNonTemporal(); 5782 5783 SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, 5784 Ptr, ST->getSrcValue(), 5785 ST->getSrcValueOffset(), 5786 isVolatile, isNonTemporal, 5787 ST->getAlignment()); 5788 Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, 5789 DAG.getConstant(4, Ptr.getValueType())); 5790 SVOffset += 4; 5791 Alignment = MinAlign(Alignment, 4U); 5792 SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, 5793 Ptr, ST->getSrcValue(), 5794 SVOffset, isVolatile, isNonTemporal, 5795 Alignment); 5796 return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 5797 
St0, St1); 5798 } 5799 5800 break; 5801 } 5802 } 5803 } 5804 5805 // Try to infer better alignment information than the store already has. 5806 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 5807 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 5808 if (Align > ST->getAlignment()) 5809 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, 5810 Ptr, ST->getSrcValue(), 5811 ST->getSrcValueOffset(), ST->getMemoryVT(), 5812 ST->isVolatile(), ST->isNonTemporal(), Align); 5813 } 5814 } 5815 5816 if (CombinerAA) { 5817 // Walk up chain skipping non-aliasing memory nodes. 5818 SDValue BetterChain = FindBetterChain(N, Chain); 5819 5820 // If there is a better chain. 5821 if (Chain != BetterChain) { 5822 SDValue ReplStore; 5823 5824 // Replace the chain to avoid dependency. 5825 if (ST->isTruncatingStore()) { 5826 ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, 5827 ST->getSrcValue(),ST->getSrcValueOffset(), 5828 ST->getMemoryVT(), ST->isVolatile(), 5829 ST->isNonTemporal(), ST->getAlignment()); 5830 } else { 5831 ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, 5832 ST->getSrcValue(), ST->getSrcValueOffset(), 5833 ST->isVolatile(), ST->isNonTemporal(), 5834 ST->getAlignment()); 5835 } 5836 5837 // Create token to keep both nodes around. 5838 SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), 5839 MVT::Other, Chain, ReplStore); 5840 5841 // Make sure the new and old chains are cleaned up. 5842 AddToWorkList(Token.getNode()); 5843 5844 // Don't add users to work list. 5845 return CombineTo(N, Token, false); 5846 } 5847 } 5848 5849 // Try transforming N to an indexed store. 5850 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 5851 return SDValue(N, 0); 5852 5853 // FIXME: is there such a thing as a truncating indexed store? 
5854 if (ST->isTruncatingStore() && ST->isUnindexed() && 5855 Value.getValueType().isInteger()) { 5856 // See if we can simplify the input to this truncstore with knowledge that 5857 // only the low bits are being used. For example: 5858 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 5859 SDValue Shorter = 5860 GetDemandedBits(Value, 5861 APInt::getLowBitsSet(Value.getValueSizeInBits(), 5862 ST->getMemoryVT().getSizeInBits())); 5863 AddToWorkList(Value.getNode()); 5864 if (Shorter.getNode()) 5865 return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, 5866 Ptr, ST->getSrcValue(), 5867 ST->getSrcValueOffset(), ST->getMemoryVT(), 5868 ST->isVolatile(), ST->isNonTemporal(), 5869 ST->getAlignment()); 5870 5871 // Otherwise, see if we can simplify the operation with 5872 // SimplifyDemandedBits, which only works if the value has a single use. 5873 if (SimplifyDemandedBits(Value, 5874 APInt::getLowBitsSet( 5875 Value.getValueType().getScalarType().getSizeInBits(), 5876 ST->getMemoryVT().getScalarType().getSizeInBits()))) 5877 return SDValue(N, 0); 5878 } 5879 5880 // If this is a load followed by a store to the same location, then the store 5881 // is dead/noop. 5882 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 5883 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 5884 ST->isUnindexed() && !ST->isVolatile() && 5885 // There can't be any side effects between the load and store, such as 5886 // a call or store. 5887 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 5888 // The store is dead, remove it. 5889 return Chain; 5890 } 5891 } 5892 5893 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 5894 // truncating store. We can do this even if this is already a truncstore. 
5895 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 5896 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 5897 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 5898 ST->getMemoryVT())) { 5899 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), 5900 Ptr, ST->getSrcValue(), 5901 ST->getSrcValueOffset(), ST->getMemoryVT(), 5902 ST->isVolatile(), ST->isNonTemporal(), 5903 ST->getAlignment()); 5904 } 5905 5906 return ReduceLoadOpStoreWidth(N); 5907} 5908 5909SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { 5910 SDValue InVec = N->getOperand(0); 5911 SDValue InVal = N->getOperand(1); 5912 SDValue EltNo = N->getOperand(2); 5913 5914 // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new 5915 // vector with the inserted element. 5916 if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) { 5917 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 5918 SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(), 5919 InVec.getNode()->op_end()); 5920 if (Elt < Ops.size()) 5921 Ops[Elt] = InVal; 5922 return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 5923 InVec.getValueType(), &Ops[0], Ops.size()); 5924 } 5925 // If the invec is an UNDEF and if EltNo is a constant, create a new 5926 // BUILD_VECTOR with undef elements and the inserted element. 
  // (visitINSERT_VECTOR_ELT, tail) Inserting into an all-undef vector with a
  // constant index can be rewritten as a BUILD_VECTOR of undefs with the
  // inserted value placed at that lane.
  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
      isa<ConstantSDNode>(EltNo)) {
    EVT VT = InVec.getValueType();
    EVT EltVT = VT.getVectorElementType();
    unsigned NElts = VT.getVectorNumElements();
    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));

    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    // An out-of-range insert index is simply dropped; the result is still the
    // (all-undef) BUILD_VECTOR.
    if (Elt < Ops.size())
      Ops[Elt] = InVal;
    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                       InVec.getValueType(), &Ops[0], Ops.size());
  }
  return SDValue();
}

/// visitEXTRACT_VECTOR_ELT - Fold extracts from SCALAR_TO_VECTOR, and (after
/// legalization) turn extracts of loaded vectors into narrow scalar loads.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    EVT EltVT = InVec.getValueType().getVectorElementType();
    SDValue InOp = InVec.getOperand(0);
    EVT NVT = N->getValueType(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
    }
    return InOp;
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
  SDValue EltNo = N->getOperand(1);

  if (isa<ConstantSDNode>(EltNo)) {
    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    bool NewLoad = false;
    bool BCNumEltsChanged = false;
    EVT VT = InVec.getValueType();
    EVT ExtVT = VT.getVectorElementType();
    EVT LVT = ExtVT;

    // Look through a bit convert, remembering whether it changed the element
    // count (which would invalidate shuffle-mask inspection below).
    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
      EVT BCVT = InVec.getOperand(0).getValueType();
      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
        return SDValue();
      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
        BCNumEltsChanged = true;
      InVec = InVec.getOperand(0);
      ExtVT = BCVT.getVectorElementType();
      NewLoad = true;
    }

    LoadSDNode *LN0 = NULL;
    const ShuffleVectorSDNode *SVN = NULL;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses '>' so Elt == NumElems still reaches
      // getMaskElt() — looks like an off-by-one; confirm against upstream.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      if (InVec.getOpcode() == ISD::BIT_CONVERT)
        InVec = InVec.getOperand(0);
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Remap the extract index into the chosen shuffle operand.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
      }
    }

    // Only fold a single-use, non-volatile load.
    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
      return SDValue();

    unsigned Align = LN0->getAlignment();
    if (NewLoad) {
      // Check the resultant load doesn't need a higher alignment than the
      // original load.
      unsigned NewAlign =
        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));

      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
        return SDValue();

      Align = NewAlign;
    }

    SDValue NewPtr = LN0->getBasePtr();
    if (Elt) {
      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
      EVT PtrType = NewPtr.getValueType();
      // Big-endian targets count element offsets from the most significant
      // end of the vector.
      if (TLI.isBigEndian())
        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
                           DAG.getConstant(PtrOff, PtrType));
    }

    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
  }

  return SDValue();
}

/// visitBUILD_VECTOR - If every (non-undef) operand is an EXTRACT_VECTOR_ELT
/// from at most two distinct source vectors of the result type, turn the
/// BUILD_VECTOR into a VECTOR_SHUFFLE.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.
  SDValue VecIn1, VecIn2;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    // Ignore undef inputs.
6064 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 6065 6066 // If this input is something other than a EXTRACT_VECTOR_ELT with a 6067 // constant index, bail out. 6068 if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || 6069 !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { 6070 VecIn1 = VecIn2 = SDValue(0, 0); 6071 break; 6072 } 6073 6074 // If the input vector type disagrees with the result of the build_vector, 6075 // we can't make a shuffle. 6076 SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); 6077 if (ExtractedFromVec.getValueType() != VT) { 6078 VecIn1 = VecIn2 = SDValue(0, 0); 6079 break; 6080 } 6081 6082 // Otherwise, remember this. We allow up to two distinct input vectors. 6083 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 6084 continue; 6085 6086 if (VecIn1.getNode() == 0) { 6087 VecIn1 = ExtractedFromVec; 6088 } else if (VecIn2.getNode() == 0) { 6089 VecIn2 = ExtractedFromVec; 6090 } else { 6091 // Too many inputs. 6092 VecIn1 = VecIn2 = SDValue(0, 0); 6093 break; 6094 } 6095 } 6096 6097 // If everything is good, we can make a shuffle operation. 6098 if (VecIn1.getNode()) { 6099 SmallVector<int, 8> Mask; 6100 for (unsigned i = 0; i != NumInScalars; ++i) { 6101 if (N->getOperand(i).getOpcode() == ISD::UNDEF) { 6102 Mask.push_back(-1); 6103 continue; 6104 } 6105 6106 // If extracting from the first vector, just use the index directly. 6107 SDValue Extract = N->getOperand(i); 6108 SDValue ExtVal = Extract.getOperand(1); 6109 if (Extract.getOperand(0) == VecIn1) { 6110 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 6111 if (ExtIndex > VT.getVectorNumElements()) 6112 return SDValue(); 6113 6114 Mask.push_back(ExtIndex); 6115 continue; 6116 } 6117 6118 // Otherwise, use InIdx + VecSize 6119 unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 6120 Mask.push_back(Idx+NumInScalars); 6121 } 6122 6123 // Add count and size info. 
6124 if (!isTypeLegal(VT)) 6125 return SDValue(); 6126 6127 // Return the new VECTOR_SHUFFLE node. 6128 SDValue Ops[2]; 6129 Ops[0] = VecIn1; 6130 Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); 6131 return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]); 6132 } 6133 6134 return SDValue(); 6135} 6136 6137SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { 6138 // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of 6139 // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector 6140 // inputs come from at most two distinct vectors, turn this into a shuffle 6141 // node. 6142 6143 // If we only have one input vector, we don't need to do any concatenation. 6144 if (N->getNumOperands() == 1) 6145 return N->getOperand(0); 6146 6147 return SDValue(); 6148} 6149 6150SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { 6151 return SDValue(); 6152 6153 EVT VT = N->getValueType(0); 6154 unsigned NumElts = VT.getVectorNumElements(); 6155 6156 SDValue N0 = N->getOperand(0); 6157 6158 assert(N0.getValueType().getVectorNumElements() == NumElts && 6159 "Vector shuffle must be normalized in DAG"); 6160 6161 // FIXME: implement canonicalizations from DAG.getVectorShuffle() 6162 6163 // If it is a splat, check if the argument vector is a build_vector with 6164 // all scalar elements the same. 6165 if (cast<ShuffleVectorSDNode>(N)->isSplat()) { 6166 SDNode *V = N0.getNode(); 6167 6168 6169 // If this is a bit convert that changes the element type of the vector but 6170 // not the number of vector elements, look through it. Be careful not to 6171 // look though conversions that change things like v4f32 to v2f64. 
    if (V->getOpcode() == ISD::BIT_CONVERT) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      unsigned NumElems = V->getNumOperands();
      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
      if (NumElems > BaseIdx) {
        SDValue Base;
        bool AllSame = true;
        // Find the first non-undef operand to compare the rest against.
        for (unsigned i = 0; i != NumElems; ++i) {
          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
            Base = V->getOperand(i);
            break;
          }
        }
        // Splat of <u, u, u, u>, return <u, u, u, u>
        if (!Base.getNode())
          return N0;
        for (unsigned i = 0; i != NumElems; ++i) {
          if (V->getOperand(i) != Base) {
            AllSame = false;
            break;
          }
        }
        // Splat of <x, x, x, x>, return <x, x, x, x>
        if (AllSame)
          return N0;
      }
    }
  }
  return SDValue();
}

/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
/// an AND to a vector_shuffle with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (N->getOpcode() == ISD::AND) {
    if (RHS.getOpcode() == ISD::BIT_CONVERT)
      RHS = RHS.getOperand(0);
    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
      SmallVector<int, 8> Indices;
      unsigned NumElts = RHS.getNumOperands();
      // Build a "clear" mask: lane i keeps LHS's element (index i) when the
      // AND mask element is all-ones, takes the zero vector (index NumElts)
      // when it is zero; any other constant defeats the transform.
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue Elt = RHS.getOperand(i);
        if (!isa<ConstantSDNode>(Elt))
          return SDValue();
        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
          Indices.push_back(i);
        else if (cast<ConstantSDNode>(Elt)->isNullValue())
          Indices.push_back(NumElts);
        else
          return SDValue();
      }

      // Let's see if the target supports this vector_shuffle.
      EVT RVT = RHS.getValueType();
      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
        return SDValue();

      // Return the new VECTOR_SHUFFLE node.
      EVT EltVT = RVT.getVectorElementType();
      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                     DAG.getConstant(0, EltVT));
      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                 RVT, &ZeroOps[0], ZeroOps.size());
      // Shuffle in RHS's (possibly bit-converted) vector type, then convert
      // the result back to the original type.
      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
    }
  }

  return SDValue();
}

/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  // After legalize, the target may be depending on adds and other
  // binary ops to provide legal ways to construct constants or other
  // things. Simplifying them may result in a loss of legality.
  if (LegalOperations) return SDValue();

  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");

  EVT EltType = VT.getVectorElementType();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  // Try turning an AND with a constant clear mask into a shuffle with zero.
  SDValue Shuffle = XformToShuffleWithZero(N);
  if (Shuffle.getNode()) return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);
      // If these two elements can't be folded, bail out.
      if ((LHSOp.getOpcode() != ISD::UNDEF &&
           LHSOp.getOpcode() != ISD::Constant &&
           LHSOp.getOpcode() != ISD::ConstantFP) ||
          (RHSOp.getOpcode() != ISD::UNDEF &&
           RHSOp.getOpcode() != ISD::Constant &&
           RHSOp.getOpcode() != ISD::ConstantFP))
        break;

      // Can't fold divide by zero.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if ((RHSOp.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
            (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
          break;
      }

      // Fold the scalar operation; the result must itself be a constant (or
      // undef), otherwise the whole vector fold is abandoned below.
      Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
                                EltType, LHSOp, RHSOp));
      AddToWorkList(Ops.back().getNode());
      assert((Ops.back().getOpcode() == ISD::UNDEF ||
              Ops.back().getOpcode() == ISD::Constant ||
              Ops.back().getOpcode() == ISD::ConstantFP) &&
             "Scalar binop didn't fold!");
    }

    // Only rebuild the vector if every element folded.
    if (Ops.size() == LHS.getNumOperands()) {
      EVT VT = LHS.getValueType();
      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                         &Ops[0], Ops.size());
    }
  }

  return SDValue();
}

/// SimplifySelect - Simplify (select (setcc ...), N1, N2) via SimplifySelectCC,
/// rebuilding any SELECT_CC result into separate SETCC and SELECT nodes.
SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorkList(SETCC.getNode());
      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
    }

    return SCC;
  }
  return SDValue();
}

/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select.  Callers of this should assume that TheSelect is deleted if this
/// returns true.  As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
    // If this is a load and the token chain is identical, replace the select
    // of two loads with a load through a select of the address to load from.
    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
    // constants have been dropped into the constant pool.
    if (LHS.getOpcode() == ISD::LOAD &&
        // Do not let this transformation reduce the number of volatile loads.
        !cast<LoadSDNode>(LHS)->isVolatile() &&
        !cast<LoadSDNode>(RHS)->isVolatile() &&
        // Token chains must be identical.
        LHS.getOperand(0) == RHS.getOperand(0)) {
      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
      LoadSDNode *RLD = cast<LoadSDNode>(RHS);

      // If this is an EXTLOAD, the VT's must match.
      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
            LLDAddrSpace = PT->getAddressSpace();
        }
        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
            RLDAddrSpace = PT->getAddressSpace();
        }
        // Addr stays null unless one of the two branches below proves the
        // transformation is safe and builds the selected address.
        SDValue Addr;
        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
          if (TheSelect->getOpcode() == ISD::SELECT) {
            // Check that the condition doesn't reach either load.  If so,
            // folding this will induce a cycle into the DAG.
            if ((!LLD->hasAnyUseOfValue(1) ||
                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
                (!RLD->hasAnyUseOfValue(1) ||
                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
                                 LLD->getBasePtr().getValueType(),
                                 TheSelect->getOperand(0), LLD->getBasePtr(),
                                 RLD->getBasePtr());
            }
          } else {
            // SELECT_CC: the condition is operands 0 and 1; check that it
            // doesn't reach either load.  If so, folding this will induce a
            // cycle into the DAG.
            if ((!LLD->hasAnyUseOfValue(1) ||
                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
                (!RLD->hasAnyUseOfValue(1) ||
                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
                                 LLD->getBasePtr().getValueType(),
                                 TheSelect->getOperand(0),
                                 TheSelect->getOperand(1),
                                 LLD->getBasePtr(), RLD->getBasePtr(),
                                 TheSelect->getOperand(4));
            }
          }
        }

        if (Addr.getNode()) {
          SDValue Load;
          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
            Load = DAG.getLoad(TheSelect->getValueType(0),
                               TheSelect->getDebugLoc(),
                               LLD->getChain(),
                               Addr, 0, 0,
                               LLD->isVolatile(),
                               LLD->isNonTemporal(),
                               LLD->getAlignment());
          } else {
            Load = DAG.getExtLoad(LLD->getExtensionType(),
                                  TheSelect->getDebugLoc(),
                                  TheSelect->getValueType(0),
                                  LLD->getChain(), Addr, 0, 0,
                                  LLD->getMemoryVT(),
                                  LLD->isVolatile(),
                                  LLD->isNonTemporal(),
                                  LLD->getAlignment());
          }

          // Users of the select now use the result of the load.
          CombineTo(TheSelect, Load);

          // Users of the old loads now use the new load's chain.  We know the
          // old-load value is dead now.
          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
          return true;
        }
      }
    }
  }

  return false;
}

/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());
  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());

  // fold select_cc true, x, y -> x
  if (SCCC && !SCCC->isNullValue())
    return N2;
  // fold select_cc false, x, y -> y
  if (SCCC && SCCC->isNullValue())
    return N3;

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->getValueAPF().isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types and we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
           TargetLowering::Legal) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Array layout is {false-value, true-value} so that the select below
        // can index it with offset 0 (false) or EltSize (true).
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        const Type *FPTy = Elts[0]->getType();
        const TargetData &TD = *TLI.getTargetData();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                            TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize);

        SDValue Cond = DAG.getSetCC(DL,
                                    TLI.getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
                                        Cond, One, Zero);
        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
                            CstOffset);
        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                           PseudoSourceValue::getConstantPool(), 0, false,
                           false, Alignment);

      }
    }

  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
      N0.getValueType().isInteger() &&
      N2.getValueType().isInteger() &&
      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
      // single-bit constant.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        // Shift the sign bit down into A's single set-bit position.
        ShCtV = XType.getSizeInBits()-ShCtV-1;
        SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
                                    XType, N0, ShCt);
        AddToWorkList(Shift.getNode());

        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorkList(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      // General case: smear the sign bit with an arithmetic shift and AND.
      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                                  getShiftAmountTy()));
      AddToWorkList(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorkList(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->getAPIntValue() == 1)
      return SDValue();

    // Get a SetCC of the condition
    // FIXME: Should probably make sure that setcc is legal if we ever have a
    // target where it isn't.
    SDValue Temp, SCC;
    // cast from setcc result type to select result type
    if (LegalTypes) {
      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
                          N0, N1, CC);
      if (N2.getValueType().bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                           N2.getValueType(), SCC);
    } else {
      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                         N2.getValueType(), SCC);
    }

    AddToWorkList(SCC.getNode());
    AddToWorkList(Temp.getNode());

    if (N2C->getAPIntValue() == 1)
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       getShiftAmountTy()));
  }

  // Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc is legal;
  // otherwise, go ahead with the folds.
  // NOTE: the 'if (0 && ...)' below deliberately disables this whole group of
  // folds (dead code kept as reference pending the FIXME above).
  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
    EVT XType = N0.getValueType();
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
      if (Res.getValueType() != VT)
        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
      return Res;
    }

    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::CTLZ, XType))) {
      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
                                         getShiftAmountTy()));
    }
    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
                                  XType, DAG.getConstant(0, XType), N0);
      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
      return DAG.getNode(ISD::SRL, DL, XType,
                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                         DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy()));
    }
    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
                                 DAG.getConstant(XType.getSizeInBits()-1,
                                                 getShiftAmountTy()));
      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
    }
  }

  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
      N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
    EVT XType = N0.getValueType();
    SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
                                DAG.getConstant(XType.getSizeInBits()-1,
                                                getShiftAmountTy()));
    SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
                              N0, Shift);
    AddToWorkList(Shift.getNode());
    AddToWorkList(Add.getNode());
    return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
  }
  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
      EVT XType = N0.getValueType();
      if (SubC->isNullValue() && XType.isInteger()) {
        SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
                                    N0,
                                    DAG.getConstant(XType.getSizeInBits()-1,
                                                    getShiftAmountTy()));
        SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
                                  XType, N0, Shift);
        AddToWorkList(Shift.getNode());
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
      }
    }
  }

  return SDValue();
}

/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
6713SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 6714 SDValue N1, ISD::CondCode Cond, 6715 DebugLoc DL, bool foldBooleans) { 6716 TargetLowering::DAGCombinerInfo 6717 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 6718 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 6719} 6720 6721/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 6722/// return a DAG expression to select that will generate the same value by 6723/// multiplying by a magic number. See: 6724/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 6725SDValue DAGCombiner::BuildSDIV(SDNode *N) { 6726 std::vector<SDNode*> Built; 6727 SDValue S = TLI.BuildSDIV(N, DAG, &Built); 6728 6729 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 6730 ii != ee; ++ii) 6731 AddToWorkList(*ii); 6732 return S; 6733} 6734 6735/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant, 6736/// return a DAG expression to select that will generate the same value by 6737/// multiplying by a magic number. See: 6738/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 6739SDValue DAGCombiner::BuildUDIV(SDNode *N) { 6740 std::vector<SDNode*> Built; 6741 SDValue S = TLI.BuildUDIV(N, DAG, &Built); 6742 6743 for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end(); 6744 ii != ee; ++ii) 6745 AddToWorkList(*ii); 6746 return S; 6747} 6748 6749/// FindBaseOffset - Return true if base is a frame index, which is known not 6750// to alias with anything but itself. Provides base object and offset as results. 6751static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 6752 const GlobalValue *&GV, void *&CV) { 6753 // Assume it is a primitive operation. 6754 Base = Ptr; Offset = 0; GV = 0; CV = 0; 6755 6756 // If it's an adding a simple constant then integrate the offset. 
6757 if (Base.getOpcode() == ISD::ADD) { 6758 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 6759 Base = Base.getOperand(0); 6760 Offset += C->getZExtValue(); 6761 } 6762 } 6763 6764 // Return the underlying GlobalValue, and update the Offset. Return false 6765 // for GlobalAddressSDNode since the same GlobalAddress may be represented 6766 // by multiple nodes with different offsets. 6767 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 6768 GV = G->getGlobal(); 6769 Offset += G->getOffset(); 6770 return false; 6771 } 6772 6773 // Return the underlying Constant value, and update the Offset. Return false 6774 // for ConstantSDNodes since the same constant pool entry may be represented 6775 // by multiple nodes with different offsets. 6776 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 6777 CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal() 6778 : (void *)C->getConstVal(); 6779 Offset += C->getOffset(); 6780 return false; 6781 } 6782 // If it's any of the following then it can't alias with anything but itself. 6783 return isa<FrameIndexSDNode>(Base); 6784} 6785 6786/// isAlias - Return true if there is any possibility that the two addresses 6787/// overlap. 6788bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, 6789 const Value *SrcValue1, int SrcValueOffset1, 6790 unsigned SrcValueAlign1, 6791 SDValue Ptr2, int64_t Size2, 6792 const Value *SrcValue2, int SrcValueOffset2, 6793 unsigned SrcValueAlign2) const { 6794 // If they are the same then they must be aliases. 6795 if (Ptr1 == Ptr2) return true; 6796 6797 // Gather base node and offset information. 
6798 SDValue Base1, Base2; 6799 int64_t Offset1, Offset2; 6800 const GlobalValue *GV1, *GV2; 6801 void *CV1, *CV2; 6802 bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1); 6803 bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2); 6804 6805 // If they have a same base address then check to see if they overlap. 6806 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 6807 return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1); 6808 6809 // If we know what the bases are, and they aren't identical, then we know they 6810 // cannot alias. 6811 if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) 6812 return false; 6813 6814 // If we know required SrcValue1 and SrcValue2 have relatively large alignment 6815 // compared to the size and offset of the access, we may be able to prove they 6816 // do not alias. This check is conservative for now to catch cases created by 6817 // splitting vector types. 6818 if ((SrcValueAlign1 == SrcValueAlign2) && 6819 (SrcValueOffset1 != SrcValueOffset2) && 6820 (Size1 == Size2) && (SrcValueAlign1 > Size1)) { 6821 int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1; 6822 int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1; 6823 6824 // There is no overlap between these relatively aligned accesses of similar 6825 // size, return no alias. 6826 if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1) 6827 return false; 6828 } 6829 6830 if (CombinerGlobalAA) { 6831 // Use alias analysis information. 6832 int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2); 6833 int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset; 6834 int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset; 6835 AliasAnalysis::AliasResult AAResult = 6836 AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2); 6837 if (AAResult == AliasAnalysis::NoAlias) 6838 return false; 6839 } 6840 6841 // Otherwise we have to assume they alias. 
  return true;
}

/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
/// Out-parameters: Ptr is the base pointer operand, Size the accessed size in
/// bytes (memory VT bits >> 3), and SrcValue/SrcValueOffset/SrcValueAlign the
/// IR-level value, offset, and original alignment recorded on the node.
/// Aborts (llvm_unreachable) if N is neither a load nor a store.
bool DAGCombiner::FindAliasInfo(SDNode *N,
                                SDValue &Ptr, int64_t &Size,
                                const Value *&SrcValue,
                                int &SrcValueOffset,
                                unsigned &SrcValueAlign) const {
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    Size = LD->getMemoryVT().getSizeInBits() >> 3;
    SrcValue = LD->getSrcValue();
    SrcValueOffset = LD->getSrcValueOffset();
    SrcValueAlign = LD->getOriginalAlignment();
    return true;
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    Size = ST->getMemoryVT().getSizeInBits() >> 3;
    SrcValue = ST->getSrcValue();
    SrcValueOffset = ST->getSrcValueOffset();
    SrcValueAlign = ST->getOriginalAlignment();
  } else {
    llvm_unreachable("FindAliasInfo expected a memory operand");
  }

  // Store (or unreachable) case: not a load.
  return false;
}

/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
/// N is the memory node whose chain is being improved; OriginalChain is its
/// current chain operand, used as the fallback when the walk gives up.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVector<SDValue, 8> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  SDValue Ptr;
  int64_t Size;
  const Value *SrcValue;
  int SrcValueOffset;
  unsigned SrcValueAlign;
  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
                              SrcValueAlign);

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias. If so, add it to the
  // aliases list. If not, then continue up the chain looking for the next
  // candidate.
  // Worklist walk: Chains is used as a stack of chain values still to visit.
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases. If we've seen two aliases, assume we'll
    // find more and revert to original chain since the xform is unlikely to be
    // profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      // Give up: fall back to the node's original chain unchanged.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      break;
    }

    // Don't bother if we've been before.
    // NOTE(review): relies on this LLVM era's SmallPtrSet::insert returning
    // bool (false when the node was already present).
    if (!Visited.insert(Chain.getNode()))
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      SDValue OpPtr;
      int64_t OpSize;
      const Value *OpSrcValue;
      int OpSrcValueOffset;
      unsigned OpSrcValueAlign;
      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                    OpSrcValue, OpSrcValueOffset,
                                    OpSrcValueAlign);

      // If chain is alias then stop here. Two loads never conflict with each
      // other (!(IsLoad && IsOpLoad)), so the walk continues past them.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
                  OpSrcValueAlign)) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain. Operand 0 of a load/store is its chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up. Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to be worth expanding; treat the whole factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node.)
/// Returns the entry node when nothing aliases, the single aliasing chain when
/// there is exactly one, or a fresh TokenFactor tying several aliases together.
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  if (Aliases.size() == 0) {
    // If no operands then chain to entry token.
    return DAG.getEntryNode();
  } else if (Aliases.size() == 1) {
    // If a single operand then chain to it. We don't need to revisit it.
    return Aliases[0];
  }

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                     &Aliases[0], Aliases.size());
}

// SelectionDAG::Combine - This is the entry point for the file.
// Constructs a DAGCombiner over this DAG and runs it at the given combine
// level and optimization level.
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  /// run - This is the main entry point to this class.
  ///
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}