DAGCombiner.cpp revision e0231413225cf47aaf3238bf21afd0d59025028d
1//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11// both before and after the DAG is legalized. 12// 13// This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14// primarily intended to handle simplification opportunities that are implicit 15// in the LLVM IR and exposed by the various codegen lowering phases. 16// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "dagcombine" 20#include "llvm/CodeGen/SelectionDAG.h" 21#include "llvm/DerivedTypes.h" 22#include "llvm/LLVMContext.h" 23#include "llvm/CodeGen/MachineFunction.h" 24#include "llvm/CodeGen/MachineFrameInfo.h" 25#include "llvm/Analysis/AliasAnalysis.h" 26#include "llvm/Target/TargetData.h" 27#include "llvm/Target/TargetLowering.h" 28#include "llvm/Target/TargetMachine.h" 29#include "llvm/Target/TargetOptions.h" 30#include "llvm/ADT/SmallPtrSet.h" 31#include "llvm/ADT/Statistic.h" 32#include "llvm/Support/CommandLine.h" 33#include "llvm/Support/Debug.h" 34#include "llvm/Support/ErrorHandling.h" 35#include "llvm/Support/MathExtras.h" 36#include "llvm/Support/raw_ostream.h" 37#include <algorithm> 38using namespace llvm; 39 40STATISTIC(NodesCombined , "Number of dag nodes combined"); 41STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 42STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 43STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 44STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 45 46namespace { 47 static cl::opt<bool> 48 CombinerAA("combiner-alias-analysis", cl::Hidden, 49 cl::desc("Turn 
on alias analysis during testing")); 50 51 static cl::opt<bool> 52 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 53 cl::desc("Include global information in alias analysis")); 54 55//------------------------------ DAGCombiner ---------------------------------// 56 57 class DAGCombiner { 58 SelectionDAG &DAG; 59 const TargetLowering &TLI; 60 CombineLevel Level; 61 CodeGenOpt::Level OptLevel; 62 bool LegalOperations; 63 bool LegalTypes; 64 65 // Worklist of all of the nodes that need to be simplified. 66 // 67 // This has the semantics that when adding to the worklist, 68 // the item added must be next to be processed. It should 69 // also only appear once. The naive approach to this takes 70 // linear time. 71 // 72 // To reduce the insert/remove time to logarithmic, we use 73 // a set and a vector to maintain our worklist. 74 // 75 // The set contains the items on the worklist, but does not 76 // maintain the order they should be visited. 77 // 78 // The vector maintains the order nodes should be visited, but may 79 // contain duplicate or removed nodes. When choosing a node to 80 // visit, we pop off the order stack until we find an item that is 81 // also in the contents set. All operations are O(log N). 82 SmallPtrSet<SDNode*, 64> WorkListContents; 83 SmallVector<SDNode*, 64> WorkListOrder; 84 85 // AA - Used for DAG load/store alias analysis. 86 AliasAnalysis &AA; 87 88 /// AddUsersToWorkList - When an instruction is simplified, add all users of 89 /// the instruction to the work lists because they might get more simplified 90 /// now. 91 /// 92 void AddUsersToWorkList(SDNode *N) { 93 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 94 UI != UE; ++UI) 95 AddToWorkList(*UI); 96 } 97 98 /// visit - call the node-specific routine that knows how to fold each 99 /// particular type of node. 
100 SDValue visit(SDNode *N); 101 102 public: 103 /// AddToWorkList - Add to the work list making sure its instance is at the 104 /// back (next to be processed.) 105 void AddToWorkList(SDNode *N) { 106 WorkListContents.insert(N); 107 WorkListOrder.push_back(N); 108 } 109 110 /// removeFromWorkList - remove all instances of N from the worklist. 111 /// 112 void removeFromWorkList(SDNode *N) { 113 WorkListContents.erase(N); 114 } 115 116 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 117 bool AddTo = true); 118 119 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 120 return CombineTo(N, &Res, 1, AddTo); 121 } 122 123 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 124 bool AddTo = true) { 125 SDValue To[] = { Res0, Res1 }; 126 return CombineTo(N, To, 2, AddTo); 127 } 128 129 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 130 131 private: 132 133 /// SimplifyDemandedBits - Check the specified integer node value to see if 134 /// it can be simplified or if things it uses can be simplified by bit 135 /// propagation. If so, return true. 
136 bool SimplifyDemandedBits(SDValue Op) { 137 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 138 APInt Demanded = APInt::getAllOnesValue(BitWidth); 139 return SimplifyDemandedBits(Op, Demanded); 140 } 141 142 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 143 144 bool CombineToPreIndexedLoadStore(SDNode *N); 145 bool CombineToPostIndexedLoadStore(SDNode *N); 146 147 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 148 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 149 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 150 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 151 SDValue PromoteIntBinOp(SDValue Op); 152 SDValue PromoteIntShiftOp(SDValue Op); 153 SDValue PromoteExtend(SDValue Op); 154 bool PromoteLoad(SDValue Op); 155 156 void ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 157 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 158 ISD::NodeType ExtType); 159 160 /// combine - call the node-specific routine that knows how to fold each 161 /// particular type of node. If that doesn't do anything, try the 162 /// target-specific DAG combines. 163 SDValue combine(SDNode *N); 164 165 // Visitation implementation - Implement dag node combining for different 166 // node types. The semantics are as follows: 167 // Return Value: 168 // SDValue.getNode() == 0 - No change was made 169 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 170 // otherwise - N should be replaced by the returned Operand. 
171 // 172 SDValue visitTokenFactor(SDNode *N); 173 SDValue visitMERGE_VALUES(SDNode *N); 174 SDValue visitADD(SDNode *N); 175 SDValue visitSUB(SDNode *N); 176 SDValue visitADDC(SDNode *N); 177 SDValue visitSUBC(SDNode *N); 178 SDValue visitADDE(SDNode *N); 179 SDValue visitSUBE(SDNode *N); 180 SDValue visitMUL(SDNode *N); 181 SDValue visitSDIV(SDNode *N); 182 SDValue visitUDIV(SDNode *N); 183 SDValue visitSREM(SDNode *N); 184 SDValue visitUREM(SDNode *N); 185 SDValue visitMULHU(SDNode *N); 186 SDValue visitMULHS(SDNode *N); 187 SDValue visitSMUL_LOHI(SDNode *N); 188 SDValue visitUMUL_LOHI(SDNode *N); 189 SDValue visitSMULO(SDNode *N); 190 SDValue visitUMULO(SDNode *N); 191 SDValue visitSDIVREM(SDNode *N); 192 SDValue visitUDIVREM(SDNode *N); 193 SDValue visitAND(SDNode *N); 194 SDValue visitOR(SDNode *N); 195 SDValue visitXOR(SDNode *N); 196 SDValue SimplifyVBinOp(SDNode *N); 197 SDValue visitSHL(SDNode *N); 198 SDValue visitSRA(SDNode *N); 199 SDValue visitSRL(SDNode *N); 200 SDValue visitCTLZ(SDNode *N); 201 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 202 SDValue visitCTTZ(SDNode *N); 203 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 204 SDValue visitCTPOP(SDNode *N); 205 SDValue visitSELECT(SDNode *N); 206 SDValue visitSELECT_CC(SDNode *N); 207 SDValue visitSETCC(SDNode *N); 208 SDValue visitSIGN_EXTEND(SDNode *N); 209 SDValue visitZERO_EXTEND(SDNode *N); 210 SDValue visitANY_EXTEND(SDNode *N); 211 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 212 SDValue visitTRUNCATE(SDNode *N); 213 SDValue visitBITCAST(SDNode *N); 214 SDValue visitBUILD_PAIR(SDNode *N); 215 SDValue visitFADD(SDNode *N); 216 SDValue visitFSUB(SDNode *N); 217 SDValue visitFMUL(SDNode *N); 218 SDValue visitFMA(SDNode *N); 219 SDValue visitFDIV(SDNode *N); 220 SDValue visitFREM(SDNode *N); 221 SDValue visitFCOPYSIGN(SDNode *N); 222 SDValue visitSINT_TO_FP(SDNode *N); 223 SDValue visitUINT_TO_FP(SDNode *N); 224 SDValue visitFP_TO_SINT(SDNode *N); 225 SDValue visitFP_TO_UINT(SDNode *N); 226 SDValue 
visitFP_ROUND(SDNode *N); 227 SDValue visitFP_ROUND_INREG(SDNode *N); 228 SDValue visitFP_EXTEND(SDNode *N); 229 SDValue visitFNEG(SDNode *N); 230 SDValue visitFABS(SDNode *N); 231 SDValue visitBRCOND(SDNode *N); 232 SDValue visitBR_CC(SDNode *N); 233 SDValue visitLOAD(SDNode *N); 234 SDValue visitSTORE(SDNode *N); 235 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 236 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 237 SDValue visitBUILD_VECTOR(SDNode *N); 238 SDValue visitCONCAT_VECTORS(SDNode *N); 239 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 240 SDValue visitVECTOR_SHUFFLE(SDNode *N); 241 SDValue visitMEMBARRIER(SDNode *N); 242 243 SDValue XformToShuffleWithZero(SDNode *N); 244 SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); 245 246 SDValue visitShiftByConstant(SDNode *N, unsigned Amt); 247 248 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 249 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 250 SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); 251 SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, 252 SDValue N3, ISD::CondCode CC, 253 bool NotExtCompare = false); 254 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 255 DebugLoc DL, bool foldBooleans = true); 256 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 257 unsigned HiOp); 258 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 259 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 260 SDValue BuildSDIV(SDNode *N); 261 SDValue BuildUDIV(SDNode *N); 262 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 263 bool DemandHighBits = true); 264 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 265 SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); 266 SDValue ReduceLoadWidth(SDNode *N); 267 SDValue ReduceLoadOpStoreWidth(SDNode *N); 268 SDValue TransformFPLoadStorePair(SDNode *N); 269 270 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 271 
272 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 273 /// looking for aliasing nodes and adding them to the Aliases vector. 274 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 275 SmallVector<SDValue, 8> &Aliases); 276 277 /// isAlias - Return true if there is any possibility that the two addresses 278 /// overlap. 279 bool isAlias(SDValue Ptr1, int64_t Size1, 280 const Value *SrcValue1, int SrcValueOffset1, 281 unsigned SrcValueAlign1, 282 const MDNode *TBAAInfo1, 283 SDValue Ptr2, int64_t Size2, 284 const Value *SrcValue2, int SrcValueOffset2, 285 unsigned SrcValueAlign2, 286 const MDNode *TBAAInfo2) const; 287 288 /// FindAliasInfo - Extracts the relevant alias information from the memory 289 /// node. Returns true if the operand was a load. 290 bool FindAliasInfo(SDNode *N, 291 SDValue &Ptr, int64_t &Size, 292 const Value *&SrcValue, int &SrcValueOffset, 293 unsigned &SrcValueAlignment, 294 const MDNode *&TBAAInfo) const; 295 296 /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, 297 /// looking for a better chain (aliasing node.) 298 SDValue FindBetterChain(SDNode *N, SDValue Chain); 299 300 public: 301 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 302 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 303 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} 304 305 /// Run - runs the dag combiner on all nodes in the work list 306 void Run(CombineLevel AtLevel); 307 308 SelectionDAG &getDAG() const { return DAG; } 309 310 /// getShiftAmountTy - Returns a type large enough to hold any valid 311 /// shift amount - before type legalization these can be huge. 312 EVT getShiftAmountTy(EVT LHSTy) { 313 return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); 314 } 315 316 /// isTypeLegal - This method returns true if we are running before type 317 /// legalization or if the specified VT is legal. 
318 bool isTypeLegal(const EVT &VT) { 319 if (!LegalTypes) return true; 320 return TLI.isTypeLegal(VT); 321 } 322 }; 323} 324 325 326namespace { 327/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted 328/// nodes from the worklist. 329class WorkListRemover : public SelectionDAG::DAGUpdateListener { 330 DAGCombiner &DC; 331public: 332 explicit WorkListRemover(DAGCombiner &dc) 333 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 334 335 virtual void NodeDeleted(SDNode *N, SDNode *E) { 336 DC.removeFromWorkList(N); 337 } 338}; 339} 340 341//===----------------------------------------------------------------------===// 342// TargetLowering::DAGCombinerInfo implementation 343//===----------------------------------------------------------------------===// 344 345void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 346 ((DAGCombiner*)DC)->AddToWorkList(N); 347} 348 349void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 350 ((DAGCombiner*)DC)->removeFromWorkList(N); 351} 352 353SDValue TargetLowering::DAGCombinerInfo:: 354CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 355 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 356} 357 358SDValue TargetLowering::DAGCombinerInfo:: 359CombineTo(SDNode *N, SDValue Res, bool AddTo) { 360 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 361} 362 363 364SDValue TargetLowering::DAGCombinerInfo:: 365CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 366 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 367} 368 369void TargetLowering::DAGCombinerInfo:: 370CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 371 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 372} 373 374//===----------------------------------------------------------------------===// 375// Helper Functions 376//===----------------------------------------------------------------------===// 377 378/// 
isNegatibleForFree - Return 1 if we can compute the negated form of the 379/// specified expression for the same cost as the expression itself, or 2 if we 380/// can compute the negated form more cheaply than the expression itself. 381static char isNegatibleForFree(SDValue Op, bool LegalOperations, 382 const TargetLowering &TLI, 383 const TargetOptions *Options, 384 unsigned Depth = 0) { 385 // No compile time optimizations on this type. 386 if (Op.getValueType() == MVT::ppcf128) 387 return 0; 388 389 // fneg is removable even if it has multiple uses. 390 if (Op.getOpcode() == ISD::FNEG) return 2; 391 392 // Don't allow anything with multiple uses. 393 if (!Op.hasOneUse()) return 0; 394 395 // Don't recurse exponentially. 396 if (Depth > 6) return 0; 397 398 switch (Op.getOpcode()) { 399 default: return false; 400 case ISD::ConstantFP: 401 // Don't invert constant FP values after legalize. The negated constant 402 // isn't necessarily legal. 403 return LegalOperations ? 0 : 1; 404 case ISD::FADD: 405 // FIXME: determine better conditions for this xform. 406 if (!Options->UnsafeFPMath) return 0; 407 408 // After operation legalization, it might not be legal to create new FSUBs. 409 if (LegalOperations && 410 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 411 return 0; 412 413 // fold (fsub (fadd A, B)) -> (fsub (fneg A), B) 414 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 415 Options, Depth + 1)) 416 return V; 417 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 418 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 419 Depth + 1); 420 case ISD::FSUB: 421 // We can't turn -(A-B) into B-A when we honor signed zeros. 
422 if (!Options->UnsafeFPMath) return 0; 423 424 // fold (fneg (fsub A, B)) -> (fsub B, A) 425 return 1; 426 427 case ISD::FMUL: 428 case ISD::FDIV: 429 if (Options->HonorSignDependentRoundingFPMath()) return 0; 430 431 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 432 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 433 Options, Depth + 1)) 434 return V; 435 436 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 437 Depth + 1); 438 439 case ISD::FP_EXTEND: 440 case ISD::FP_ROUND: 441 case ISD::FSIN: 442 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 443 Depth + 1); 444 } 445} 446 447/// GetNegatedExpression - If isNegatibleForFree returns true, this function 448/// returns the newly negated expression. 449static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 450 bool LegalOperations, unsigned Depth = 0) { 451 // fneg is removable even if it has multiple uses. 452 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 453 454 // Don't allow anything with multiple uses. 455 assert(Op.hasOneUse() && "Unknown reuse!"); 456 457 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 458 switch (Op.getOpcode()) { 459 default: llvm_unreachable("Unknown code"); 460 case ISD::ConstantFP: { 461 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 462 V.changeSign(); 463 return DAG.getConstantFP(V, Op.getValueType()); 464 } 465 case ISD::FADD: 466 // FIXME: determine better conditions for this xform. 
467 assert(DAG.getTarget().Options.UnsafeFPMath); 468 469 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 470 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 471 DAG.getTargetLoweringInfo(), 472 &DAG.getTarget().Options, Depth+1)) 473 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 474 GetNegatedExpression(Op.getOperand(0), DAG, 475 LegalOperations, Depth+1), 476 Op.getOperand(1)); 477 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 478 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 479 GetNegatedExpression(Op.getOperand(1), DAG, 480 LegalOperations, Depth+1), 481 Op.getOperand(0)); 482 case ISD::FSUB: 483 // We can't turn -(A-B) into B-A when we honor signed zeros. 484 assert(DAG.getTarget().Options.UnsafeFPMath); 485 486 // fold (fneg (fsub 0, B)) -> B 487 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 488 if (N0CFP->getValueAPF().isZero()) 489 return Op.getOperand(1); 490 491 // fold (fneg (fsub A, B)) -> (fsub B, A) 492 return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), 493 Op.getOperand(1), Op.getOperand(0)); 494 495 case ISD::FMUL: 496 case ISD::FDIV: 497 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 498 499 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 500 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 501 DAG.getTargetLoweringInfo(), 502 &DAG.getTarget().Options, Depth+1)) 503 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 504 GetNegatedExpression(Op.getOperand(0), DAG, 505 LegalOperations, Depth+1), 506 Op.getOperand(1)); 507 508 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 509 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 510 Op.getOperand(0), 511 GetNegatedExpression(Op.getOperand(1), DAG, 512 LegalOperations, Depth+1)); 513 514 case ISD::FP_EXTEND: 515 case ISD::FSIN: 516 return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), 517 
GetNegatedExpression(Op.getOperand(0), DAG, 518 LegalOperations, Depth+1)); 519 case ISD::FP_ROUND: 520 return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), 521 GetNegatedExpression(Op.getOperand(0), DAG, 522 LegalOperations, Depth+1), 523 Op.getOperand(1)); 524 } 525} 526 527 528// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 529// that selects between the values 1 and 0, making it equivalent to a setcc. 530// Also, set the incoming LHS, RHS, and CC references to the appropriate 531// nodes based on the type of node we are checking. This simplifies life a 532// bit for the callers. 533static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 534 SDValue &CC) { 535 if (N.getOpcode() == ISD::SETCC) { 536 LHS = N.getOperand(0); 537 RHS = N.getOperand(1); 538 CC = N.getOperand(2); 539 return true; 540 } 541 if (N.getOpcode() == ISD::SELECT_CC && 542 N.getOperand(2).getOpcode() == ISD::Constant && 543 N.getOperand(3).getOpcode() == ISD::Constant && 544 cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 && 545 cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) { 546 LHS = N.getOperand(0); 547 RHS = N.getOperand(1); 548 CC = N.getOperand(4); 549 return true; 550 } 551 return false; 552} 553 554// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 555// one use. If this is true, it allows the users to invert the operation for 556// free when it is profitable to do so. 557static bool isOneUseSetCC(SDValue N) { 558 SDValue N0, N1, N2; 559 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 560 return true; 561 return false; 562} 563 564SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, 565 SDValue N0, SDValue N1) { 566 EVT VT = N0.getValueType(); 567 if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) { 568 if (isa<ConstantSDNode>(N1)) { 569 // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) 570 SDValue OpNode = 571 DAG.FoldConstantArithmetic(Opc, VT, 572 cast<ConstantSDNode>(N0.getOperand(1)), 573 cast<ConstantSDNode>(N1)); 574 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 575 } 576 if (N0.hasOneUse()) { 577 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use 578 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 579 N0.getOperand(0), N1); 580 AddToWorkList(OpNode.getNode()); 581 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 582 } 583 } 584 585 if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) { 586 if (isa<ConstantSDNode>(N0)) { 587 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 588 SDValue OpNode = 589 DAG.FoldConstantArithmetic(Opc, VT, 590 cast<ConstantSDNode>(N1.getOperand(1)), 591 cast<ConstantSDNode>(N0)); 592 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 593 } 594 if (N1.hasOneUse()) { 595 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use 596 SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, 597 N1.getOperand(0), N0); 598 AddToWorkList(OpNode.getNode()); 599 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 600 } 601 } 602 603 return SDValue(); 604} 605 606SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 607 bool AddTo) { 608 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 609 ++NodesCombined; 610 DEBUG(dbgs() << "\nReplacing.1 "; 611 N->dump(&DAG); 612 dbgs() << "\nWith: "; 613 To[0].getNode()->dump(&DAG); 614 dbgs() << " and " << NumTo-1 << " other values\n"; 615 for (unsigned i = 0, e = NumTo; i != e; ++i) 616 assert((!To[i].getNode() || 617 N->getValueType(i) == To[i].getValueType()) && 618 "Cannot combine value to value of different type!")); 619 WorkListRemover DeadNodes(*this); 620 DAG.ReplaceAllUsesWith(N, To); 621 if (AddTo) { 622 // Push the new nodes and any users onto the worklist 623 for (unsigned i = 0, e = NumTo; i 
!= e; ++i) { 624 if (To[i].getNode()) { 625 AddToWorkList(To[i].getNode()); 626 AddUsersToWorkList(To[i].getNode()); 627 } 628 } 629 } 630 631 // Finally, if the node is now dead, remove it from the graph. The node 632 // may not be dead if the replacement process recursively simplified to 633 // something else needing this node. 634 if (N->use_empty()) { 635 // Nodes can be reintroduced into the worklist. Make sure we do not 636 // process a node that has been replaced. 637 removeFromWorkList(N); 638 639 // Finally, since the node is now dead, remove it from the graph. 640 DAG.DeleteNode(N); 641 } 642 return SDValue(N, 0); 643} 644 645void DAGCombiner:: 646CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 647 // Replace all uses. If any nodes become isomorphic to other nodes and 648 // are deleted, make sure to remove them from our worklist. 649 WorkListRemover DeadNodes(*this); 650 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 651 652 // Push the new node and any (possibly new) users onto the worklist. 653 AddToWorkList(TLO.New.getNode()); 654 AddUsersToWorkList(TLO.New.getNode()); 655 656 // Finally, if the node is now dead, remove it from the graph. The node 657 // may not be dead if the replacement process recursively simplified to 658 // something else needing this node. 659 if (TLO.Old.getNode()->use_empty()) { 660 removeFromWorkList(TLO.Old.getNode()); 661 662 // If the operands of this node are only used by the node, they will now 663 // be dead. Make sure to visit them first to delete dead nodes early. 
664 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 665 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 666 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 667 668 DAG.DeleteNode(TLO.Old.getNode()); 669 } 670} 671 672/// SimplifyDemandedBits - Check the specified integer node value to see if 673/// it can be simplified or if things it uses can be simplified by bit 674/// propagation. If so, return true. 675bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 676 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 677 APInt KnownZero, KnownOne; 678 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 679 return false; 680 681 // Revisit the node. 682 AddToWorkList(Op.getNode()); 683 684 // Replace the old value with the new one. 685 ++NodesCombined; 686 DEBUG(dbgs() << "\nReplacing.2 "; 687 TLO.Old.getNode()->dump(&DAG); 688 dbgs() << "\nWith: "; 689 TLO.New.getNode()->dump(&DAG); 690 dbgs() << '\n'); 691 692 CommitTargetLoweringOpt(TLO); 693 return true; 694} 695 696void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 697 DebugLoc dl = Load->getDebugLoc(); 698 EVT VT = Load->getValueType(0); 699 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 700 701 DEBUG(dbgs() << "\nReplacing.9 "; 702 Load->dump(&DAG); 703 dbgs() << "\nWith: "; 704 Trunc.getNode()->dump(&DAG); 705 dbgs() << '\n'); 706 WorkListRemover DeadNodes(*this); 707 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 708 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 709 removeFromWorkList(Load); 710 DAG.DeleteNode(Load); 711 AddToWorkList(Trunc.getNode()); 712} 713 714SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 715 Replace = false; 716 DebugLoc dl = Op.getDebugLoc(); 717 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 718 EVT MemVT = LD->getMemoryVT(); 719 ISD::LoadExtType ExtType = 
ISD::isNON_EXTLoad(LD) 720 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD 721 : ISD::EXTLOAD) 722 : LD->getExtensionType(); 723 Replace = true; 724 return DAG.getExtLoad(ExtType, dl, PVT, 725 LD->getChain(), LD->getBasePtr(), 726 LD->getPointerInfo(), 727 MemVT, LD->isVolatile(), 728 LD->isNonTemporal(), LD->getAlignment()); 729 } 730 731 unsigned Opc = Op.getOpcode(); 732 switch (Opc) { 733 default: break; 734 case ISD::AssertSext: 735 return DAG.getNode(ISD::AssertSext, dl, PVT, 736 SExtPromoteOperand(Op.getOperand(0), PVT), 737 Op.getOperand(1)); 738 case ISD::AssertZext: 739 return DAG.getNode(ISD::AssertZext, dl, PVT, 740 ZExtPromoteOperand(Op.getOperand(0), PVT), 741 Op.getOperand(1)); 742 case ISD::Constant: { 743 unsigned ExtOpc = 744 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 745 return DAG.getNode(ExtOpc, dl, PVT, Op); 746 } 747 } 748 749 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 750 return SDValue(); 751 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 752} 753 754SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 755 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 756 return SDValue(); 757 EVT OldVT = Op.getValueType(); 758 DebugLoc dl = Op.getDebugLoc(); 759 bool Replace = false; 760 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 761 if (NewOp.getNode() == 0) 762 return SDValue(); 763 AddToWorkList(NewOp.getNode()); 764 765 if (Replace) 766 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 767 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 768 DAG.getValueType(OldVT)); 769} 770 771SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 772 EVT OldVT = Op.getValueType(); 773 DebugLoc dl = Op.getDebugLoc(); 774 bool Replace = false; 775 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 776 if (NewOp.getNode() == 0) 777 return SDValue(); 778 AddToWorkList(NewOp.getNode()); 779 780 if (Replace) 781 ReplaceLoadWithPromotedLoad(Op.getNode(), 
NewOp.getNode()); 782 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 783} 784 785/// PromoteIntBinOp - Promote the specified integer binary operation if the 786/// target indicates it is beneficial. e.g. On x86, it's usually better to 787/// promote i16 operations to i32 since i16 instructions are longer. 788SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 789 if (!LegalOperations) 790 return SDValue(); 791 792 EVT VT = Op.getValueType(); 793 if (VT.isVector() || !VT.isInteger()) 794 return SDValue(); 795 796 // If operation type is 'undesirable', e.g. i16 on x86, consider 797 // promoting it. 798 unsigned Opc = Op.getOpcode(); 799 if (TLI.isTypeDesirableForOp(Opc, VT)) 800 return SDValue(); 801 802 EVT PVT = VT; 803 // Consult target whether it is a good idea to promote this operation and 804 // what's the right type to promote it to. 805 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 806 assert(PVT != VT && "Don't know what type to promote to!"); 807 808 bool Replace0 = false; 809 SDValue N0 = Op.getOperand(0); 810 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 811 if (NN0.getNode() == 0) 812 return SDValue(); 813 814 bool Replace1 = false; 815 SDValue N1 = Op.getOperand(1); 816 SDValue NN1; 817 if (N0 == N1) 818 NN1 = NN0; 819 else { 820 NN1 = PromoteOperand(N1, PVT, Replace1); 821 if (NN1.getNode() == 0) 822 return SDValue(); 823 } 824 825 AddToWorkList(NN0.getNode()); 826 if (NN1.getNode()) 827 AddToWorkList(NN1.getNode()); 828 829 if (Replace0) 830 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 831 if (Replace1) 832 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 833 834 DEBUG(dbgs() << "\nPromoting "; 835 Op.getNode()->dump(&DAG)); 836 DebugLoc dl = Op.getDebugLoc(); 837 return DAG.getNode(ISD::TRUNCATE, dl, VT, 838 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 839 } 840 return SDValue(); 841} 842 843/// PromoteIntShiftOp - Promote the specified integer shift operation if the 844/// target indicates it is beneficial. e.g. 
On x86, it's usually better to 845/// promote i16 operations to i32 since i16 instructions are longer. 846SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 847 if (!LegalOperations) 848 return SDValue(); 849 850 EVT VT = Op.getValueType(); 851 if (VT.isVector() || !VT.isInteger()) 852 return SDValue(); 853 854 // If operation type is 'undesirable', e.g. i16 on x86, consider 855 // promoting it. 856 unsigned Opc = Op.getOpcode(); 857 if (TLI.isTypeDesirableForOp(Opc, VT)) 858 return SDValue(); 859 860 EVT PVT = VT; 861 // Consult target whether it is a good idea to promote this operation and 862 // what's the right type to promote it to. 863 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 864 assert(PVT != VT && "Don't know what type to promote to!"); 865 866 bool Replace = false; 867 SDValue N0 = Op.getOperand(0); 868 if (Opc == ISD::SRA) 869 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 870 else if (Opc == ISD::SRL) 871 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 872 else 873 N0 = PromoteOperand(N0, PVT, Replace); 874 if (N0.getNode() == 0) 875 return SDValue(); 876 877 AddToWorkList(N0.getNode()); 878 if (Replace) 879 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 880 881 DEBUG(dbgs() << "\nPromoting "; 882 Op.getNode()->dump(&DAG)); 883 DebugLoc dl = Op.getDebugLoc(); 884 return DAG.getNode(ISD::TRUNCATE, dl, VT, 885 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 886 } 887 return SDValue(); 888} 889 890SDValue DAGCombiner::PromoteExtend(SDValue Op) { 891 if (!LegalOperations) 892 return SDValue(); 893 894 EVT VT = Op.getValueType(); 895 if (VT.isVector() || !VT.isInteger()) 896 return SDValue(); 897 898 // If operation type is 'undesirable', e.g. i16 on x86, consider 899 // promoting it. 900 unsigned Opc = Op.getOpcode(); 901 if (TLI.isTypeDesirableForOp(Opc, VT)) 902 return SDValue(); 903 904 EVT PVT = VT; 905 // Consult target whether it is a good idea to promote this operation and 906 // what's the right type to promote it to. 
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    // Rebuild the extension directly from the original source operand.
    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// PromoteLoad - Promote the result type of a load when the target considers
/// the current type undesirable.  Returns true iff the load was replaced.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    DebugLoc dl = Op.getDebugLoc();
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load becomes an extending load of the wider type;
    // prefer a zext load when the target supports it, otherwise any-extend.
    // Extending loads keep their existing extension type.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ?
         ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   LD->getPointerInfo(),
                                   MemVT, LD->isVolatile(),
                                   LD->isNonTemporal(), LD->getAlignment());
    // Truncate back so existing users still see the original value type.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorkListRemover DeadNodes(*this);
    // Rewire both the value result (0) and the chain result (1) of the old
    // load before deleting it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    AddToWorkList(Result.getNode());
    return true;
  }
  return false;
}


//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

/// Run - Repeatedly pull nodes off the worklist and try to combine each one
/// until the worklist is empty.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    AddToWorkList(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // while the worklist isn't empty, find a node and
  // try and combine it.
  while (!WorkListContents.empty()) {
    SDNode *N;
    // The WorkListOrder holds the SDNodes in order, but it may contain
    // duplicates.
    // In order to avoid a linear scan, we use a set (O(log N)) to hold what
    // the worklist *should* contain, and check that the node we want to visit
    // should actually be visited.
    do {
      N = WorkListOrder.pop_back_val();
    } while (!WorkListContents.erase(N));

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    // Null means no combine applied; move on.
    if (RV.getNode() == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // A value-count mismatch is only legal when N produced a single value
      // of the same type; replace that one value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them.  They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist.  Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// visit - Dispatch N to the visitXXX routine matching its opcode.  Returns
/// the replacement value, or a null SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case
ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::MEMBARRIER:         return visitMEMBARRIER(N);
  }
  // Unhandled opcode: no combine applies.
  return SDValue();
}

/// combine - Try the generic visit routines first, then target-specific
/// combines, then integer promotion, and finally commuted-operand CSE.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (RV.getNode() == 0) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    // Target nodes (>= BUILTIN_OP_END) and opcodes the target registered for
    // are handed to the target's combiner.
    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (RV.getNode() == 0) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so returning N signals "changed".
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (RV.getNode() == 0 &&
      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = { N1, N0 };
      // Only fold to an existing node; never create a new commuted node.
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
                                            Ops, 2);
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.
1229static SDValue getInputChainForNode(SDNode *N) { 1230 if (unsigned NumOps = N->getNumOperands()) { 1231 if (N->getOperand(0).getValueType() == MVT::Other) 1232 return N->getOperand(0); 1233 else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1234 return N->getOperand(NumOps-1); 1235 for (unsigned i = 1; i < NumOps-1; ++i) 1236 if (N->getOperand(i).getValueType() == MVT::Other) 1237 return N->getOperand(i); 1238 } 1239 return SDValue(); 1240} 1241 1242SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1243 // If N has two operands, where one has an input chain equal to the other, 1244 // the 'other' chain is redundant. 1245 if (N->getNumOperands() == 2) { 1246 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1247 return N->getOperand(0); 1248 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1249 return N->getOperand(1); 1250 } 1251 1252 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1253 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1254 SmallPtrSet<SDNode*, 16> SeenOps; 1255 bool Changed = false; // If we should replace this token factor. 1256 1257 // Start out with this token factor. 1258 TFs.push_back(N); 1259 1260 // Iterate through token factors. The TFs grows when new token factors are 1261 // encountered. 1262 for (unsigned i = 0; i < TFs.size(); ++i) { 1263 SDNode *TF = TFs[i]; 1264 1265 // Check each of the operands. 1266 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { 1267 SDValue Op = TF->getOperand(i); 1268 1269 switch (Op.getOpcode()) { 1270 case ISD::EntryToken: 1271 // Entry tokens don't need to be added to the list. They are 1272 // rededundant. 1273 Changed = true; 1274 break; 1275 1276 case ISD::TokenFactor: 1277 if (Op.hasOneUse() && 1278 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1279 // Queue up for processing. 1280 TFs.push_back(Op.getNode()); 1281 // Clean up in case the token factor is removed. 
          AddToWorkList(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()))
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                           MVT::Other, &Ops[0], Ops.size());
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorkListRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it.  Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorkList(N);
  do {
    // Forward each result of the MERGE_VALUES to its corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  removeFromWorkList(N);
  DAG.DeleteNode(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// combineShlAddConstant - Helper for visitADD: rewrites
/// (add (shl (add x, c1), c2), N1) into (add (add (shl x, c2), c1<<c2), N1)
/// so the inner constant can be folded.  Returns null when the pattern does
/// not match.
static
SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
                              SelectionDAG &DAG) {
  EVT VT = N0.getValueType();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);

  // Require a constant shift amount, an inner add with a constant RHS, and a
  // single use of the inner add so no extra computation is duplicated.
  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N00.getOperand(1))) {
    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
                                 N00.getOperand(0), N01),
                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
                                 N00.getOperand(1), N01));
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
                                  GA->getOffset() +
                                    (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
  if (RADD.getNode() != 0)
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
  }

  // Try to simplify based on which bits of the result are actually demanded.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
    if (Result.getNode()) return Result;
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // Same fold with the shl on the LHS.
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      // AndOp0 is all sign bits, i.e. 0 or -1, so (and AndOp0, 1) is 0 or 1
      // and its negation is AndOp0 itself: z + (AndOp0 & 1) == z - AndOp0.
      DebugLoc DL = N->getDebugLoc();
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    DebugLoc DL = N->getDebugLoc();
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 N->getDebugLoc(), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        N->getDebugLoc(), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.ComputeMaskedBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.ComputeMaskedBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1555 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1556 return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), 1557 DAG.getNode(ISD::CARRY_FALSE, 1558 N->getDebugLoc(), MVT::Glue)); 1559 } 1560 1561 return SDValue(); 1562} 1563 1564SDValue DAGCombiner::visitADDE(SDNode *N) { 1565 SDValue N0 = N->getOperand(0); 1566 SDValue N1 = N->getOperand(1); 1567 SDValue CarryIn = N->getOperand(2); 1568 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1569 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1570 1571 // canonicalize constant to RHS 1572 if (N0C && !N1C) 1573 return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), 1574 N1, N0, CarryIn); 1575 1576 // fold (adde x, y, false) -> (addc x, y) 1577 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1578 return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); 1579 1580 return SDValue(); 1581} 1582 1583// Since it may not be valid to emit a fold to zero for vector initializers 1584// check if we can before folding. 1585static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, 1586 SelectionDAG &DAG, bool LegalOperations) { 1587 if (!VT.isVector()) { 1588 return DAG.getConstant(0, VT); 1589 } 1590 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { 1591 // Produce a vector of zeros. 1592 SDValue El = DAG.getConstant(0, VT.getVectorElementType()); 1593 std::vector<SDValue> Ops(VT.getVectorNumElements(), El); 1594 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, 1595 &Ops[0], Ops.size()); 1596 } 1597 return SDValue(); 1598} 1599 1600SDValue DAGCombiner::visitSUB(SDNode *N) { 1601 SDValue N0 = N->getOperand(0); 1602 SDValue N1 = N->getOperand(1); 1603 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1604 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1605 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? 
                                           0 :
                      dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant((N0C->getAPIntValue() -
                                    N1C1->getAPIntValue()), VT);
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into a SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
                                 MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (mul c1, c2) -> c1*c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return
DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0); 1755 // fold (mul x, 0) -> 0 1756 if (N1C && N1C->isNullValue()) 1757 return N1; 1758 // fold (mul x, -1) -> 0-x 1759 if (N1C && N1C->isAllOnesValue()) 1760 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1761 DAG.getConstant(0, VT), N0); 1762 // fold (mul x, (1 << c)) -> x << c 1763 if (N1C && N1C->getAPIntValue().isPowerOf2()) 1764 return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, 1765 DAG.getConstant(N1C->getAPIntValue().logBase2(), 1766 getShiftAmountTy(N0.getValueType()))); 1767 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 1768 if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { 1769 unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); 1770 // FIXME: If the input is something that is easily negated (e.g. a 1771 // single-use add), we should put the negate there. 1772 return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, 1773 DAG.getConstant(0, VT), 1774 DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, 1775 DAG.getConstant(Log2Val, 1776 getShiftAmountTy(N0.getValueType())))); 1777 } 1778 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 1779 if (N1C && N0.getOpcode() == ISD::SHL && 1780 isa<ConstantSDNode>(N0.getOperand(1))) { 1781 SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, 1782 N1, N0.getOperand(1)); 1783 AddToWorkList(C3.getNode()); 1784 return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, 1785 N0.getOperand(0), C3); 1786 } 1787 1788 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 1789 // use. 1790 { 1791 SDValue Sh(0,0), Y(0,0); 1792 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
    // The one-use restriction keeps us from duplicating the shift.
    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // The second multiply is constant*constant and folds away on revisit.
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
  if (RMUL.getNode() != 0)
    return RMUL;

  return SDValue();
}

/// visitSDIV - Combine a signed integer divide: constant folding, trivial
/// divisors (1, -1), strength reduction to udiv or shifts, and a
/// multiply-by-magic-number sequence (BuildSDIV) when division is expensive.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sdiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->getAPIntValue() == 1LL)
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), N0);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
                         N0, N1);
  }
  // fold (sdiv X, pow2) -> simple ops after legalize
  // Handles both +2^c and -2^c divisors; countTrailingZeros gives c either
  // way.
  if (N1C && !N1C->isNullValue() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // If dividing by powers of two is cheap, then don't perform the following
    // fold.
    if (TLI.isPow2DivCheap())
      return SDValue();

    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                              DAG.getConstant(VT.getSizeInBits()-1,
                                       getShiftAmountTy(N0.getValueType())));
    AddToWorkList(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    // Shifting the sign splat right by (bits - lg2) leaves the low lg2 bits
    // set exactly when N0 is negative, i.e. the rounding-toward-zero bias.
    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
                              DAG.getConstant(VT.getSizeInBits() - lg2,
                                       getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
    AddToWorkList(SRL.getNode());
    AddToWorkList(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
                  DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorkList(SRA.getNode());
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       DAG.getConstant(0, VT), SRA);
  }

  // if integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildSDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUDIV - Combine an unsigned integer divide: constant folding,
/// power-of-two divisors to logical shifts, and the BuildUDIV
/// magic-number sequence when division is expensive.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (udiv c1, c2) -> c1/c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  // The add is computed in the shift-amount type, not VT.
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                .logBase2(),
                                                  ADDVT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if
(N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitSREM - Combine a signed remainder.  Besides constant folding, the
/// main trick is rewriting X % C as X - (X/C)*C whenever the divide itself
/// can be simplified by the division-by-constant logic.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // Speculatively build the SDIV and ask combine() whether it improves; only
  // commit to the sub/mul expansion if it did.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUREM - Combine an unsigned remainder.  Power-of-two (and shifted
/// power-of-two) divisors become AND masks; otherwise the same
/// X - (X/C)*C rewrite as visitSREM is attempted.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  // Adding all-ones is "subtract 1", producing the mask at run time.
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        SDValue Add =
          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitMULHS - Combine a signed multiply-high node (the upper half of the
/// double-width product).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhs x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  // The high half of x*1 is just the sign-extension of x.
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       getShiftAmountTy(N0.getValueType())));
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands, multiply at double width, then the high
      // half is the wide product shifted down by the original bit width.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// visitMULHU - Combine an unsigned multiply-high node (the upper half of
/// the double-width product).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // fold (mulhu x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhu x, 1) -> 0
  // The high half of an unsigned x*1 is always zero.
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getConstant(0, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Same recipe as visitMULHS but with zero-extension.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values. LoOp and HiOp give the opcodes for the two computations
/// that are being performed. Return the simplified value (a non-null SDValue)
/// if a simplification was made, otherwise a null SDValue.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
    // CombineTo(N, Res, Res): both result values are replaced; the dead high
    // half simply maps to the same node.
    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
                              N->op_begin(), N->getNumOperands());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  // Build the single-result node speculatively and keep it only if combine()
  // can improve it (and the improved opcode is legal when that matters).
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
                             N->op_begin(), N->getNumOperands());
    AddToWorkList(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
                             N->op_begin(), N->getNumOperands());
    AddToWorkList(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

/// visitSMUL_LOHI - Combine a node producing both halves of a signed
/// double-width multiply.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitUMUL_LOHI - Combine a node producing both halves of an unsigned
/// double-width multiply.
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  DebugLoc DL = N->getDebugLoc();

  // If the type twice as wide is legal, transform the umul_lohi to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitSMULO - Combine a signed multiply-with-overflow node.
SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  // x+x overflows exactly when x*2 does, and add is cheaper than multiply.
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitUMULO - Combine an unsigned multiply-with-overflow node.
SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitSDIVREM - Combine a combined signed divide/remainder node.
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// visitUDIVREM - Combine a combined unsigned divide/remainder node.
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  // Every pattern below inspects N0's operands, so leaf hands can't match.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  // Valid because the logical op distributes over a shift/mask by the same
  // amount z.
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST || N0.getOpcode() == ISD::SCALAR_TO_VECTOR)
      && Level == AfterLegalizeVectorOps) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    // If both incoming values are integers, and the original types are the same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N0.getOperand(1).getOpcode() == ISD::UNDEF &&
      N1.getOperand(1).getOpcode() == ISD::UNDEF) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    // NOTE(review): this compares N0's first input against N1's *second*
    // operand (the UNDEF); N1.getOperand(0) looks intended.  Benign here
    // because all operands of a VECTOR_SHUFFLE share the result VT, but
    // worth confirming against upstream.
    assert(N0.getOperand(0).getValueType() == N1.getOperand(1).getValueType() &&
           "Inputs to shuffles are not the same type");

    unsigned NumElts = VT.getVectorNumElements();

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    bool SameMask = true;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx0 = SVN0->getMaskElt(i);
      int Idx1 = SVN1->getMaskElt(i);
      if (Idx0 != Idx1) {
        SameMask = false;
        break;
      }
    }

    if (SameMask) {
      SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT,
                               N0.getOperand(0), N1.getOperand(0));
      AddToWorkList(Op.getNode());
      return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op,
                                  DAG.getUNDEF(VT), &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}

/// visitAND - Combine a bitwise AND node.  (This function continues past the
/// end of this excerpt.)
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();
  unsigned BitWidth = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (and c1, c2) -> c1&c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
  // fold (and x, -1) -> x
  if (N1C && N1C->isAllOnesValue())
    return N0;
  // if (and x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, VT);
  // reassociate and
  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
  if (RAND.getNode() != 0)
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI =
dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2437 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2438 return N1; 2439 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2440 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2441 SDValue N0Op0 = N0.getOperand(0); 2442 APInt Mask = ~N1C->getAPIntValue(); 2443 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2444 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2445 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), 2446 N0.getValueType(), N0Op0); 2447 2448 // Replace uses of the AND with uses of the Zero extend node. 2449 CombineTo(N, Zext); 2450 2451 // We actually want to replace all uses of the any_extend with the 2452 // zero_extend, to avoid duplicating things. This will later cause this 2453 // AND to be folded. 2454 CombineTo(N0.getNode(), Zext); 2455 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2456 } 2457 } 2458 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2459 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2460 // already be zero by virtue of the width of the base type of the load. 2461 // 2462 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2463 // more cases. 2464 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2465 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2466 N0.getOpcode() == ISD::LOAD) { 2467 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2468 N0 : N0.getOperand(0) ); 2469 2470 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2471 // This can be a pure constant or a vector splat, in which case we treat the 2472 // vector as a scalar and use the splat value. 
2473 APInt Constant = APInt::getNullValue(1); 2474 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2475 Constant = C->getAPIntValue(); 2476 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2477 APInt SplatValue, SplatUndef; 2478 unsigned SplatBitSize; 2479 bool HasAnyUndefs; 2480 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2481 SplatBitSize, HasAnyUndefs); 2482 if (IsSplat) { 2483 // Undef bits can contribute to a possible optimisation if set, so 2484 // set them. 2485 SplatValue |= SplatUndef; 2486 2487 // The splat value may be something like "0x00FFFFFF", which means 0 for 2488 // the first vector value and FF for the rest, repeating. We need a mask 2489 // that will apply equally to all members of the vector, so AND all the 2490 // lanes of the constant together. 2491 EVT VT = Vector->getValueType(0); 2492 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2493 Constant = APInt::getAllOnesValue(BitWidth); 2494 for (unsigned i = 0, n = VT.getVectorNumElements(); i < n; ++i) 2495 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 2496 } 2497 } 2498 2499 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2500 // actually legal and isn't going to get expanded, else this is a false 2501 // optimisation. 2502 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2503 Load->getMemoryVT()); 2504 2505 // Resize the constant to the same size as the original memory access before 2506 // extension. If it is still the AllOnesValue then this AND is completely 2507 // unneeded. 
2508 Constant = 2509 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2510 2511 bool B; 2512 switch (Load->getExtensionType()) { 2513 default: B = false; break; 2514 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2515 case ISD::ZEXTLOAD: 2516 case ISD::NON_EXTLOAD: B = true; break; 2517 } 2518 2519 if (B && Constant.isAllOnesValue()) { 2520 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2521 // preserve semantics once we get rid of the AND. 2522 SDValue NewLoad(Load, 0); 2523 if (Load->getExtensionType() == ISD::EXTLOAD) { 2524 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2525 Load->getValueType(0), Load->getDebugLoc(), 2526 Load->getChain(), Load->getBasePtr(), 2527 Load->getOffset(), Load->getMemoryVT(), 2528 Load->getMemOperand()); 2529 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2530 if (Load->getNumValues() == 3) { 2531 // PRE/POST_INC loads have 3 values. 2532 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2533 NewLoad.getValue(2) }; 2534 CombineTo(Load, To, 3, true); 2535 } else { 2536 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2537 } 2538 } 2539 2540 // Fold the AND away, taking care not to fold to the old load node if we 2541 // replaced it. 2542 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2543 2544 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2545 } 2546 } 2547 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2548 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2549 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2550 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2551 2552 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2553 LL.getValueType().isInteger()) { 2554 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2555 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2556 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2557 LR.getValueType(), LL, RL); 2558 AddToWorkList(ORNode.getNode()); 2559 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2560 } 2561 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2562 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2563 SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), 2564 LR.getValueType(), LL, RL); 2565 AddToWorkList(ANDNode.getNode()); 2566 return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); 2567 } 2568 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2569 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2570 SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), 2571 LR.getValueType(), LL, RL); 2572 AddToWorkList(ORNode.getNode()); 2573 return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); 2574 } 2575 } 2576 // canonicalize equivalent to ll == rl 2577 if (LL == RR && LR == RL) { 2578 Op1 = ISD::getSetCCSwappedOperands(Op1); 2579 std::swap(RL, RR); 2580 } 2581 if (LL == RL && LR == RR) { 2582 bool isInteger = LL.getValueType().isInteger(); 2583 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2584 if (Result != ISD::SETCC_INVALID && 2585 (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) 2586 return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), 2587 LL, LR, Result); 2588 } 2589 } 2590 2591 // 
Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2592 if (N0.getOpcode() == N1.getOpcode()) { 2593 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2594 if (Tmp.getNode()) return Tmp; 2595 } 2596 2597 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2598 // fold (and (sra)) -> (and (srl)) when possible. 2599 if (!VT.isVector() && 2600 SimplifyDemandedBits(SDValue(N, 0))) 2601 return SDValue(N, 0); 2602 2603 // fold (zext_inreg (extload x)) -> (zextload x) 2604 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2605 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2606 EVT MemVT = LN0->getMemoryVT(); 2607 // If we zero all the possible extended bits, then we can turn this into 2608 // a zextload if we are running before legalize or the operation is legal. 2609 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2610 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2611 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2612 ((!LegalOperations && !LN0->isVolatile()) || 2613 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2614 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2615 LN0->getChain(), LN0->getBasePtr(), 2616 LN0->getPointerInfo(), MemVT, 2617 LN0->isVolatile(), LN0->isNonTemporal(), 2618 LN0->getAlignment()); 2619 AddToWorkList(N); 2620 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2621 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2622 } 2623 } 2624 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2625 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2626 N0.hasOneUse()) { 2627 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2628 EVT MemVT = LN0->getMemoryVT(); 2629 // If we zero all the possible extended bits, then we can turn this into 2630 // a zextload if we are running before legalize or the operation is legal. 
2631 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2632 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2633 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2634 ((!LegalOperations && !LN0->isVolatile()) || 2635 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2636 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, 2637 LN0->getChain(), 2638 LN0->getBasePtr(), LN0->getPointerInfo(), 2639 MemVT, 2640 LN0->isVolatile(), LN0->isNonTemporal(), 2641 LN0->getAlignment()); 2642 AddToWorkList(N); 2643 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2644 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2645 } 2646 } 2647 2648 // fold (and (load x), 255) -> (zextload x, i8) 2649 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2650 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2651 if (N1C && (N0.getOpcode() == ISD::LOAD || 2652 (N0.getOpcode() == ISD::ANY_EXTEND && 2653 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2654 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2655 LoadSDNode *LN0 = HasAnyExt 2656 ? cast<LoadSDNode>(N0.getOperand(0)) 2657 : cast<LoadSDNode>(N0); 2658 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2659 LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { 2660 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2661 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2662 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2663 EVT LoadedVT = LN0->getMemoryVT(); 2664 2665 if (ExtVT == LoadedVT && 2666 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2667 EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; 2668 2669 SDValue NewLoad = 2670 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2671 LN0->getChain(), LN0->getBasePtr(), 2672 LN0->getPointerInfo(), 2673 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2674 LN0->getAlignment()); 2675 AddToWorkList(N); 2676 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2677 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2678 } 2679 2680 // Do not change the width of a volatile load. 2681 // Do not generate loads of non-round integer types since these can 2682 // be expensive (and would be wrong if the type is not byte sized). 2683 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2684 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2685 EVT PtrType = LN0->getOperand(1).getValueType(); 2686 2687 unsigned Alignment = LN0->getAlignment(); 2688 SDValue NewPtr = LN0->getBasePtr(); 2689 2690 // For big endian targets, we need to add an offset to the pointer 2691 // to load the correct bytes. For little endian systems, we merely 2692 // need to read fewer bytes from the same pointer. 2693 if (TLI.isBigEndian()) { 2694 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 2695 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 2696 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 2697 NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, 2698 NewPtr, DAG.getConstant(PtrOff, PtrType)); 2699 Alignment = MinAlign(Alignment, PtrOff); 2700 } 2701 2702 AddToWorkList(NewPtr.getNode()); 2703 2704 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2705 SDValue Load = 2706 DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, 2707 LN0->getChain(), NewPtr, 2708 LN0->getPointerInfo(), 2709 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2710 Alignment); 2711 AddToWorkList(N); 2712 CombineTo(LN0, Load, Load.getValue(1)); 2713 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
        }
      }
    }
  }

  return SDValue();
}

/// MatchBSwapHWordLow - Match (a >> 8) | (a << 8) as (bswap a) >> 16.
///
/// N is the OR node being combined and N0/N1 are its operands.  The helper
/// looks through optional masking ANDs on either side of the OR (and inside
/// the shifts) to find the two bytes being exchanged.  If DemandHighBits is
/// set, everything above the low halfword must be known zero (or explicitly
/// masked off by the ANDs we looked through), because the trailing SRL 16
/// clears those bits.  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer types for which a BSWAP instruction exists.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  // Canonicalize so that an AND wrapping an SHL ends up in N0 and an AND
  // wrapping an SRL ends up in N1; the mask checks below assume that order.
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    // Look through the mask; remember we did so for the demanded-bits check.
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  // After stripping masks we need exactly (shl ..., 8) on one side and
  // (srl ..., 8) on the other, each with a single use.
  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  // i.e. the masks may also sit inside the shifts rather than outside.
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both shifted values must originate from the same node.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword is zero since the SRL 16
  // will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16 &&
      (!LookPassAnd0 || !LookPassAnd1) &&
      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
    return SDValue();

  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
2820/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2821static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*,4> &Parts) { 2822 if (!N.getNode()->hasOneUse()) 2823 return false; 2824 2825 unsigned Opc = N.getOpcode(); 2826 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 2827 return false; 2828 2829 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2830 if (!N1C) 2831 return false; 2832 2833 unsigned Num; 2834 switch (N1C->getZExtValue()) { 2835 default: 2836 return false; 2837 case 0xFF: Num = 0; break; 2838 case 0xFF00: Num = 1; break; 2839 case 0xFF0000: Num = 2; break; 2840 case 0xFF000000: Num = 3; break; 2841 } 2842 2843 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 2844 SDValue N0 = N.getOperand(0); 2845 if (Opc == ISD::AND) { 2846 if (Num == 0 || Num == 2) { 2847 // (x >> 8) & 0xff 2848 // (x >> 8) & 0xff0000 2849 if (N0.getOpcode() != ISD::SRL) 2850 return false; 2851 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2852 if (!C || C->getZExtValue() != 8) 2853 return false; 2854 } else { 2855 // (x << 8) & 0xff00 2856 // (x << 8) & 0xff000000 2857 if (N0.getOpcode() != ISD::SHL) 2858 return false; 2859 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 2860 if (!C || C->getZExtValue() != 8) 2861 return false; 2862 } 2863 } else if (Opc == ISD::SHL) { 2864 // (x & 0xff) << 8 2865 // (x & 0xff0000) << 8 2866 if (Num != 0 && Num != 2) 2867 return false; 2868 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2869 if (!C || C->getZExtValue() != 8) 2870 return false; 2871 } else { // Opc == ISD::SRL 2872 // (x & 0xff00) >> 8 2873 // (x & 0xff000000) >> 8 2874 if (Num != 1 && Num != 3) 2875 return false; 2876 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 2877 if (!C || C->getZExtValue() != 8) 2878 return false; 2879 } 2880 2881 if (Parts[Num]) 2882 return false; 2883 2884 Parts[Num] = N0.getOperand(0).getNode(); 2885 return true; 
2886} 2887 2888/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is 2889/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8) 2890/// => (rotl (bswap x), 16) 2891SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 2892 if (!LegalOperations) 2893 return SDValue(); 2894 2895 EVT VT = N->getValueType(0); 2896 if (VT != MVT::i32) 2897 return SDValue(); 2898 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 2899 return SDValue(); 2900 2901 SmallVector<SDNode*,4> Parts(4, (SDNode*)0); 2902 // Look for either 2903 // (or (or (and), (and)), (or (and), (and))) 2904 // (or (or (or (and), (and)), (and)), (and)) 2905 if (N0.getOpcode() != ISD::OR) 2906 return SDValue(); 2907 SDValue N00 = N0.getOperand(0); 2908 SDValue N01 = N0.getOperand(1); 2909 2910 if (N1.getOpcode() == ISD::OR) { 2911 // (or (or (and), (and)), (or (and), (and))) 2912 SDValue N000 = N00.getOperand(0); 2913 if (!isBSwapHWordElement(N000, Parts)) 2914 return SDValue(); 2915 2916 SDValue N001 = N00.getOperand(1); 2917 if (!isBSwapHWordElement(N001, Parts)) 2918 return SDValue(); 2919 SDValue N010 = N01.getOperand(0); 2920 if (!isBSwapHWordElement(N010, Parts)) 2921 return SDValue(); 2922 SDValue N011 = N01.getOperand(1); 2923 if (!isBSwapHWordElement(N011, Parts)) 2924 return SDValue(); 2925 } else { 2926 // (or (or (or (and), (and)), (and)), (and)) 2927 if (!isBSwapHWordElement(N1, Parts)) 2928 return SDValue(); 2929 if (!isBSwapHWordElement(N01, Parts)) 2930 return SDValue(); 2931 if (N00.getOpcode() != ISD::OR) 2932 return SDValue(); 2933 SDValue N000 = N00.getOperand(0); 2934 if (!isBSwapHWordElement(N000, Parts)) 2935 return SDValue(); 2936 SDValue N001 = N00.getOperand(1); 2937 if (!isBSwapHWordElement(N001, Parts)) 2938 return SDValue(); 2939 } 2940 2941 // Make sure the parts are all coming from the same node. 
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
                              SDValue(Parts[0],0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt);
  else if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt);
  // No rotate available: for i32 a shift by 16 in each direction ORed
  // together is equivalent.
  return DAG.getNode(ISD::OR, N->getDebugLoc(), VT,
                     DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt));
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    // Build the all-ones constant at the element width for vectors.
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode() != 0)
    return BSwap;

  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
  if (ROR.getNode() != 0)
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) != 0 -- the identity holds for any constants, but the
  // transform is only applied when the masks overlap (see the guard below).
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                                     N0.getOperand(0), N1),
                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both compares against the same constant with the same predicate.
    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      // Merge the two predicates into one setcc when the combined condition
      // code exists and is legal on the target.
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // Strip an optional constant mask, remembering it in Mask.
  if (Op.getOpcode() == ISD::AND) {
    if (isa<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  // The remaining operand must be a shift in either direction.
  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return 0;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return 0;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return 0; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return 0; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return 0;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return 0;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // A rotate requires the two shift amounts to cover the full width.
    if ((LShVal + RShVal) != OpSizeInBits)
      return 0;

    SDValue Rot;
    if (HasROTL)
      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
    else
      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      // Each original mask only constrained the bits its own shift produced;
      // the bits contributed by the other half stay unconstrained.
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return 0;

  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTL)
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
      }
    }
  }

  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
    if (ConstantSDNode *SUBC =
          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
      if (SUBC->getAPIntValue() == OpSizeInBits) {
        if (HasROTR)
          return DAG.getNode(ISD::ROTR, DL, VT,
                             LHSShiftArg, RHSShiftAmt).getNode();
        else
          return DAG.getNode(ISD::ROTL, DL, VT,
                             LHSShiftArg, LHSShiftAmt).getNode();
      }
    }
  }

  // Look for sign/zext/any-extended or truncate cases:
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
    if (RExtOp0.getOpcode() == ISD::SUB &&
        RExtOp0.getOperand(1) == LExtOp0) {
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotl x, y)
      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
      //   (rotr x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                             LHSShiftArg,
                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
        }
      }
    } else if (LExtOp0.getOpcode() == ISD::SUB &&
               RExtOp0 == LExtOp0.getOperand(1)) {
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotr x, y)
      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
      //   (rotl x, (sub 32, y))
      if (ConstantSDNode *SUBC =
            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
        if (SUBC->getAPIntValue() == OpSizeInBits) {
          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
                             LHSShiftArg,
                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
        }
      }
    }
  }

  return 0;
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
  if (RXOR.getNode() != 0)
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    // Only invert the condition if the inverse is legal on the target.
    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorkList(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: swap the logical opcode and push the NOT inward.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
    }
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  // Which binops may be pulled through depends on how they interact with the
  // sign bit under an arithmetic shift right (see the SRA check below).
  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Fold the constants, shifting the binop RHS by the shift amount.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 LHS->getOperand(0).getDebugLoc(),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
}

SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    // Shifting by at least the bit width leaves nothing of x behind.
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // The outer shift must cover the bits added by the extension (see the
    // comment above).
    if (c2 >= OpSizeInBits - InnerShiftSize) {
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
                         DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
                                     N0.getOperand(0)->getOperand(0)),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    if (c1 < VT.getSizeInBits()) {
      uint64_t c2 = N1C->getZExtValue();
      // MASK keeps exactly the bits that survive the original srl.
      APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                         VT.getSizeInBits() - c1);
      SDValue Shift;
      if (c2 > c1) {
        Mask = Mask.shl(c2-c1);
        Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c2-c1, N1.getValueType()));
      } else {
        Mask = Mask.lshr(c1-c2);
        Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                            DAG.getConstant(c1-c2, N1.getValueType()));
      }
      return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift,
                         DAG.getConstant(Mask, VT));
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
                                            VT.getSizeInBits() -
                                              N1C->getZExtValue()),
                      VT);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // Finally, try the binop-through-shift transform shared by shl/sra/srl.
  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}

/// visitSRA - Combine patterns rooted at an ISD::SRA node.  Returns the
/// replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C &&
      N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // sra saturates: shifting by >= the width is equivalent to width-1.
      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1C->getValueType(0)));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //      -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL) {
    // Get the two constanst of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N01C && N1C) {
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT =
        EVT::getIntegerVT(*DAG.getContext(),
                          OpSizeInBits - N1C->getZExtValue());
      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
    EVT LargeVT = N0.getOperand(0).getValueType();
    ConstantSDNode *LargeShiftAmt =
      cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));

    if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
        LargeShiftAmt->getZExtValue()) {
      SDValue Amt =
        DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
              getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType()));
      SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
                                N0.getOperand(0).getOperand(0), Amt);
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);

  // Finally, try the binop-through-shift transform shared by shl/sra/srl.
  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// visitSRL - Combine patterns rooted at an ISD::SRL node.  Returns the
/// replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    if (c1 + c2 >= OpSizeInBits)
      return DAG.getConstant(0, VT);
    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(c1 + c2, N1.getValueType()));
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
                         DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  // Only reached for widths <= 64 so the ~0ULL mask computation is valid.
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
      N0.getValueSizeInBits() <= 64) {
    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getConstant(~0ULL >> ShAmt, VT));
  }


  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorkList(SmallShift.getNode());
      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
    APInt KnownZero, KnownOne;
    DAG.ComputeMaskedBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Move the unknown bit down to bit zero before the xor.
        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorkList(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND &&
      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
    SDValue N101 = N1.getOperand(0).getOperand(1);
    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
      EVT TruncVT = N1.getValueType();
      SDValue N100 = N1.getOperand(0).getOperand(0);
      APInt TruncC = N101C->getAPIntValue();
      TruncC = TruncC.trunc(TruncVT.getSizeInBits());
      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
                         DAG.getNode(ISD::AND, N->getDebugLoc(),
                                     TruncVT,
                                     DAG.getNode(ISD::TRUNCATE,
                                                 N->getDebugLoc(),
                                                 TruncVT, N100),
                                     DAG.getConstant(TruncC, TruncVT)));
    }
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // Try the binop-through-shift transform shared by shl/sra/srl.
  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorkList(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorkList(Use);
    }
  }

  return SDValue();
}

/// visitCTLZ - Constant-fold an ISD::CTLZ node (SelectionDAG::getNode folds
/// the op when the operand is a constant).
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
  return SDValue();
}

/// visitCTLZ_ZERO_UNDEF - Constant-fold an ISD::CTLZ_ZERO_UNDEF node.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
  return SDValue();
}

/// visitCTTZ - Constant-fold an ISD::CTTZ node.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
  return SDValue();
}

/// visitCTTZ_ZERO_UNDEF - Constant-fold an ISD::CTTZ_ZERO_UNDEF node.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0);
  return SDValue();
}

/// visitCTPOP - Constant-fold an ISD::CTPOP node.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
  return SDValue();
}

/// visitSELECT - Combine patterns rooted at an ISD::SELECT node, mostly
/// boolean-algebra folds on i1 selects and conversion to SELECT_CC.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // Requires booleans to be represented as 0/1 (i1 condition, or a target
  // using ZeroOrOneBooleanContent).
  if (VT.isInteger() &&
      (VT0 == MVT::i1 ||
       (VT0.isInteger() &&
        TLI.getBooleanContents(false) == TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Condition and result types differ: xor in the condition type, then
    // extend or truncate to the result type.
    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // FIXME:
    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
    // having to say they don't support SELECT_CC on every type the DAG knows
    // about, since there is no way to mark an opcode illegal at all value types
    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
  }

  return SDValue();
}

/// visitSELECT_CC - Combine patterns rooted at an ISD::SELECT_CC node.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDValue N3 = N->getOperand(3);
  SDValue N4 = N->getOperand(4);
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, N->getDebugLoc(), false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());

  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
    if (!SCCC->isNullValue())
      return N2;    // cond always true -> true val
    else
      return N3;    // cond always false -> false val
  }

  // Fold to a simpler select_cc
  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                       SCC.getOperand(2));

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
}

/// visitSETCC - Delegate ISD::SETCC simplification to SimplifySetCC.
SDValue DAGCombiner::visitSETCC(SDNode *N) {
  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
                       N->getDebugLoc());
}

// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extension are possible and the above
// mentioned transformation is profitable.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
                                    unsigned ExtOpc,
                                    SmallVector<SDNode*, 4> &ExtendNodes,
                                    const TargetLowering &TLI) {
  bool HasCopyToRegUses = false;
  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
  // Walk all users of the load result (other than the extend N itself).
  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
                            UE = N0.getNode()->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == N)
      continue;
    if (UI.getUse().getResNo() != N0.getResNo())
      continue;
    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
4136 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 4137 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 4138 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 4139 // Sign bits will be lost after a zext. 4140 return false; 4141 bool Add = false; 4142 for (unsigned i = 0; i != 2; ++i) { 4143 SDValue UseOp = User->getOperand(i); 4144 if (UseOp == N0) 4145 continue; 4146 if (!isa<ConstantSDNode>(UseOp)) 4147 return false; 4148 Add = true; 4149 } 4150 if (Add) 4151 ExtendNodes.push_back(User); 4152 continue; 4153 } 4154 // If truncates aren't free and there are users we can't 4155 // extend, it isn't worthwhile. 4156 if (!isTruncFree) 4157 return false; 4158 // Remember if this value is live-out. 4159 if (User->getOpcode() == ISD::CopyToReg) 4160 HasCopyToRegUses = true; 4161 } 4162 4163 if (HasCopyToRegUses) { 4164 bool BothLiveOut = false; 4165 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4166 UI != UE; ++UI) { 4167 SDUse &Use = UI.getUse(); 4168 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 4169 BothLiveOut = true; 4170 break; 4171 } 4172 } 4173 if (BothLiveOut) 4174 // Both unextended and extended values are live out. There had better be 4175 // a good reason for the transformation. 4176 return ExtendNodes.size(); 4177 } 4178 return true; 4179} 4180 4181void DAGCombiner::ExtendSetCCUses(SmallVector<SDNode*, 4> SetCCs, 4182 SDValue Trunc, SDValue ExtLoad, DebugLoc DL, 4183 ISD::NodeType ExtType) { 4184 // Extend SetCC uses if necessary. 
4185 for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { 4186 SDNode *SetCC = SetCCs[i]; 4187 SmallVector<SDValue, 4> Ops; 4188 4189 for (unsigned j = 0; j != 2; ++j) { 4190 SDValue SOp = SetCC->getOperand(j); 4191 if (SOp == Trunc) 4192 Ops.push_back(ExtLoad); 4193 else 4194 Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp)); 4195 } 4196 4197 Ops.push_back(SetCC->getOperand(2)); 4198 CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), 4199 &Ops[0], Ops.size())); 4200 } 4201} 4202 4203SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { 4204 SDValue N0 = N->getOperand(0); 4205 EVT VT = N->getValueType(0); 4206 4207 // fold (sext c1) -> c1 4208 if (isa<ConstantSDNode>(N0)) 4209 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); 4210 4211 // fold (sext (sext x)) -> (sext x) 4212 // fold (sext (aext x)) -> (sext x) 4213 if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) 4214 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, 4215 N0.getOperand(0)); 4216 4217 if (N0.getOpcode() == ISD::TRUNCATE) { 4218 // fold (sext (truncate (load x))) -> (sext (smaller load x)) 4219 // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n))) 4220 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4221 if (NarrowLoad.getNode()) { 4222 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4223 if (NarrowLoad.getNode() != N0.getNode()) { 4224 CombineTo(N0.getNode(), NarrowLoad); 4225 // CombineTo deleted the truncate, if needed, but not what's under it. 4226 AddToWorkList(oye); 4227 } 4228 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4229 } 4230 4231 // See if the value being truncated is already sign extended. If so, just 4232 // eliminate the trunc/sext pair. 
4233 SDValue Op = N0.getOperand(0); 4234 unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits(); 4235 unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits(); 4236 unsigned DestBits = VT.getScalarType().getSizeInBits(); 4237 unsigned NumSignBits = DAG.ComputeNumSignBits(Op); 4238 4239 if (OpBits == DestBits) { 4240 // Op is i32, Mid is i8, and Dest is i32. If Op has more than 24 sign 4241 // bits, it is already ready. 4242 if (NumSignBits > DestBits-MidBits) 4243 return Op; 4244 } else if (OpBits < DestBits) { 4245 // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign 4246 // bits, just sext from i32. 4247 if (NumSignBits > OpBits-MidBits) 4248 return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); 4249 } else { 4250 // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign 4251 // bits, just truncate to i32. 4252 if (NumSignBits > OpBits-MidBits) 4253 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); 4254 } 4255 4256 // fold (sext (truncate x)) -> (sextinreg x). 4257 if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, 4258 N0.getValueType())) { 4259 if (OpBits < DestBits) 4260 Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); 4261 else if (OpBits > DestBits) 4262 Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); 4263 return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, 4264 DAG.getValueType(N0.getValueType())); 4265 } 4266 } 4267 4268 // fold (sext (load x)) -> (sext (truncate (sextload x))) 4269 // None of the supported targets knows how to perform load and sign extend 4270 // on vectors in one instruction. We only perform this transformation on 4271 // scalars. 
4272 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4273 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4274 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { 4275 bool DoXform = true; 4276 SmallVector<SDNode*, 4> SetCCs; 4277 if (!N0.hasOneUse()) 4278 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); 4279 if (DoXform) { 4280 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4281 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4282 LN0->getChain(), 4283 LN0->getBasePtr(), LN0->getPointerInfo(), 4284 N0.getValueType(), 4285 LN0->isVolatile(), LN0->isNonTemporal(), 4286 LN0->getAlignment()); 4287 CombineTo(N, ExtLoad); 4288 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4289 N0.getValueType(), ExtLoad); 4290 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4291 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4292 ISD::SIGN_EXTEND); 4293 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4294 } 4295 } 4296 4297 // fold (sext (sextload x)) -> (sext (truncate (sextload x))) 4298 // fold (sext ( extload x)) -> (sext (truncate (sextload x))) 4299 if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) && 4300 ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) { 4301 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4302 EVT MemVT = LN0->getMemoryVT(); 4303 if ((!LegalOperations && !LN0->isVolatile()) || 4304 TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { 4305 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, 4306 LN0->getChain(), 4307 LN0->getBasePtr(), LN0->getPointerInfo(), 4308 MemVT, 4309 LN0->isVolatile(), LN0->isNonTemporal(), 4310 LN0->getAlignment()); 4311 CombineTo(N, ExtLoad); 4312 CombineTo(N0.getNode(), 4313 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4314 N0.getValueType(), ExtLoad), 4315 ExtLoad.getValue(1)); 4316 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4317 } 4318 } 4319 4320 // fold (sext (and/or/xor (load x), cst)) -> 4321 // (and/or/xor (sextload x), (sext cst)) 4322 if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR || 4323 N0.getOpcode() == ISD::XOR) && 4324 isa<LoadSDNode>(N0.getOperand(0)) && 4325 N0.getOperand(1).getOpcode() == ISD::Constant && 4326 TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) && 4327 (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) { 4328 LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0)); 4329 if (LN0->getExtensionType() != ISD::ZEXTLOAD) { 4330 bool DoXform = true; 4331 SmallVector<SDNode*, 4> SetCCs; 4332 if (!N0.hasOneUse()) 4333 DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, 4334 SetCCs, TLI); 4335 if (DoXform) { 4336 SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, 4337 LN0->getChain(), LN0->getBasePtr(), 4338 LN0->getPointerInfo(), 4339 LN0->getMemoryVT(), 4340 LN0->isVolatile(), 4341 LN0->isNonTemporal(), 4342 LN0->getAlignment()); 4343 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4344 Mask = Mask.sext(VT.getSizeInBits()); 4345 SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, 4346 ExtLoad, DAG.getConstant(Mask, VT)); 4347 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, 4348 N0.getOperand(0).getDebugLoc(), 4349 N0.getOperand(0).getValueType(), ExtLoad); 4350 CombineTo(N, And); 4351 CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); 4352 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4353 ISD::SIGN_EXTEND); 4354 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4355 } 4356 } 4357 } 4358 4359 if (N0.getOpcode() == ISD::SETCC) { 4360 // sext(setcc) -> sext_in_reg(vsetcc) for vectors. 4361 // Only do this before legalize for now. 
4362 if (VT.isVector() && !LegalOperations) { 4363 EVT N0VT = N0.getOperand(0).getValueType(); 4364 // On some architectures (such as SSE/NEON/etc) the SETCC result type is 4365 // of the same size as the compared operands. Only optimize sext(setcc()) 4366 // if this is the case. 4367 EVT SVT = TLI.getSetCCResultType(N0VT); 4368 4369 // We know that the # elements of the results is the same as the 4370 // # elements of the compare (and the # elements of the compare result 4371 // for that matter). Check to see that they are the same size. If so, 4372 // we know that the element size of the sext'd result matches the 4373 // element size of the compare operands. 4374 if (VT.getSizeInBits() == SVT.getSizeInBits()) 4375 return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4376 N0.getOperand(1), 4377 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4378 // If the desired elements are smaller or larger than the source 4379 // elements we can use a matching integer vector type and then 4380 // truncate/sign extend 4381 else { 4382 EVT MatchingElementType = 4383 EVT::getIntegerVT(*DAG.getContext(), 4384 N0VT.getScalarType().getSizeInBits()); 4385 EVT MatchingVectorType = 4386 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4387 N0VT.getVectorNumElements()); 4388 4389 if (SVT == MatchingVectorType) { 4390 SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, 4391 N0.getOperand(0), N0.getOperand(1), 4392 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4393 return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 4394 } 4395 } 4396 } 4397 4398 // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc) 4399 unsigned ElementWidth = VT.getScalarType().getSizeInBits(); 4400 SDValue NegOne = 4401 DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); 4402 SDValue SCC = 4403 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4404 NegOne, DAG.getConstant(0, VT), 4405 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4406 if 
(SCC.getNode()) return SCC; 4407 if (!LegalOperations || 4408 TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) 4409 return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, 4410 DAG.getSetCC(N->getDebugLoc(), 4411 TLI.getSetCCResultType(VT), 4412 N0.getOperand(0), N0.getOperand(1), 4413 cast<CondCodeSDNode>(N0.getOperand(2))->get()), 4414 NegOne, DAG.getConstant(0, VT)); 4415 } 4416 4417 // fold (sext x) -> (zext x) if the sign bit is known zero. 4418 if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && 4419 DAG.SignBitIsZero(N0)) 4420 return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); 4421 4422 return SDValue(); 4423} 4424 4425// isTruncateOf - If N is a truncate of some other value, return true, record 4426// the value being truncated in Op and which of Op's bits are zero in KnownZero. 4427// This function computes KnownZero to avoid a duplicated call to 4428// ComputeMaskedBits in the caller. 4429static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op, 4430 APInt &KnownZero) { 4431 APInt KnownOne; 4432 if (N->getOpcode() == ISD::TRUNCATE) { 4433 Op = N->getOperand(0); 4434 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 4435 return true; 4436 } 4437 4438 if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 || 4439 cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE) 4440 return false; 4441 4442 SDValue Op0 = N->getOperand(0); 4443 SDValue Op1 = N->getOperand(1); 4444 assert(Op0.getValueType() == Op1.getValueType()); 4445 4446 ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); 4447 ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); 4448 if (COp0 && COp0->isNullValue()) 4449 Op = Op1; 4450 else if (COp1 && COp1->isNullValue()) 4451 Op = Op0; 4452 else 4453 return false; 4454 4455 DAG.ComputeMaskedBits(Op, KnownZero, KnownOne); 4456 4457 if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue()) 4458 return false; 4459 4460 return true; 4461} 4462 4463SDValue 
DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (zext c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // Bits dropped by the truncate (and not re-created by this zext).
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      // All truncated bits are known zero, so the truncate was a no-op;
      // just adjust Op's width to VT.
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Widen (or narrow) the pre-truncate value to VT, then mask off the
    // bits above the truncated width.
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
      AddToWorkList(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       N0.getValueType(),
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  // NOTE(review): the `&&` in the last clause below restricts this fold to
  // pre-legalization AND ops legal in VT; if `||` was intended (mirroring the
  // `!LegalOperations || isOperationLegal` pattern used elsewhere in this
  // function), confirm against upstream history before changing.
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getPointerInfo(),
                                         LN0->getMemoryVT(),
                                         LN0->isVolatile(),
                                         LN0->isNonTemporal(),
                                         LN0->getAlignment());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    N0.getOperand(0).getDebugLoc(),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), LN0->getPointerInfo(),
                                       MemVT,
                                       LN0->isVolatile(), LN0->isNonTemporal(),
                                       LN0->getAlignment());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector()) {
      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT N0VT = N0.getOperand(0).getValueType();
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                           DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                       &OneOps[0], OneOps.size()));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                         DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
                                     &OneOps[0], OneOps.size()));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
4715 unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() - 4716 InnerZExt.getOperand(0).getValueType().getSizeInBits(); 4717 if (ShAmtVal > KnownZeroBits) 4718 return SDValue(); 4719 } 4720 4721 DebugLoc DL = N->getDebugLoc(); 4722 4723 // Ensure that the shift amount is wide enough for the shifted value. 4724 if (VT.getSizeInBits() >= 256) 4725 ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt); 4726 4727 return DAG.getNode(N0.getOpcode(), DL, VT, 4728 DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)), 4729 ShAmt); 4730 } 4731 4732 return SDValue(); 4733} 4734 4735SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { 4736 SDValue N0 = N->getOperand(0); 4737 EVT VT = N->getValueType(0); 4738 4739 // fold (aext c1) -> c1 4740 if (isa<ConstantSDNode>(N0)) 4741 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); 4742 // fold (aext (aext x)) -> (aext x) 4743 // fold (aext (zext x)) -> (zext x) 4744 // fold (aext (sext x)) -> (sext x) 4745 if (N0.getOpcode() == ISD::ANY_EXTEND || 4746 N0.getOpcode() == ISD::ZERO_EXTEND || 4747 N0.getOpcode() == ISD::SIGN_EXTEND) 4748 return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); 4749 4750 // fold (aext (truncate (load x))) -> (aext (smaller load x)) 4751 // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) 4752 if (N0.getOpcode() == ISD::TRUNCATE) { 4753 SDValue NarrowLoad = ReduceLoadWidth(N0.getNode()); 4754 if (NarrowLoad.getNode()) { 4755 SDNode* oye = N0.getNode()->getOperand(0).getNode(); 4756 if (NarrowLoad.getNode() != N0.getNode()) { 4757 CombineTo(N0.getNode(), NarrowLoad); 4758 // CombineTo deleted the truncate, if needed, but not what's under it. 4759 AddToWorkList(oye); 4760 } 4761 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4762 } 4763 } 4764 4765 // fold (aext (truncate x)) 4766 if (N0.getOpcode() == ISD::TRUNCATE) { 4767 SDValue TruncOp = N0.getOperand(0); 4768 if (TruncOp.getValueType() == VT) 4769 return TruncOp; // x iff x size == zext size. 4770 if (TruncOp.getValueType().bitsGT(VT)) 4771 return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); 4772 return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); 4773 } 4774 4775 // Fold (aext (and (trunc x), cst)) -> (and x, cst) 4776 // if the trunc is not free. 4777 if (N0.getOpcode() == ISD::AND && 4778 N0.getOperand(0).getOpcode() == ISD::TRUNCATE && 4779 N0.getOperand(1).getOpcode() == ISD::Constant && 4780 !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(), 4781 N0.getValueType())) { 4782 SDValue X = N0.getOperand(0).getOperand(0); 4783 if (X.getValueType().bitsLT(VT)) { 4784 X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); 4785 } else if (X.getValueType().bitsGT(VT)) { 4786 X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); 4787 } 4788 APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 4789 Mask = Mask.zext(VT.getSizeInBits()); 4790 return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, 4791 X, DAG.getConstant(Mask, VT)); 4792 } 4793 4794 // fold (aext (load x)) -> (aext (truncate (extload x))) 4795 // None of the supported targets knows how to perform load and any_ext 4796 // on vectors in one instruction. We only perform this transformation on 4797 // scalars. 
4798 if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && 4799 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 4800 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 4801 bool DoXform = true; 4802 SmallVector<SDNode*, 4> SetCCs; 4803 if (!N0.hasOneUse()) 4804 DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); 4805 if (DoXform) { 4806 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4807 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, 4808 LN0->getChain(), 4809 LN0->getBasePtr(), LN0->getPointerInfo(), 4810 N0.getValueType(), 4811 LN0->isVolatile(), LN0->isNonTemporal(), 4812 LN0->getAlignment()); 4813 CombineTo(N, ExtLoad); 4814 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4815 N0.getValueType(), ExtLoad); 4816 CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); 4817 ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), 4818 ISD::ANY_EXTEND); 4819 return SDValue(N, 0); // Return N so it doesn't get rechecked! 4820 } 4821 } 4822 4823 // fold (aext (zextload x)) -> (aext (truncate (zextload x))) 4824 // fold (aext (sextload x)) -> (aext (truncate (sextload x))) 4825 // fold (aext ( extload x)) -> (aext (truncate (extload x))) 4826 if (N0.getOpcode() == ISD::LOAD && 4827 !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 4828 N0.hasOneUse()) { 4829 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 4830 EVT MemVT = LN0->getMemoryVT(); 4831 SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), 4832 VT, LN0->getChain(), LN0->getBasePtr(), 4833 LN0->getPointerInfo(), MemVT, 4834 LN0->isVolatile(), LN0->isNonTemporal(), 4835 LN0->getAlignment()); 4836 CombineTo(N, ExtLoad); 4837 CombineTo(N0.getNode(), 4838 DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), 4839 N0.getValueType(), ExtLoad), 4840 ExtLoad.getValue(1)); 4841 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
4842 } 4843 4844 if (N0.getOpcode() == ISD::SETCC) { 4845 // aext(setcc) -> sext_in_reg(vsetcc) for vectors. 4846 // Only do this before legalize for now. 4847 if (VT.isVector() && !LegalOperations) { 4848 EVT N0VT = N0.getOperand(0).getValueType(); 4849 // We know that the # elements of the results is the same as the 4850 // # elements of the compare (and the # elements of the compare result 4851 // for that matter). Check to see that they are the same size. If so, 4852 // we know that the element size of the sext'd result matches the 4853 // element size of the compare operands. 4854 if (VT.getSizeInBits() == N0VT.getSizeInBits()) 4855 return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), 4856 N0.getOperand(1), 4857 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4858 // If the desired elements are smaller or larger than the source 4859 // elements we can use a matching integer vector type and then 4860 // truncate/sign extend 4861 else { 4862 EVT MatchingElementType = 4863 EVT::getIntegerVT(*DAG.getContext(), 4864 N0VT.getScalarType().getSizeInBits()); 4865 EVT MatchingVectorType = 4866 EVT::getVectorVT(*DAG.getContext(), MatchingElementType, 4867 N0VT.getVectorNumElements()); 4868 SDValue VsetCC = 4869 DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), 4870 N0.getOperand(1), 4871 cast<CondCodeSDNode>(N0.getOperand(2))->get()); 4872 return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); 4873 } 4874 } 4875 4876 // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc 4877 SDValue SCC = 4878 SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), 4879 DAG.getConstant(1, VT), DAG.getConstant(0, VT), 4880 cast<CondCodeSDNode>(N0.getOperand(2))->get(), true); 4881 if (SCC.getNode()) 4882 return SCC; 4883 } 4884 4885 return SDValue(); 4886} 4887 4888/// GetDemandedBits - See if the specified operand can be simplified with the 4889/// knowledge that only the bits specified by Mask are used. 
If so, return the 4890/// simpler operand, otherwise return a null SDValue. 4891SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 4892 switch (V.getOpcode()) { 4893 default: break; 4894 case ISD::Constant: { 4895 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 4896 assert(CV != 0 && "Const value should be ConstSDNode."); 4897 const APInt &CVal = CV->getAPIntValue(); 4898 APInt NewVal = CVal & Mask; 4899 if (NewVal != CVal) { 4900 return DAG.getConstant(NewVal, V.getValueType()); 4901 } 4902 break; 4903 } 4904 case ISD::OR: 4905 case ISD::XOR: 4906 // If the LHS or RHS don't contribute bits to the or, drop them. 4907 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 4908 return V.getOperand(1); 4909 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 4910 return V.getOperand(0); 4911 break; 4912 case ISD::SRL: 4913 // Only look at single-use SRLs. 4914 if (!V.getNode()->hasOneUse()) 4915 break; 4916 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 4917 // See if we can recursively simplify the LHS. 4918 unsigned Amt = RHSC->getZExtValue(); 4919 4920 // Watch out for shift count overflow though. 4921 if (Amt >= Mask.getBitWidth()) break; 4922 APInt NewMask = Mask << Amt; 4923 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 4924 if (SimplifyLHS.getNode()) 4925 return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), 4926 SimplifyLHS, V.getOperand(1)); 4927 } 4928 } 4929 return SDValue(); 4930} 4931 4932/// ReduceLoadWidth - If the result of a wider load is shifted to right of N 4933/// bits and then truncated to a narrower type and where N is a multiple 4934/// of number of bits of the narrower type, transform it to a narrower load 4935/// from address + N / num of bits of new type. If the result is to be 4936/// extended, also fold the extension to form a extending load. 
4937SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { 4938 unsigned Opc = N->getOpcode(); 4939 4940 ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; 4941 SDValue N0 = N->getOperand(0); 4942 EVT VT = N->getValueType(0); 4943 EVT ExtVT = VT; 4944 4945 // This transformation isn't valid for vector loads. 4946 if (VT.isVector()) 4947 return SDValue(); 4948 4949 // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then 4950 // extended to VT. 4951 if (Opc == ISD::SIGN_EXTEND_INREG) { 4952 ExtType = ISD::SEXTLOAD; 4953 ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 4954 } else if (Opc == ISD::SRL) { 4955 // Another special-case: SRL is basically zero-extending a narrower value. 4956 ExtType = ISD::ZEXTLOAD; 4957 N0 = SDValue(N, 0); 4958 ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 4959 if (!N01) return SDValue(); 4960 ExtVT = EVT::getIntegerVT(*DAG.getContext(), 4961 VT.getSizeInBits() - N01->getZExtValue()); 4962 } 4963 if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT)) 4964 return SDValue(); 4965 4966 unsigned EVTBits = ExtVT.getSizeInBits(); 4967 4968 // Do not generate loads of non-round integer types since these can 4969 // be expensive (and would be wrong if the type is not byte sized). 4970 if (!ExtVT.isRound()) 4971 return SDValue(); 4972 4973 unsigned ShAmt = 0; 4974 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4975 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 4976 ShAmt = N01->getZExtValue(); 4977 // Is the shift amount a multiple of size of VT? 4978 if ((ShAmt & (EVTBits-1)) == 0) { 4979 N0 = N0.getOperand(0); 4980 // Is the load width a multiple of size of VT? 4981 if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0) 4982 return SDValue(); 4983 } 4984 4985 // At this point, we must have a load or else we can't do the transform. 
4986 if (!isa<LoadSDNode>(N0)) return SDValue(); 4987 4988 // If the shift amount is larger than the input type then we're not 4989 // accessing any of the loaded bytes. If the load was a zextload/extload 4990 // then the result of the shift+trunc is zero/undef (handled elsewhere). 4991 // If the load was a sextload then the result is a splat of the sign bit 4992 // of the extended byte. This is not worth optimizing for. 4993 if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits()) 4994 return SDValue(); 4995 } 4996 } 4997 4998 // If the load is shifted left (and the result isn't shifted back right), 4999 // we can fold the truncate through the shift. 5000 unsigned ShLeftAmt = 0; 5001 if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() && 5002 ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) { 5003 if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 5004 ShLeftAmt = N01->getZExtValue(); 5005 N0 = N0.getOperand(0); 5006 } 5007 } 5008 5009 // If we haven't found a load, we can't narrow it. Don't transform one with 5010 // multiple uses, this would require adding a new load. 5011 if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() || 5012 // Don't change the width of a volatile load. 5013 cast<LoadSDNode>(N0)->isVolatile()) 5014 return SDValue(); 5015 5016 // Verify that we are actually reducing a load width here. 5017 if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits) 5018 return SDValue(); 5019 5020 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 5021 EVT PtrType = N0.getOperand(1).getValueType(); 5022 5023 // For big endian targets, we need to adjust the offset to the pointer to 5024 // load the correct bytes. 
5025 if (TLI.isBigEndian()) { 5026 unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits(); 5027 unsigned EVTStoreBits = ExtVT.getStoreSizeInBits(); 5028 ShAmt = LVTStoreBits - EVTStoreBits - ShAmt; 5029 } 5030 5031 uint64_t PtrOff = ShAmt / 8; 5032 unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff); 5033 SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), 5034 PtrType, LN0->getBasePtr(), 5035 DAG.getConstant(PtrOff, PtrType)); 5036 AddToWorkList(NewPtr.getNode()); 5037 5038 SDValue Load; 5039 if (ExtType == ISD::NON_EXTLOAD) 5040 Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, 5041 LN0->getPointerInfo().getWithOffset(PtrOff), 5042 LN0->isVolatile(), LN0->isNonTemporal(), 5043 LN0->isInvariant(), NewAlign); 5044 else 5045 Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, 5046 LN0->getPointerInfo().getWithOffset(PtrOff), 5047 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 5048 NewAlign); 5049 5050 // Replace the old load's chain with the new load's chain. 5051 WorkListRemover DeadNodes(*this); 5052 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1)); 5053 5054 // Shift the result left, if we've swallowed a left shift. 5055 SDValue Result = Load; 5056 if (ShLeftAmt != 0) { 5057 EVT ShImmTy = getShiftAmountTy(Result.getValueType()); 5058 if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) 5059 ShImmTy = VT; 5060 Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, 5061 Result, DAG.getConstant(ShLeftAmt, ShImmTy)); 5062 } 5063 5064 // Return the new loaded value. 
  return Result;
}

/// visitSIGN_EXTEND_INREG - Combine a SIGN_EXTEND_INREG node, whose operand 0
/// is the value and operand 1 is a VTSDNode naming the type whose width is
/// being sign-extended from.  Returns the replacement value, or a null SDValue
/// if no fold applies (continued in the following region of the file).
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE(review): this local deliberately(?) shadows the type name 'EVT';
  // it holds the narrow type being extended from.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  // Constants and undef are handed back to getNode, which constant-folds them.
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  // The inner extension from a wider type is subsumed by the outer, narrower one.
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);
  }

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      // NOTE(review): N1 (the VTSDNode) is passed as an extra operand to the
      // SIGN_EXTEND being built here; SIGN_EXTEND normally takes one operand.
      // Looks like a latent extra-operand oddity -- confirm against getNode.
      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  // The mask checks only the sign bit of the narrow type (bit EVTBits-1).
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  // Legal if we are pre-legalize (and the load is non-volatile), or if the
  // target supports a sign-extending load of the narrow type directly.
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                     EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    // Replace both this node and the original load (value + chain).
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  // (one use: otherwise the zextload is still needed and we'd add a load).
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                     EVT,
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  // Only worthwhile when extending from a half-word or narrower.
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode() != 0)
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
                         BSwap, N1);
  }

  return SDValue();
}

/// visitTRUNCATE - Combine a TRUNCATE node.  Returns the replacement value or
/// a null SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  // depending on how the widths of x, the extension, and VT compare.
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
                         N0.getOperand(0));
    else if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, than we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
    else
      // if the source and dest are the same type, we can drop both the extend
      // and the truncate.
      return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse()) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    // How many narrow (truncated-width) pieces fit in one extracted element.
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = N0->getOperand(1).getValueType();
      // The low-order piece of the wide element is at a different sub-index
      // depending on endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                              NVT, N0.getOperand(0));

      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         N->getDebugLoc(), TrTy, V,
                         DAG.getConstant(Index, IndexTy));
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    SDValue Reduced = ReduceLoadWidth(N);
    if (Reduced.getNode())
      return Reduced;
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// getBuildPairElt - Return the i'th operand of the BUILD_PAIR node N,
/// looking through a MERGE_VALUES wrapper if present.
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
  SDValue Elt = N->getOperand(i);
  if (Elt.getOpcode() != ISD::MERGE_VALUES)
    return Elt.getNode();
  return Elt.getOperand(Elt.getResNo()).getNode();
}

/// CombineConsecutiveLoads - build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  // Both halves must be loads from the same address space, and the low half
  // must be a plain (non-extending) load with a single use.
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getPointerInfo().getAddrSpace() !=
         LD2->getPointerInfo().getAddrSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);

  if (ISD::isNON_EXTLoad(LD2) &&
      LD2->hasOneUse() &&
      // If both are volatile this would reduce the number of volatile loads.
      // If one is volatile it might be ok, but play conservative and bail out.
      !LD1->isVolatile() &&
      !LD2->isVolatile() &&
      // LD2 must sit exactly one LD1-sized element past LD1.
      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = TLI.getTargetData()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));

    // Only fold if the wide load would not require more alignment than the
    // original narrow load provides.
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}

/// visitBITCAST - Combine a BITCAST node (continues in the following region
/// of the file).  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitBITCAST(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
  // Only do this before legalize, since afterward the target may be depending
  // on the bitconvert.
  // First check to see if this is all constant.
  if (!LegalTypes &&
      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
      VT.isVector()) {
    bool isSimple = true;
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
          N0.getOperand(i).getOpcode() != ISD::Constant &&
          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
        isSimple = false;
        break;
      }

    EVT DestEltVT = N->getValueType(0).getVectorElementType();
    assert(!DestEltVT.isVector() &&
           "Element type of vector ValueType must not be vector!");
    if (isSimple)
      return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
  }

  // If the input is a constant, let getNode fold it.
  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
    SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
    if (Res.getNode() != N) {
      if (!LegalOperations ||
          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
        return Res;

      // Folding it resulted in an illegal node, and it's too late to
      // do that. Clean up the old node and forego the transformation.
      // Ideally this won't happen very often, because instcombine
      // and the earlier dagcombine runs (where illegal nodes are
      // permitted) should have folded most of them already.
      DAG.DeleteNode(Res.getNode());
    }
  }

  // (conv (conv x, t1), t2) -> (conv x, t2)
  if (N0.getOpcode() == ISD::BITCAST)
    return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
                       N0.getOperand(0));

  // fold (conv (load x)) -> (load (conv*)x)
  // If the resultant load doesn't need a higher alignment than the original!
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      // Do not change the width of a volatile load.
      !cast<LoadSDNode>(N0)->isVolatile() &&
      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    unsigned Align = TLI.getTargetData()->
      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
    unsigned OrigAlign = LN0->getAlignment();

    if (Align <= OrigAlign) {
      SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
                                 LN0->getBasePtr(), LN0->getPointerInfo(),
                                 LN0->isVolatile(), LN0->isNonTemporal(),
                                 LN0->isInvariant(), OrigAlign);
      AddToWorkList(N);
      // Other users of the old load get the new load bitcast back to the old
      // type; the chain is threaded through as well.
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                            N0.getValueType(), Load),
                Load.getValue(1));
      return Load;
    }
  }

  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
  // This often reduces constant pool loads.
  if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) ||
       (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) &&
      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
    SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
                                  N0.getOperand(0));
    AddToWorkList(NewConv.getNode());

    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
    if (N0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
                         NewConv, DAG.getConstant(SignBit, VT));
    assert(N0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
                       NewConv, DAG.getConstant(~SignBit, VT));
  }

  // fold (bitconvert (fcopysign cst, x)) ->
  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
  // Note that we don't handle (copysign x, cst) because this can always be
  // folded to an fneg or fabs.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
      VT.isInteger() && !VT.isVector()) {
    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
    if (isTypeLegal(IntXVT)) {
      SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                              IntXVT, N0.getOperand(1));
      AddToWorkList(X.getNode());

      // If X has a different width than the result/lhs, sext it or truncate it.
      unsigned VTWidth = VT.getSizeInBits();
      if (OrigXWidth < VTWidth) {
        X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
        AddToWorkList(X.getNode());
      } else if (OrigXWidth > VTWidth) {
        // To get the sign bit in the right place, we have to shift it right
        // before truncating.
        X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
                        X.getValueType(), X,
                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
        AddToWorkList(X.getNode());
        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
        AddToWorkList(X.getNode());
      }

      // Keep only the sign bit of x, clear the sign bit of cst, then merge.
      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
      X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
                      X, DAG.getConstant(SignBit, VT));
      AddToWorkList(X.getNode());

      SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
                                VT, N0.getOperand(0));
      Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
                        Cst, DAG.getConstant(~SignBit, VT));
      AddToWorkList(Cst.getNode());

      return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
    }
  }

  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
  if (N0.getOpcode() == ISD::BUILD_PAIR) {
    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
    if (CombineLD.getNode())
      return CombineLD;
  }

  return SDValue();
}

/// visitBUILD_PAIR - The only BUILD_PAIR combine is merging two consecutive
/// loads into one wide load.
SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
  EVT VT = N->getValueType(0);
  return CombineConsecutiveLoads(N, VT);
}

/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                         DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
                                DstEltVT, Op));
      AddToWorkList(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little endian the last-visited input lands in the low bits, so
        // walk the inputs in reverse there.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // An output element is undef only if every contributing input was undef.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                       &Ops[0], Ops.size());
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
                     &Ops[0], Ops.size());
}

/// visitFADD - Combine an FADD node (continues in the following region of the
/// file).  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
  // fold (fadd A, 0) -> A
  // (unsafe-math only: A + 0.0 is not A when A is -0.0)
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // isNegatibleForFree == 2 means the negation is strictly profitable.
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));
  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FADD,
                       N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  // FADD -> FMA combines:
  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
       DAG.getTarget().Options.UnsafeFPMath) &&
      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
      TLI.isOperationLegal(ISD::FMA, VT)) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);
    }

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);
    }
  }

  return SDValue();
}

/// visitFSUB - Combine an FSUB node.  Returns the replacement value or a null
/// SDValue if no fold applies.
SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
  // fold (fsub A, 0) -> A
  // (unsafe-math only: not exact for A == -0.0)
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fsub 0, B) -> -B
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N0CFP && N0CFP->getValueAPF().isZero()) {
    if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
      return GetNegatedExpression(N1, DAG, LegalOperations);
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
  }
  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold
  //    (fsub x, x) -> 0.0 &
  //    (fsub x, (fadd x, y)) -> (fneg y) &
  //    (fsub x, (fadd y, x)) -> (fneg y)
  if (DAG.getTarget().Options.UnsafeFPMath) {
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
                                          &DAG.getTarget().Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);
      else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
                                               &DAG.getTarget().Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
       DAG.getTarget().Options.UnsafeFPMath) &&
      DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) &&
      TLI.isOperationLegal(ISD::FMA, VT)) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) {
      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT, N1));
    }

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) {
      return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::FNEG, N1->getDebugLoc(), VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);
    }
  }

  return SDValue();
}

/// visitFMUL - Combine an FMUL node (continues in the following region of the
/// file).  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
  // fold (fmul A, 0) -> 0
  // (unsafe-math only: not exact for A == NaN/Inf/-0.0)
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N1CFP->getValueAPF().isZero())
    return N1;
  // fold (fmul A, 0) -> 0, vector edition.
  if (DAG.getTarget().Options.UnsafeFPMath &&
      ISD::isBuildVectorAllZeros(N1.getNode()))
    return N1;
  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;
  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                       &DAG.getTarget().Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
                                         &DAG.getTarget().Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N0.getOpcode() == ISD::FMUL &&
      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
                                   N0.getOperand(1), N1));

  return SDValue();
}

/// visitFMA - Combine an FMA node (operands: multiplicand, multiplier,
/// addend).  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (fma 1.0, x, y) -> (fadd x, y)
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2);
  // fold (fma x, 1.0, y) -> (fadd x, y)
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2);

  return SDValue();
}

/// visitFDIV - Combine an FDIV node (continues in the following region of the
/// file).  Returns the replacement value or a null SDValue.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);

  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
  if (N1CFP && VT != MVT::ppcf128 && DAG.getTarget().Options.UnsafeFPMath) {
    // Compute the reciprocal 1.0 / c2.
    APFloat N1APF = N1CFP->getValueAPF();
    APFloat Recip(N1APF.getSemantics(), 1);  // 1.0
    APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
    // Only do the transform if the reciprocal is a legal fp immediate that
    // isn't too nasty (eg NaN, denormal, ...).
    if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
        (!LegalOperations ||
         // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
         // backend)... we should handle this gracefully after Legalize.
         // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
         TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
         TLI.isFPImmLegal(Recip, VT)))
      return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0,
                         DAG.getConstantFP(Recip, VT));
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                       &DAG.getTarget().Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
                                         &DAG.getTarget().Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  return SDValue();
}

/// visitFREM - The only FREM combine is constant folding (both operands
/// constant), handed to getNode.
SDValue DAGCombiner::visitFREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold (frem c1, c2) -> fmod(c1,c2)
  if (N0CFP && N1CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);

  return SDValue();
}

/// visitFCOPYSIGN - Combine an FCOPYSIGN node: operand 0 supplies the
/// magnitude, operand 1 supplies the sign.
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);

  if (N1CFP) {
    const APFloat& V = N1CFP->getValueAPF();
    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
    if (!V.isNegative()) {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
    } else {
      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
    }
  }

  // copysign(fabs(x), y) -> copysign(x, y)
  // copysign(fneg(x), y) -> copysign(x, y)
  // copysign(copysign(x,z), y) -> copysign(x, y)
  // (the inner op only affects the sign, which y overrides anyway)
  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
      N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0.getOperand(0), N1);

  // copysign(x, abs(y)) -> abs(x)
  if (N1.getOpcode() == ISD::FABS)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);

  // copysign(x, copysign(y,z)) -> copysign(x, z)
  if (N1.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0, N1.getOperand(1));

  // copysign(x, fp_extend(y)) -> copysign(x, y)
  // copysign(x, fp_round(y)) -> copysign(x, y)
  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       N0, N1.getOperand(0));

  return SDValue();
}

/// visitSINT_TO_FP - Combine a SINT_TO_FP node (continues in the following
/// region of the file).
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (N0C && OpVT != MVT::ppcf128 &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
  }

  return SDValue();
}

/// visitUINT_TO_FP - Constant fold, or switch to the signed conversion when
/// the unsigned one is unavailable and the sign bit is provably clear
/// (mirror image of visitSINT_TO_FP above).
SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (uint_to_fp c1) -> c1fp
  if (N0C && OpVT != MVT::ppcf128 &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);

  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
  // but SINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_sint c1fp) -> c1
  if (N0CFP)
    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);

  return SDValue();
}

SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_to_uint c1fp) -> c1
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);

  return SDValue();
}

/// visitFP_ROUND - Simplify FP_ROUND: constant fold, cancel a matching
/// fp_extend, collapse nested rounds, and push the round through copysign.
/// Operand 1 is the "is value-preserving truncation" flag.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP && N0.getValueType() != MVT::ppcf128)
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    // This is a value-preserving truncation only if both rounds are.
    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
                   N0.getNode()->getConstantOperandVal(1) == 1;
    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
                       DAG.getIntPtrConstant(IsTrunc));
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
                              N0.getOperand(0), N1);
    AddToWorkList(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}

/// visitFP_ROUND_INREG - Constant fold FP_ROUND_INREG: build the constant at
/// the narrow in-register type, then fp_extend it back to the result type.
SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  // NOTE: the local deliberately shadows the type name 'EVT' here; it holds
  // the narrow type recorded in operand 1.
  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);

  // fold (fp_round_inreg c1fp) -> c1fp
  if (N0CFP && isTypeLegal(EVT)) {
    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded
  // by the fp_round combine instead.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X (the '1' flag marks it as a value-preserving truncation).
  if (N0.getOpcode() == ISD::FP_ROUND &&
      N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), LN0->getPointerInfo(),
                                     N0.getValueType(),
                                     LN0->isVolatile(), LN0->isNonTemporal(),
                                     LN0->getAlignment());
    CombineTo(N, ExtLoad);
    // Other users of the old load see a value-preserving round of the extload.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}

SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
  // constant pool values.
  // Only worthwhile when an FNEG isn't already free on this target, and only
  // for scalar bitcasts from an integer value.
  if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST &&
      !VT.isVector() &&
      N0.getNode()->hasOneUse() &&
      N0.getOperand(0).getValueType().isInteger()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      // Flip just the sign bit of the integer representation.
      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                         VT, Int);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (N0CFP && VT != MVT::ppcf128)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);
  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
      N0.getOperand(0).getValueType().isInteger() &&
      !N0.getOperand(0).getValueType().isVector()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    if (IntVT.isInteger() && !IntVT.isVector()) {
      // Clear just the sign bit of the integer representation.
      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
      AddToWorkList(Int.getNode());
      return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
                         N->getValueType(0), Int);
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitBRCOND(SDNode *N) {
  SDValue Chain = N->getOperand(0);
  SDValue N1 = N->getOperand(1);    // Condition.
  SDValue N2 = N->getOperand(2);    // Destination basic block.

  // If N is a constant we could fold this into a fallthrough or unconditional
  // branch. However that doesn't happen very often in normal code, because
  // Instcombine/SimplifyCFG should have handled the available opportunities.
  // If we did this folding here, it would be necessary to update the
  // MachineBasicBlock CFG, which is awkward.

  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
  // on the target.
  if (N1.getOpcode() == ISD::SETCC &&
      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
                       Chain, N1.getOperand(2),
                       N1.getOperand(0), N1.getOperand(1), N2);
  }

  if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
      ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
       (N1.getOperand(0).hasOneUse() &&
        N1.getOperand(0).getOpcode() == ISD::SRL))) {
    SDNode *Trunc = 0;
    if (N1.getOpcode() == ISD::TRUNCATE) {
      // Look past the truncate.
      Trunc = N1.getNode();
      N1 = N1.getOperand(0);
    }

    // Match this pattern so that we can generate simpler code:
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = srl i32 %b, 1
    //   brcond i32 %c ...
    //
    // into
    //
    //   %a = ...
    //   %b = and i32 %a, 2
    //   %c = setcc eq %b, 0
    //   brcond %c ...
    //
    // This applies only when the AND constant value has one bit set and the
    // SRL constant is equal to the log2 of the AND constant. The back-end is
    // smart enough to convert the result into a TEST/JMP sequence.
    SDValue Op0 = N1.getOperand(0);
    SDValue Op1 = N1.getOperand(1);

    if (Op0.getOpcode() == ISD::AND &&
        Op1.getOpcode() == ISD::Constant) {
      SDValue AndOp1 = Op0.getOperand(1);

      if (AndOp1.getOpcode() == ISD::Constant) {
        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();

        if (AndConst.isPowerOf2() &&
            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
          SDValue SetCC =
            DAG.getSetCC(N->getDebugLoc(),
                         TLI.getSetCCResultType(Op0.getValueType()),
                         Op0, DAG.getConstant(0, Op0.getValueType()),
                         ISD::SETNE);

          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                                          MVT::Other, Chain, SetCC, N2);
          // Don't add the new BRCond into the worklist or else SimplifySelectCC
          // will convert it back to (X & C1) >> C2.
          CombineTo(N, NewBRCond, false);
          // Truncate is dead.
          if (Trunc) {
            removeFromWorkList(Trunc);
            DAG.DeleteNode(Trunc);
          }
          // Replace the uses of SRL with SETCC
          WorkListRemover DeadNodes(*this);
          DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
          removeFromWorkList(N1.getNode());
          DAG.DeleteNode(N1.getNode());
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }

    if (Trunc)
      // Restore N1 if the above transformation doesn't match.
      N1 = N->getOperand(1);
  }

  // Transform br(xor(x, y)) -> br(x != y)
  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
    SDNode *TheXor = N1.getNode();
    SDValue Op0 = TheXor->getOperand(0);
    SDValue Op1 = TheXor->getOperand(1);
    if (Op0.getOpcode() == Op1.getOpcode()) {
      // Avoid missing important xor optimizations.
      SDValue Tmp = visitXOR(TheXor);
      if (Tmp.getNode() && Tmp.getNode() != TheXor) {
        DEBUG(dbgs() << "\nReplacing.8 ";
              TheXor->dump(&DAG);
              dbgs() << "\nWith: ";
              Tmp.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
        removeFromWorkList(TheXor);
        DAG.DeleteNode(TheXor);
        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                           MVT::Other, Chain, Tmp, N2);
      }
    }

    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
      bool Equal = false;
      // xor(xor(x,y), 1): compare for equality instead of inequality.
      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
            Op0.getOpcode() == ISD::XOR) {
          TheXor = Op0.getNode();
          Equal = true;
        }

      EVT SetCCVT = N1.getValueType();
      if (LegalTypes)
        SetCCVT = TLI.getSetCCResultType(SetCCVT);
      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
                                   SetCCVT,
                                   Op0, Op1,
                                   Equal ? ISD::SETEQ : ISD::SETNE);
      // Replace the uses of XOR with SETCC
      WorkListRemover DeadNodes(*this);
      DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
      removeFromWorkList(N1.getNode());
      DAG.DeleteNode(N1.getNode());
      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
                         MVT::Other, Chain, SetCC, N2);
    }
  }

  return SDValue();
}

// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
6332// 6333SDValue DAGCombiner::visitBR_CC(SDNode *N) { 6334 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 6335 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 6336 6337 // If N is a constant we could fold this into a fallthrough or unconditional 6338 // branch. However that doesn't happen very often in normal code, because 6339 // Instcombine/SimplifyCFG should have handled the available opportunities. 6340 // If we did this folding here, it would be necessary to update the 6341 // MachineBasicBlock CFG, which is awkward. 6342 6343 // Use SimplifySetCC to simplify SETCC's. 6344 SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), 6345 CondLHS, CondRHS, CC->get(), N->getDebugLoc(), 6346 false); 6347 if (Simp.getNode()) AddToWorkList(Simp.getNode()); 6348 6349 // fold to a simpler setcc 6350 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 6351 return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, 6352 N->getOperand(0), Simp.getOperand(2), 6353 Simp.getOperand(0), Simp.getOperand(1), 6354 N->getOperand(4)); 6355 6356 return SDValue(); 6357} 6358 6359/// canFoldInAddressingMode - Return true if 'Use' is a load or a store that 6360/// uses N as its base pointer and that N may be folded in the load / store 6361/// addressing mode. 
static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
                                    SelectionDAG &DAG,
                                    const TargetLowering &TLI) {
  EVT VT;
  // 'Use' must be a non-indexed load or store whose base pointer is N.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) {
    if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
      return false;
    VT = Use->getValueType(0);
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) {
    if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
      return false;
    VT = ST->getValue().getValueType();
  } else
    return false;

  // Build the addressing mode described by N (base plus immediate offset, or
  // base plus register) and ask the target whether it is legal.
  TargetLowering::AddrMode AM;
  if (N->getOpcode() == ISD::ADD) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else if (N->getOpcode() == ISD::SUB) {
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (Offset)
      // [reg +/- imm]
      AM.BaseOffs = -Offset->getSExtValue();
    else
      // [reg +/- reg]
      AM.Scale = 1;
  } else
    return false;

  return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext()));
}

/// CombineToPreIndexedLoadStore - Try turning a load / store into a
/// pre-indexed load / store when the base pointer is an add or subtract
/// and it has other uses besides the load / store. After the
/// transformation, the new indexed load / store has effectively folded
/// the add / subtract in and all of its other uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed forms are only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out. There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;
  // Don't create an indexed load / store with zero offset.
  if (isa<ConstantSDNode>(Offset) &&
      cast<ConstantSDNode>(Offset)->isNullValue())
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Use = *I;
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // An indexed load produces (value, new base, chain); an indexed store
  // produces (new base, chain). Rewire N's results accordingly.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed forms are only formed after DAG legalization.
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  if (Ptr.getNode()->hasOneUse())
    return false;

  // Look for an add/sub use of the pointer to fold in as the post-increment.
  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
         E = Ptr.getNode()->use_end(); I != E; ++I) {
    SDNode *Op = *I;
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
        SDNode *Use = *II;
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode::use_iterator III = Use->use_begin(),
                 EEE = Use->use_end(); III != EEE; ++III) {
            SDNode *UseUse = *III;
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // An indexed load produces (value, new base, chain); an indexed store
        // produces (new base, chain). Rewire N's results accordingly.
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
        // Neither the value nor the updated base is used: replace both with
        // undef and forward the chain, then delete the load.
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)));
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getAlignment())
        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align);
    }
  }

  if (CombinerAA) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid the dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
                               BetterChain, Ptr, LD->getPointerInfo(),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->isInvariant(), LD->getAlignment());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getPointerInfo(),
                                  LD->getMemoryVT(),
                                  LD->isVolatile(),
                                  LD->isNonTemporal(),
                                  LD->getAlignment());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  return SDValue();
}

/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
/// load is having specific bytes cleared out.
/// If so, return the byte size being masked out and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // (0, 0) means "no match".
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for.
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    bool isOk = false;
    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
      if (Chain->getOperand(i).getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result;  // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;      // Byte size being masked out.
  Result.second = NotMaskTZ/8;     // Byte shift of the masked region.
  return Result;
}


/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
/// provides a value as specified by MaskInfo.  If so, replace the specified
/// store with a narrower store of truncated IVal.  Returns the new store
/// node, or null if the transformation cannot be performed.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return 0;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift)
    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8,
                                    DC->getShiftAmountTy(IVal.getValueType())));

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  if (DAG.getTargetLoweringInfo().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}


/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
/// one of 'or', 'xor', and 'and' of immediates.  If 'op' is only touching some
/// of the loaded bits, try narrowing the load and store if it would end up
/// being a win for performance or code size.
6917SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { 6918 StoreSDNode *ST = cast<StoreSDNode>(N); 6919 if (ST->isVolatile()) 6920 return SDValue(); 6921 6922 SDValue Chain = ST->getChain(); 6923 SDValue Value = ST->getValue(); 6924 SDValue Ptr = ST->getBasePtr(); 6925 EVT VT = Value.getValueType(); 6926 6927 if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse()) 6928 return SDValue(); 6929 6930 unsigned Opc = Value.getOpcode(); 6931 6932 // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst 6933 // is a byte mask indicating a consecutive number of bytes, check to see if 6934 // Y is known to provide just those bytes. If so, we try to replace the 6935 // load + replace + store sequence with a single (narrower) store, which makes 6936 // the load dead. 6937 if (Opc == ISD::OR) { 6938 std::pair<unsigned, unsigned> MaskedLoad; 6939 MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain); 6940 if (MaskedLoad.first) 6941 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 6942 Value.getOperand(1), ST,this)) 6943 return SDValue(NewST, 0); 6944 6945 // Or is commutative, so try swapping X and Y. 6946 MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain); 6947 if (MaskedLoad.first) 6948 if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad, 6949 Value.getOperand(0), ST,this)) 6950 return SDValue(NewST, 0); 6951 } 6952 6953 if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) || 6954 Value.getOperand(1).getOpcode() != ISD::Constant) 6955 return SDValue(); 6956 6957 SDValue N0 = Value.getOperand(0); 6958 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 6959 Chain == SDValue(N0.getNode(), 1)) { 6960 LoadSDNode *LD = cast<LoadSDNode>(N0); 6961 if (LD->getBasePtr() != Ptr || 6962 LD->getPointerInfo().getAddrSpace() != 6963 ST->getPointerInfo().getAddrSpace()) 6964 return SDValue(); 6965 6966 // Find the type to narrow it the load / op / store to. 
6967 SDValue N1 = Value.getOperand(1); 6968 unsigned BitWidth = N1.getValueSizeInBits(); 6969 APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue(); 6970 if (Opc == ISD::AND) 6971 Imm ^= APInt::getAllOnesValue(BitWidth); 6972 if (Imm == 0 || Imm.isAllOnesValue()) 6973 return SDValue(); 6974 unsigned ShAmt = Imm.countTrailingZeros(); 6975 unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1; 6976 unsigned NewBW = NextPowerOf2(MSB - ShAmt); 6977 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 6978 while (NewBW < BitWidth && 6979 !(TLI.isOperationLegalOrCustom(Opc, NewVT) && 6980 TLI.isNarrowingProfitable(VT, NewVT))) { 6981 NewBW = NextPowerOf2(NewBW); 6982 NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW); 6983 } 6984 if (NewBW >= BitWidth) 6985 return SDValue(); 6986 6987 // If the lsb changed does not start at the type bitwidth boundary, 6988 // start at the previous one. 6989 if (ShAmt % NewBW) 6990 ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; 6991 APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); 6992 if ((Imm & Mask) == Imm) { 6993 APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); 6994 if (Opc == ISD::AND) 6995 NewImm ^= APInt::getAllOnesValue(NewBW); 6996 uint64_t PtrOff = ShAmt / 8; 6997 // For big endian targets, we need to adjust the offset to the pointer to 6998 // load the correct bytes. 
6999 if (TLI.isBigEndian()) 7000 PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff; 7001 7002 unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff); 7003 Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext()); 7004 if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy)) 7005 return SDValue(); 7006 7007 SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(), 7008 Ptr.getValueType(), Ptr, 7009 DAG.getConstant(PtrOff, Ptr.getValueType())); 7010 SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(), 7011 LD->getChain(), NewPtr, 7012 LD->getPointerInfo().getWithOffset(PtrOff), 7013 LD->isVolatile(), LD->isNonTemporal(), 7014 LD->isInvariant(), NewAlign); 7015 SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, 7016 DAG.getConstant(NewImm, NewVT)); 7017 SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), 7018 NewVal, NewPtr, 7019 ST->getPointerInfo().getWithOffset(PtrOff), 7020 false, false, NewAlign); 7021 7022 AddToWorkList(NewPtr.getNode()); 7023 AddToWorkList(NewLD.getNode()); 7024 AddToWorkList(NewVal.getNode()); 7025 WorkListRemover DeadNodes(*this); 7026 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1)); 7027 ++OpsNarrowed; 7028 return NewST; 7029 } 7030 } 7031 7032 return SDValue(); 7033} 7034 7035/// TransformFPLoadStorePair - For a given floating point load / store pair, 7036/// if the load value isn't used by any other operations, then consider 7037/// transforming the pair to integer load / store operations if the target 7038/// deems the transformation profitable. 
7039SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 7040 StoreSDNode *ST = cast<StoreSDNode>(N); 7041 SDValue Chain = ST->getChain(); 7042 SDValue Value = ST->getValue(); 7043 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 7044 Value.hasOneUse() && 7045 Chain == SDValue(Value.getNode(), 1)) { 7046 LoadSDNode *LD = cast<LoadSDNode>(Value); 7047 EVT VT = LD->getMemoryVT(); 7048 if (!VT.isFloatingPoint() || 7049 VT != ST->getMemoryVT() || 7050 LD->isNonTemporal() || 7051 ST->isNonTemporal() || 7052 LD->getPointerInfo().getAddrSpace() != 0 || 7053 ST->getPointerInfo().getAddrSpace() != 0) 7054 return SDValue(); 7055 7056 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 7057 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 7058 !TLI.isOperationLegal(ISD::STORE, IntVT) || 7059 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 7060 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 7061 return SDValue(); 7062 7063 unsigned LDAlign = LD->getAlignment(); 7064 unsigned STAlign = ST->getAlignment(); 7065 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 7066 unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy); 7067 if (LDAlign < ABIAlign || STAlign < ABIAlign) 7068 return SDValue(); 7069 7070 SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), 7071 LD->getChain(), LD->getBasePtr(), 7072 LD->getPointerInfo(), 7073 false, false, false, LDAlign); 7074 7075 SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), 7076 NewLD, ST->getBasePtr(), 7077 ST->getPointerInfo(), 7078 false, false, STAlign); 7079 7080 AddToWorkList(NewLD.getNode()); 7081 AddToWorkList(NewST.getNode()); 7082 WorkListRemover DeadNodes(*this); 7083 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 7084 ++LdStFP2Int; 7085 return NewST; 7086 } 7087 7088 return SDValue(); 7089} 7090 7091SDValue DAGCombiner::visitSTORE(SDNode *N) { 7092 StoreSDNode *ST = cast<StoreSDNode>(N); 7093 SDValue Chain = 
ST->getChain(); 7094 SDValue Value = ST->getValue(); 7095 SDValue Ptr = ST->getBasePtr(); 7096 7097 // If this is a store of a bit convert, store the input value if the 7098 // resultant store does not need a higher alignment than the original. 7099 if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() && 7100 ST->isUnindexed()) { 7101 unsigned OrigAlign = ST->getAlignment(); 7102 EVT SVT = Value.getOperand(0).getValueType(); 7103 unsigned Align = TLI.getTargetData()-> 7104 getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext())); 7105 if (Align <= OrigAlign && 7106 ((!LegalOperations && !ST->isVolatile()) || 7107 TLI.isOperationLegalOrCustom(ISD::STORE, SVT))) 7108 return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0), 7109 Ptr, ST->getPointerInfo(), ST->isVolatile(), 7110 ST->isNonTemporal(), OrigAlign); 7111 } 7112 7113 // Turn 'store undef, Ptr' -> nothing. 7114 if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed()) 7115 return Chain; 7116 7117 // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr' 7118 if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) { 7119 // NOTE: If the original store is volatile, this transform must not increase 7120 // the number of stores. For example, on x86-32 an f64 can be stored in one 7121 // processor operation but an i64 (which is not legal) requires two. So the 7122 // transform should not be done in this case. 7123 if (Value.getOpcode() != ISD::TargetConstantFP) { 7124 SDValue Tmp; 7125 switch (CFP->getValueType(0).getSimpleVT().SimpleTy) { 7126 default: llvm_unreachable("Unknown FP type"); 7127 case MVT::f16: // We don't do this for these yet. 7128 case MVT::f80: 7129 case MVT::f128: 7130 case MVT::ppcf128: 7131 break; 7132 case MVT::f32: 7133 if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) || 7134 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 7135 Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF(). 
7136 bitcastToAPInt().getZExtValue(), MVT::i32); 7137 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 7138 Ptr, ST->getPointerInfo(), ST->isVolatile(), 7139 ST->isNonTemporal(), ST->getAlignment()); 7140 } 7141 break; 7142 case MVT::f64: 7143 if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations && 7144 !ST->isVolatile()) || 7145 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) { 7146 Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt(). 7147 getZExtValue(), MVT::i64); 7148 return DAG.getStore(Chain, N->getDebugLoc(), Tmp, 7149 Ptr, ST->getPointerInfo(), ST->isVolatile(), 7150 ST->isNonTemporal(), ST->getAlignment()); 7151 } 7152 7153 if (!ST->isVolatile() && 7154 TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) { 7155 // Many FP stores are not made apparent until after legalize, e.g. for 7156 // argument passing. Since this is so common, custom legalize the 7157 // 64-bit integer store into two 32-bit stores. 7158 uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); 7159 SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32); 7160 SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32); 7161 if (TLI.isBigEndian()) std::swap(Lo, Hi); 7162 7163 unsigned Alignment = ST->getAlignment(); 7164 bool isVolatile = ST->isVolatile(); 7165 bool isNonTemporal = ST->isNonTemporal(); 7166 7167 SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo, 7168 Ptr, ST->getPointerInfo(), 7169 isVolatile, isNonTemporal, 7170 ST->getAlignment()); 7171 Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr, 7172 DAG.getConstant(4, Ptr.getValueType())); 7173 Alignment = MinAlign(Alignment, 4U); 7174 SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi, 7175 Ptr, ST->getPointerInfo().getWithOffset(4), 7176 isVolatile, isNonTemporal, 7177 Alignment); 7178 return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other, 7179 St0, St1); 7180 } 7181 7182 break; 7183 } 7184 } 7185 } 7186 7187 // Try to infer better alignment information than the store 
already has. 7188 if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) { 7189 if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { 7190 if (Align > ST->getAlignment()) 7191 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value, 7192 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 7193 ST->isVolatile(), ST->isNonTemporal(), Align); 7194 } 7195 } 7196 7197 // Try transforming a pair floating point load / store ops to integer 7198 // load / store ops. 7199 SDValue NewST = TransformFPLoadStorePair(N); 7200 if (NewST.getNode()) 7201 return NewST; 7202 7203 if (CombinerAA) { 7204 // Walk up chain skipping non-aliasing memory nodes. 7205 SDValue BetterChain = FindBetterChain(N, Chain); 7206 7207 // If there is a better chain. 7208 if (Chain != BetterChain) { 7209 SDValue ReplStore; 7210 7211 // Replace the chain to avoid dependency. 7212 if (ST->isTruncatingStore()) { 7213 ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr, 7214 ST->getPointerInfo(), 7215 ST->getMemoryVT(), ST->isVolatile(), 7216 ST->isNonTemporal(), ST->getAlignment()); 7217 } else { 7218 ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr, 7219 ST->getPointerInfo(), 7220 ST->isVolatile(), ST->isNonTemporal(), 7221 ST->getAlignment()); 7222 } 7223 7224 // Create token to keep both nodes around. 7225 SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), 7226 MVT::Other, Chain, ReplStore); 7227 7228 // Make sure the new and old chains are cleaned up. 7229 AddToWorkList(Token.getNode()); 7230 7231 // Don't add users to work list. 7232 return CombineTo(N, Token, false); 7233 } 7234 } 7235 7236 // Try transforming N to an indexed store. 7237 if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) 7238 return SDValue(N, 0); 7239 7240 // FIXME: is there such a thing as a truncating indexed store? 
7241 if (ST->isTruncatingStore() && ST->isUnindexed() && 7242 Value.getValueType().isInteger()) { 7243 // See if we can simplify the input to this truncstore with knowledge that 7244 // only the low bits are being used. For example: 7245 // "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8" 7246 SDValue Shorter = 7247 GetDemandedBits(Value, 7248 APInt::getLowBitsSet( 7249 Value.getValueType().getScalarType().getSizeInBits(), 7250 ST->getMemoryVT().getScalarType().getSizeInBits())); 7251 AddToWorkList(Value.getNode()); 7252 if (Shorter.getNode()) 7253 return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter, 7254 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 7255 ST->isVolatile(), ST->isNonTemporal(), 7256 ST->getAlignment()); 7257 7258 // Otherwise, see if we can simplify the operation with 7259 // SimplifyDemandedBits, which only works if the value has a single use. 7260 if (SimplifyDemandedBits(Value, 7261 APInt::getLowBitsSet( 7262 Value.getValueType().getScalarType().getSizeInBits(), 7263 ST->getMemoryVT().getScalarType().getSizeInBits()))) 7264 return SDValue(N, 0); 7265 } 7266 7267 // If this is a load followed by a store to the same location, then the store 7268 // is dead/noop. 7269 if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) { 7270 if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() && 7271 ST->isUnindexed() && !ST->isVolatile() && 7272 // There can't be any side effects between the load and store, such as 7273 // a call or store. 7274 Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) { 7275 // The store is dead, remove it. 7276 return Chain; 7277 } 7278 } 7279 7280 // If this is an FP_ROUND or TRUNC followed by a store, fold this into a 7281 // truncating store. We can do this even if this is already a truncstore. 
7282 if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE) 7283 && Value.getNode()->hasOneUse() && ST->isUnindexed() && 7284 TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), 7285 ST->getMemoryVT())) { 7286 return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0), 7287 Ptr, ST->getPointerInfo(), ST->getMemoryVT(), 7288 ST->isVolatile(), ST->isNonTemporal(), 7289 ST->getAlignment()); 7290 } 7291 7292 return ReduceLoadOpStoreWidth(N); 7293} 7294 7295SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { 7296 SDValue InVec = N->getOperand(0); 7297 SDValue InVal = N->getOperand(1); 7298 SDValue EltNo = N->getOperand(2); 7299 DebugLoc dl = N->getDebugLoc(); 7300 7301 // If the inserted element is an UNDEF, just use the input vector. 7302 if (InVal.getOpcode() == ISD::UNDEF) 7303 return InVec; 7304 7305 EVT VT = InVec.getValueType(); 7306 7307 // If we can't generate a legal BUILD_VECTOR, exit 7308 if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 7309 return SDValue(); 7310 7311 // Check that we know which element is being inserted 7312 if (!isa<ConstantSDNode>(EltNo)) 7313 return SDValue(); 7314 unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 7315 7316 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially 7317 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the 7318 // vector elements. 7319 SmallVector<SDValue, 8> Ops; 7320 if (InVec.getOpcode() == ISD::BUILD_VECTOR) { 7321 Ops.append(InVec.getNode()->op_begin(), 7322 InVec.getNode()->op_end()); 7323 } else if (InVec.getOpcode() == ISD::UNDEF) { 7324 unsigned NElts = VT.getVectorNumElements(); 7325 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType())); 7326 } else { 7327 return SDValue(); 7328 } 7329 7330 // Insert the element 7331 if (Elt < Ops.size()) { 7332 // All the operands of BUILD_VECTOR must have the same type; 7333 // we enforce that here. 
7334 EVT OpVT = Ops[0].getValueType(); 7335 if (InVal.getValueType() != OpVT) 7336 InVal = OpVT.bitsGT(InVal.getValueType()) ? 7337 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) : 7338 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal); 7339 Ops[Elt] = InVal; 7340 } 7341 7342 // Return the new vector 7343 return DAG.getNode(ISD::BUILD_VECTOR, dl, 7344 VT, &Ops[0], Ops.size()); 7345} 7346 7347SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { 7348 // (vextract (scalar_to_vector val, 0) -> val 7349 SDValue InVec = N->getOperand(0); 7350 EVT VT = InVec.getValueType(); 7351 EVT NVT = N->getValueType(0); 7352 7353 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) { 7354 // Check if the result type doesn't match the inserted element type. A 7355 // SCALAR_TO_VECTOR may truncate the inserted element and the 7356 // EXTRACT_VECTOR_ELT may widen the extracted vector. 7357 SDValue InOp = InVec.getOperand(0); 7358 if (InOp.getValueType() != NVT) { 7359 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 7360 return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT); 7361 } 7362 return InOp; 7363 } 7364 7365 SDValue EltNo = N->getOperand(1); 7366 bool ConstEltNo = isa<ConstantSDNode>(EltNo); 7367 7368 // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT. 7369 // We only perform this optimization before the op legalization phase because 7370 // we may introduce new vector instructions which are not backed by TD patterns. 7371 // For example on AVX, extracting elements from a wide vector without using 7372 // extract_subvector. 7373 if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE 7374 && ConstEltNo && !LegalOperations) { 7375 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 7376 int NumElem = VT.getVectorNumElements(); 7377 ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec); 7378 // Find the new index to extract from. 7379 int OrigElt = SVOp->getMaskElt(Elt); 7380 7381 // Extracting an undef index is undef. 
7382 if (OrigElt == -1) 7383 return DAG.getUNDEF(NVT); 7384 7385 // Select the right vector half to extract from. 7386 if (OrigElt < NumElem) { 7387 InVec = InVec->getOperand(0); 7388 } else { 7389 InVec = InVec->getOperand(1); 7390 OrigElt -= NumElem; 7391 } 7392 7393 EVT IndexTy = N->getOperand(1).getValueType(); 7394 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT, 7395 InVec, DAG.getConstant(OrigElt, IndexTy)); 7396 } 7397 7398 // Perform only after legalization to ensure build_vector / vector_shuffle 7399 // optimizations have already been done. 7400 if (!LegalOperations) return SDValue(); 7401 7402 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 7403 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 7404 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 7405 7406 if (ConstEltNo) { 7407 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 7408 bool NewLoad = false; 7409 bool BCNumEltsChanged = false; 7410 EVT ExtVT = VT.getVectorElementType(); 7411 EVT LVT = ExtVT; 7412 7413 // If the result of load has to be truncated, then it's not necessarily 7414 // profitable. 7415 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 7416 return SDValue(); 7417 7418 if (InVec.getOpcode() == ISD::BITCAST) { 7419 // Don't duplicate a load with other uses. 
7420 if (!InVec.hasOneUse()) 7421 return SDValue(); 7422 7423 EVT BCVT = InVec.getOperand(0).getValueType(); 7424 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 7425 return SDValue(); 7426 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 7427 BCNumEltsChanged = true; 7428 InVec = InVec.getOperand(0); 7429 ExtVT = BCVT.getVectorElementType(); 7430 NewLoad = true; 7431 } 7432 7433 LoadSDNode *LN0 = NULL; 7434 const ShuffleVectorSDNode *SVN = NULL; 7435 if (ISD::isNormalLoad(InVec.getNode())) { 7436 LN0 = cast<LoadSDNode>(InVec); 7437 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 7438 InVec.getOperand(0).getValueType() == ExtVT && 7439 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 7440 // Don't duplicate a load with other uses. 7441 if (!InVec.hasOneUse()) 7442 return SDValue(); 7443 7444 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 7445 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 7446 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 7447 // => 7448 // (load $addr+1*size) 7449 7450 // Don't duplicate a load with other uses. 7451 if (!InVec.hasOneUse()) 7452 return SDValue(); 7453 7454 // If the bit convert changed the number of elements, it is unsafe 7455 // to examine the mask. 7456 if (BCNumEltsChanged) 7457 return SDValue(); 7458 7459 // Select the input vector, guarding against out of range extract vector. 7460 unsigned NumElems = VT.getVectorNumElements(); 7461 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 7462 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); 7463 7464 if (InVec.getOpcode() == ISD::BITCAST) { 7465 // Don't duplicate a load with other uses. 7466 if (!InVec.hasOneUse()) 7467 return SDValue(); 7468 7469 InVec = InVec.getOperand(0); 7470 } 7471 if (ISD::isNormalLoad(InVec.getNode())) { 7472 LN0 = cast<LoadSDNode>(InVec); 7473 Elt = (Idx < (int)NumElems) ? 
Idx : Idx - (int)NumElems; 7474 } 7475 } 7476 7477 // Make sure we found a non-volatile load and the extractelement is 7478 // the only use. 7479 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 7480 return SDValue(); 7481 7482 // If Idx was -1 above, Elt is going to be -1, so just return undef. 7483 if (Elt == -1) 7484 return DAG.getUNDEF(LVT); 7485 7486 unsigned Align = LN0->getAlignment(); 7487 if (NewLoad) { 7488 // Check the resultant load doesn't need a higher alignment than the 7489 // original load. 7490 unsigned NewAlign = 7491 TLI.getTargetData() 7492 ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); 7493 7494 if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) 7495 return SDValue(); 7496 7497 Align = NewAlign; 7498 } 7499 7500 SDValue NewPtr = LN0->getBasePtr(); 7501 unsigned PtrOff = 0; 7502 7503 if (Elt) { 7504 PtrOff = LVT.getSizeInBits() * Elt / 8; 7505 EVT PtrType = NewPtr.getValueType(); 7506 if (TLI.isBigEndian()) 7507 PtrOff = VT.getSizeInBits() / 8 - PtrOff; 7508 NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr, 7509 DAG.getConstant(PtrOff, PtrType)); 7510 } 7511 7512 // The replacement we need to do here is a little tricky: we need to 7513 // replace an extractelement of a load with a load. 7514 // Use ReplaceAllUsesOfValuesWith to do the replacement. 7515 // Note that this replacement assumes that the extractvalue is the only 7516 // use of the load; that's okay because we don't want to perform this 7517 // transformation in other cases anyway. 7518 SDValue Load; 7519 SDValue Chain; 7520 if (NVT.bitsGT(LVT)) { 7521 // If the result type of vextract is wider than the load, then issue an 7522 // extending load instead. 7523 ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) 7524 ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; 7525 Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), 7526 NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), 7527 LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); 7528 Chain = Load.getValue(1); 7529 } else { 7530 Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, 7531 LN0->getPointerInfo().getWithOffset(PtrOff), 7532 LN0->isVolatile(), LN0->isNonTemporal(), 7533 LN0->isInvariant(), Align); 7534 Chain = Load.getValue(1); 7535 if (NVT.bitsLT(LVT)) 7536 Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); 7537 else 7538 Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); 7539 } 7540 WorkListRemover DeadNodes(*this); 7541 SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; 7542 SDValue To[] = { Load, Chain }; 7543 DAG.ReplaceAllUsesOfValuesWith(From, To, 2); 7544 // Since we're explcitly calling ReplaceAllUses, add the new node to the 7545 // worklist explicitly as well. 7546 AddToWorkList(Load.getNode()); 7547 AddUsersToWorkList(Load.getNode()); // Add users too 7548 // Make sure to revisit this node to clean it up; it will usually be dead. 7549 AddToWorkList(N); 7550 return SDValue(N, 0); 7551 } 7552 7553 return SDValue(); 7554} 7555 7556SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { 7557 unsigned NumInScalars = N->getNumOperands(); 7558 DebugLoc dl = N->getDebugLoc(); 7559 EVT VT = N->getValueType(0); 7560 // Check to see if this is a BUILD_VECTOR of a bunch of values 7561 // which come from any_extend or zero_extend nodes. If so, we can create 7562 // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR 7563 // optimizations. We do not handle sign-extend because we can't fill the sign 7564 // using shuffles. 7565 EVT SourceType = MVT::Other; 7566 bool AllAnyExt = true; 7567 bool AllUndef = true; 7568 for (unsigned i = 0; i != NumInScalars; ++i) { 7569 SDValue In = N->getOperand(i); 7570 // Ignore undef inputs. 
7571 if (In.getOpcode() == ISD::UNDEF) continue; 7572 AllUndef = false; 7573 7574 bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; 7575 bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; 7576 7577 // Abort if the element is not an extension. 7578 if (!ZeroExt && !AnyExt) { 7579 SourceType = MVT::Other; 7580 break; 7581 } 7582 7583 // The input is a ZeroExt or AnyExt. Check the original type. 7584 EVT InTy = In.getOperand(0).getValueType(); 7585 7586 // Check that all of the widened source types are the same. 7587 if (SourceType == MVT::Other) 7588 // First time. 7589 SourceType = InTy; 7590 else if (InTy != SourceType) { 7591 // Multiple income types. Abort. 7592 SourceType = MVT::Other; 7593 break; 7594 } 7595 7596 // Check if all of the extends are ANY_EXTENDs. 7597 AllAnyExt &= AnyExt; 7598 } 7599 7600 if (AllUndef) 7601 return DAG.getUNDEF(VT); 7602 7603 // In order to have valid types, all of the inputs must be extended from the 7604 // same source type and all of the inputs must be any or zero extend. 7605 // Scalar sizes must be a power of two. 7606 EVT OutScalarTy = N->getValueType(0).getScalarType(); 7607 bool ValidTypes = SourceType != MVT::Other && 7608 isPowerOf2_32(OutScalarTy.getSizeInBits()) && 7609 isPowerOf2_32(SourceType.getSizeInBits()); 7610 7611 // We perform this optimization post type-legalization because 7612 // the type-legalizer often scalarizes integer-promoted vectors. 7613 // Performing this optimization before may create bit-casts which 7614 // will be type-legalized to complex code sequences. 7615 // We perform this optimization only before the operation legalizer because we 7616 // may introduce illegal operations. 7617 // Create a new simpler BUILD_VECTOR sequence which other optimizations can 7618 // turn into a single shuffle instruction. 
7619 if ((Level == AfterLegalizeVectorOps || Level == AfterLegalizeTypes) && 7620 ValidTypes) { 7621 bool isLE = TLI.isLittleEndian(); 7622 unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); 7623 assert(ElemRatio > 1 && "Invalid element size ratio"); 7624 SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType): 7625 DAG.getConstant(0, SourceType); 7626 7627 unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); 7628 SmallVector<SDValue, 8> Ops(NewBVElems, Filler); 7629 7630 // Populate the new build_vector 7631 for (unsigned i=0; i < N->getNumOperands(); ++i) { 7632 SDValue Cast = N->getOperand(i); 7633 assert((Cast.getOpcode() == ISD::ANY_EXTEND || 7634 Cast.getOpcode() == ISD::ZERO_EXTEND || 7635 Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); 7636 SDValue In; 7637 if (Cast.getOpcode() == ISD::UNDEF) 7638 In = DAG.getUNDEF(SourceType); 7639 else 7640 In = Cast->getOperand(0); 7641 unsigned Index = isLE ? (i * ElemRatio) : 7642 (i * ElemRatio + (ElemRatio - 1)); 7643 7644 assert(Index < Ops.size() && "Invalid index"); 7645 Ops[Index] = In; 7646 } 7647 7648 // The type of the new BUILD_VECTOR node. 7649 EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); 7650 assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && 7651 "Invalid vector size"); 7652 // Check if the new vector type is legal. 7653 if (!isTypeLegal(VecVT)) return SDValue(); 7654 7655 // Make the new BUILD_VECTOR. 7656 SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), 7657 VecVT, &Ops[0], Ops.size()); 7658 7659 // The new BUILD_VECTOR node has the potential to be further optimized. 7660 AddToWorkList(BV.getNode()); 7661 // Bitcast to the desired type. 7662 return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); 7663 } 7664 7665 // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT 7666 // operations. 
If so, and if the EXTRACT_VECTOR_ELT vector inputs come from 7667 // at most two distinct vectors, turn this into a shuffle node. 7668 7669 // May only combine to shuffle after legalize if shuffle is legal. 7670 if (LegalOperations && 7671 !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT)) 7672 return SDValue(); 7673 7674 SDValue VecIn1, VecIn2; 7675 for (unsigned i = 0; i != NumInScalars; ++i) { 7676 // Ignore undef inputs. 7677 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 7678 7679 // If this input is something other than a EXTRACT_VECTOR_ELT with a 7680 // constant index, bail out. 7681 if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || 7682 !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { 7683 VecIn1 = VecIn2 = SDValue(0, 0); 7684 break; 7685 } 7686 7687 // We allow up to two distinct input vectors. 7688 SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); 7689 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 7690 continue; 7691 7692 if (VecIn1.getNode() == 0) { 7693 VecIn1 = ExtractedFromVec; 7694 } else if (VecIn2.getNode() == 0) { 7695 VecIn2 = ExtractedFromVec; 7696 } else { 7697 // Too many inputs. 7698 VecIn1 = VecIn2 = SDValue(0, 0); 7699 break; 7700 } 7701 } 7702 7703 // If everything is good, we can make a shuffle operation. 7704 if (VecIn1.getNode()) { 7705 SmallVector<int, 8> Mask; 7706 for (unsigned i = 0; i != NumInScalars; ++i) { 7707 if (N->getOperand(i).getOpcode() == ISD::UNDEF) { 7708 Mask.push_back(-1); 7709 continue; 7710 } 7711 7712 // If extracting from the first vector, just use the index directly. 
7713 SDValue Extract = N->getOperand(i); 7714 SDValue ExtVal = Extract.getOperand(1); 7715 if (Extract.getOperand(0) == VecIn1) { 7716 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 7717 if (ExtIndex > VT.getVectorNumElements()) 7718 return SDValue(); 7719 7720 Mask.push_back(ExtIndex); 7721 continue; 7722 } 7723 7724 // Otherwise, use InIdx + VecSize 7725 unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 7726 Mask.push_back(Idx+NumInScalars); 7727 } 7728 7729 // We can't generate a shuffle node with mismatched input and output types. 7730 // Attempt to transform a single input vector to the correct type. 7731 if ((VT != VecIn1.getValueType())) { 7732 // We don't support shuffeling between TWO values of different types. 7733 if (VecIn2.getNode() != 0) 7734 return SDValue(); 7735 7736 // We only support widening of vectors which are half the size of the 7737 // output registers. For example XMM->YMM widening on X86 with AVX. 7738 if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits()) 7739 return SDValue(); 7740 7741 // Widen the input vector by adding undef values. 7742 VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, 7743 VecIn1, DAG.getUNDEF(VecIn1.getValueType())); 7744 } 7745 7746 // If VecIn2 is unused then change it to undef. 7747 VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); 7748 7749 // Check that we were able to transform all incoming values to the same type. 7750 if (VecIn2.getValueType() != VecIn1.getValueType() || 7751 VecIn1.getValueType() != VT) 7752 return SDValue(); 7753 7754 // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes. 7755 if (!isTypeLegal(VT)) 7756 return SDValue(); 7757 7758 // Return the new VECTOR_SHUFFLE node. 
    // Build the two shuffle inputs (VecIn2 is undef if only one source
    // vector was seen) and emit the VECTOR_SHUFFLE with the mask computed
    // above.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}

SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
  // inputs come from at most two distinct vectors, turn this into a shuffle
  // node.

  // If we only have one input vector, we don't need to do any concatenation.
  // A single-operand CONCAT_VECTORS is just its operand.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  return SDValue();
}

SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  // Try to fold an extract of an insert_subvector.
  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same type, and are half size of larger vectors.
    EVT BigVT = V->getOperand(0).getValueType();
    EVT SmallVT = V->getOperand(1).getValueType();
    if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
      return SDValue();

    // Only handle cases where both indexes are constants with the same type.
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx &&
        InsIdx->getValueType(0).getSizeInBits() <= 64 &&
        ExtIdx->getValueType(0).getSizeInBits() <= 64) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal => V2 (the value that was just inserted there)
      //    otherwise         => (extract_subvec V1, ExtIdx)
      // Note: the "otherwise" case is only correct here because the
      // extracted/inserted subvectors are exactly half of the big vector
      // (checked above), so distinct indices cannot overlap.
      if (InsIdx->getZExtValue() == ExtIdx->getZExtValue())
        return V->getOperand(1);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT,
                         V->getOperand(0), N->getOperand(1));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef: mask entries referring to the
  // second (identical) operand are remapped into the first.
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
  if (N0.getOpcode() == ISD::UNDEF) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Negative (undef) mask entries are left alone; real entries are
      // swapped between the two operand halves of the mask index space.
      if (Idx >= 0) {
        if (Idx < (int)NumElts)
          Idx += NumElts;
        else
          Idx -= NumElts;
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef: any mask entry selecting from
  // the undef RHS is itself turned into an undef (-1) entry.
  if (N1.getOpcode() == ISD::UNDEF) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector with all scalar elements the same.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      // Find the first non-undef element; it is the candidate splat value.
      SDValue Base;
      bool AllSame = true;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>: shuffling a uniform
      // vector is a no-op.
      if (AllSame)
        return N0;
    }
  }

  // If this shuffle node is simply a swizzle of another shuffle node,
  // and it reverses the swizzle of the previous shuffle then we can
  // optimize shuffle(shuffle(x, undef), undef) -> x.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N1.getOpcode() == ISD::UNDEF) {

    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // Shuffle nodes can only reverse shuffles with a single non-undef value.
    if (N0.getOperand(1).getOpcode() != ISD::UNDEF)
      return SDValue();

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      assert(Idx < (int)NumElts && "Index references undef operand");
      // Next, this index comes from the first value, which is the incoming
      // shuffle. Adopt the incoming index.
      if (Idx >= 0)
        Idx = OtherSV->getMaskElt(Idx);

      // The combined shuffle must map each index to itself.  Otherwise the
      // pair of shuffles is not an identity and cannot be elided.
      if (Idx >= 0 && (unsigned)Idx != i)
        return SDValue();
    }

    // Every lane maps to itself: the two shuffles cancel out.
    return OtherSV->getOperand(0);
  }

  return SDValue();
}

// Fold away a MEMBARRIER (N) that immediately follows an atomic operation
// which itself is chained directly to another MEMBARRIER.  The atomic is
// rechained past the leading fence, and returning the updated atomic
// replaces N, eliminating the trailing fence as well.  Only done when the
// target opts in via getShouldFoldAtomicFences().
SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) {
  if (!TLI.getShouldFoldAtomicFences())
    return SDValue();

  SDValue atomic = N->getOperand(0);
  // Only a fixed set of atomic opcodes is handled.
  switch (atomic.getOpcode()) {
    case ISD::ATOMIC_CMP_SWAP:
    case ISD::ATOMIC_SWAP:
    case ISD::ATOMIC_LOAD_ADD:
    case ISD::ATOMIC_LOAD_SUB:
    case ISD::ATOMIC_LOAD_AND:
    case ISD::ATOMIC_LOAD_OR:
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_NAND:
    case ISD::ATOMIC_LOAD_MIN:
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
      break;
    default:
      return SDValue();
  }

  // The atomic's chain must come straight from another barrier.
  SDValue fence = atomic.getOperand(0);
  if (fence.getOpcode() != ISD::MEMBARRIER)
    return SDValue();

  switch (atomic.getOpcode()) {
    // ATOMIC_CMP_SWAP carries one extra operand (the comparison value).
    case ISD::ATOMIC_CMP_SWAP:
      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
                                            fence.getOperand(0),
                                            atomic.getOperand(1),
                                            atomic.getOperand(2),
                                            atomic.getOperand(3)),
                     atomic.getResNo());
    case ISD::ATOMIC_SWAP:
    case ISD::ATOMIC_LOAD_ADD:
    case ISD::ATOMIC_LOAD_SUB:
    case ISD::ATOMIC_LOAD_AND:
    case ISD::ATOMIC_LOAD_OR:
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_NAND:
    case ISD::ATOMIC_LOAD_MIN:
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
      return SDValue(DAG.UpdateNodeOperands(atomic.getNode(),
                                            fence.getOperand(0),
                                            atomic.getOperand(1),
                                            atomic.getOperand(2)),
                     atomic.getResNo());
    default:
      return SDValue();
  }
}

/// XformToShuffleWithZero - Returns a vector_shuffle if it is able to
/// transform an AND to a vector_shuffle with the destination vector and a
/// zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  DebugLoc dl = N->getDebugLoc();
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  if (N->getOpcode() == ISD::AND) {
    // Look through a bitcast on the mask operand.
    if (RHS.getOpcode() == ISD::BITCAST)
      RHS = RHS.getOperand(0);
    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
      SmallVector<int, 8> Indices;
      unsigned NumElts = RHS.getNumOperands();
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue Elt = RHS.getOperand(i);
        // Every mask element must be a constant that is either all-ones
        // (keep lane i) or zero (take the zero vector's lane, i.e. clear).
        if (!isa<ConstantSDNode>(Elt))
          return SDValue();

        if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
          Indices.push_back(i);
        else if (cast<ConstantSDNode>(Elt)->isNullValue())
          Indices.push_back(NumElts);
        else
          return SDValue();
      }

      // Let's see if the target supports this vector_shuffle.
      EVT RVT = RHS.getValueType();
      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
        return SDValue();

      // Return the new VECTOR_SHUFFLE node.
      EVT EltVT = RVT.getVectorElementType();
      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                     DAG.getConstant(0, EltVT));
      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                                 RVT, &ZeroOps[0], ZeroOps.size());
      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
    }
  }

  return SDValue();
}

/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  // After legalize, the target may be depending on adds and other
  // binary ops to provide legal ways to construct constants or other
  // things. Simplifying them may result in a loss of legality.
  if (LegalOperations) return SDValue();

  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  // First try turning (and V, constant-mask) into a shuffle with zero.
  SDValue Shuffle = XformToShuffleWithZero(N);
  if (Shuffle.getNode()) return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation element-wise.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);
      // If these two elements can't be folded, bail out.  Only undef and
      // constant (int or FP) elements are foldable.
      if ((LHSOp.getOpcode() != ISD::UNDEF &&
           LHSOp.getOpcode() != ISD::Constant &&
           LHSOp.getOpcode() != ISD::ConstantFP) ||
          (RHSOp.getOpcode() != ISD::UNDEF &&
           RHSOp.getOpcode() != ISD::Constant &&
           RHSOp.getOpcode() != ISD::ConstantFP))
        break;

      // Can't fold divide by zero.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if ((RHSOp.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
            (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
          break;
      }

      EVT VT = LHSOp.getValueType();
      EVT RVT = RHSOp.getValueType();
      if (RVT != VT) {
        // Integer BUILD_VECTOR operands may have types larger than the element
        // size (e.g., when the element type is not legal).  Prior to type
        // legalization, the types may not match between the two BUILD_VECTORS.
        // Truncate one of the operands to make them match.
        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
          RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp);
        } else {
          LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp);
          VT = RVT;
        }
      }
      SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
                                   LHSOp, RHSOp);
      // If constant folding didn't actually produce a constant (or undef),
      // give up on the whole vector.
      if (FoldOp.getOpcode() != ISD::UNDEF &&
          FoldOp.getOpcode() != ISD::Constant &&
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
      AddToWorkList(FoldOp.getNode());
    }

    // Only rebuild the vector if every element folded.
    if (Ops.size() == LHS.getNumOperands())
      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
                         LHS.getValueType(), &Ops[0], Ops.size());
  }

  return SDValue();
}

SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() == ISD::SETCC &&
         "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      // Rebuild as setcc(cond) feeding a plain select.
      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorkList(SETCC.getNode());
      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
    }

    return SCC;
  }
  return SDValue();
}

/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select.  Callers of this should assume that TheSelect is deleted if this
/// returns true.  As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.  Requires both values to have a single use.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0)
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      // A load feeding the condition (through its chain result) would make
      // the new load depend on itself.
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0), LLD->getBasePtr(),
                         RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // SELECT_CC compares two operands; both must be cycle-checked.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    // Build the replacement load from the selected address.
    SDValue Load;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         TheSelect->getDebugLoc(),
                         // FIXME: Discards pointer info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         LLD->isInvariant(), LLD->getAlignment());
    } else {
      // If one side is EXTLOAD (anyext), use the other side's stricter
      // extension kind for the merged load.
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            TheSelect->getDebugLoc(),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), LLD->getAlignment());
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant.
  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());
  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());

  // fold select_cc true, x, y -> x
  if (SCCC && !SCCC->isNullValue())
    return N2;
  // fold select_cc false, x, y -> y
  if (SCCC && SCCC->isNullValue())
    return N3;

  // Check to see if we can simplify the select into an fabs node.
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->getValueAPF().isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS, DL, VT, N3);
    }
  }

  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
  // in it.  This is a win when the constant is not otherwise available because
  // it replaces two constant pool loads with one.  We only do this if the FP
  // type is known to be legal, because if it isn't, then we are before legalize
  // types an we want the other legalization to happen first (e.g. to avoid
  // messing with soft float) and if the ConstantFP is not legal, because if
  // it is legal, we may not need to store the FP constant in a constant pool.
  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
      if (TLI.isTypeLegal(N2.getValueType()) &&
          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
           TargetLowering::Legal) &&
          // If both constants have multiple uses, then we won't need to do an
          // extra load, they are likely around in registers for other users.
          (TV->hasOneUse() || FV->hasOneUse())) {
        // Array is {false-value, true-value} so the select can index it
        // with the boolean condition directly (0 or 1).
        Constant *Elts[] = {
          const_cast<ConstantFP*>(FV->getConstantFPValue()),
          const_cast<ConstantFP*>(TV->getConstantFPValue())
        };
        Type *FPTy = Elts[0]->getType();
        const TargetData &TD = *TLI.getTargetData();

        // Create a ConstantArray of the two constants.
        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
                                            TD.getPrefTypeAlignment(FPTy));
        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();

        // Get the offsets to the 0 and 1 element of the array so that we can
        // select between them.
        SDValue Zero = DAG.getIntPtrConstant(0);
        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
        SDValue One = DAG.getIntPtrConstant(EltSize);

        // offset = cond ? sizeof(element) : 0, then load from CPIdx+offset.
        SDValue Cond = DAG.getSetCC(DL,
                                    TLI.getSetCCResultType(N0.getValueType()),
                                    N0, N1, CC);
        AddToWorkList(Cond.getNode());
        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
                                        Cond, One, Zero);
        AddToWorkList(CstOffset.getNode());
        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
                            CstOffset);
        AddToWorkList(CPIdx.getNode());
        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
                           MachinePointerInfo::getConstantPool(), false,
                           false, false, Alignment);

      }
    }

  // Check to see if we can perform the "gzip trick", transforming
  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
    EVT XType = N0.getValueType();
    EVT AType = N2.getValueType();
    if (XType.bitsGE(AType)) {
      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
      // single-bit constant.
      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
        // Shift the sign bit of X down into the position of A's single bit.
        unsigned ShCtV = N2C->getAPIntValue().logBase2();
        ShCtV = XType.getSizeInBits()-ShCtV-1;
        SDValue ShCt = DAG.getConstant(ShCtV,
                                       getShiftAmountTy(N0.getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
                                    XType, N0, ShCt);
        AddToWorkList(Shift.getNode());

        if (XType.bitsGT(AType)) {
          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
          AddToWorkList(Shift.getNode());
        }

        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
      }

      // General case: smear the sign bit across the whole value, then AND.
      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
                                  XType, N0,
                                  DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(N0.getValueType())));
      AddToWorkList(Shift.getNode());

      if (XType.bitsGT(AType)) {
        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
        AddToWorkList(Shift.getNode());
      }

      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
    }
  }

  // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
  // A plaintext description would be, we can turn the SELECT_CC into an AND
  // when the condition can be materialized as an all-ones register.  Any
  // single bit-test can be materialized as an all-ones register with
  // shift-left and shift-right-arith.
  if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
      N0->getValueType(0) == VT &&
      N1C && N1C->isNullValue() &&
      N2C && N2C->isNullValue()) {
    SDValue AndLHS = N0->getOperand(0);
    ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
    if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
      // Shift the tested bit over the sign bit.
      APInt AndMask = ConstAndRHS->getAPIntValue();
      SDValue ShlAmt =
        DAG.getConstant(AndMask.countLeadingZeros(),
                        getShiftAmountTy(AndLHS.getValueType()));
      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);

      // Now arithmetic right shift it all the way over, so the result is
      // either all-ones, or zero.
      SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth()-1,
                        getShiftAmountTy(Shl.getValueType()));
      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);

      return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
    }
  }

  // fold select C, 16, 0 -> shl C, 4
  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
      TLI.getBooleanContents(N0.getValueType().isVector()) ==
      TargetLowering::ZeroOrOneBooleanContent) {

    // If the caller doesn't want us to simplify this into a zext of a compare,
    // don't do it.
    if (NotExtCompare && N2C->getAPIntValue() == 1)
      return SDValue();

    // Get a SetCC of the condition
    // FIXME: Should probably make sure that setcc is legal if we ever have a
    // target where it isn't.
    SDValue Temp, SCC;
    // cast from setcc result type to select result type
    if (LegalTypes) {
      SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
                         N0, N1, CC);
      if (N2.getValueType().bitsLT(SCC.getValueType()))
        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
      else
        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                           N2.getValueType(), SCC);
    } else {
      // Before type legalization the setcc can safely produce an i1.
      SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
                         N2.getValueType(), SCC);
    }

    AddToWorkList(SCC.getNode());
    AddToWorkList(Temp.getNode());

    // select C, 1, 0 is just the zero-extended setcc itself.
    if (N2C->getAPIntValue() == 1)
      return Temp;

    // shl setcc result by log2 n2c
    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
                                       getShiftAmountTy(Temp.getValueType())));
  }

  // Check to see if this is the equivalent of setcc
  // FIXME: Turn all of these into setcc if setcc if setcc is legal
  // otherwise, go ahead with the folds.
  // NOTE: this entire region is deliberately disabled by the leading
  // "0 &&" (dead code kept pending the FIXME above).
  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
    EVT XType = N0.getValueType();
    if (!LegalOperations ||
        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
      if (Res.getValueType() != VT)
        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
      return Res;
    }

    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::CTLZ, XType))) {
      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
                                       getShiftAmountTy(Ctlz.getValueType())));
    }
    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
                                  XType, DAG.getConstant(0, XType), N0);
      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
      return DAG.getNode(ISD::SRL, DL, XType,
                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
                         DAG.getConstant(XType.getSizeInBits()-1,
                                         getShiftAmountTy(XType)));
    }
    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
                                 DAG.getConstant(XType.getSizeInBits()-1,
                                        getShiftAmountTy(N0.getValueType())));
      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
    }
  }

  // Check to see if this is an integer abs.
8516 // select_cc setg[te] X, 0, X, -X -> 8517 // select_cc setgt X, -1, X, -X -> 8518 // select_cc setl[te] X, 0, -X, X -> 8519 // select_cc setlt X, 1, -X, X -> 8520 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 8521 if (N1C) { 8522 ConstantSDNode *SubC = NULL; 8523 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 8524 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 8525 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 8526 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 8527 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 8528 (N1C->isOne() && CC == ISD::SETLT)) && 8529 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 8530 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 8531 8532 EVT XType = N0.getValueType(); 8533 if (SubC && SubC->isNullValue() && XType.isInteger()) { 8534 SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, 8535 N0, 8536 DAG.getConstant(XType.getSizeInBits()-1, 8537 getShiftAmountTy(N0.getValueType()))); 8538 SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), 8539 XType, N0, Shift); 8540 AddToWorkList(Shift.getNode()); 8541 AddToWorkList(Add.getNode()); 8542 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 8543 } 8544 } 8545 8546 return SDValue(); 8547} 8548 8549/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 8550SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 8551 SDValue N1, ISD::CondCode Cond, 8552 DebugLoc DL, bool foldBooleans) { 8553 TargetLowering::DAGCombinerInfo 8554 DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); 8555 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 8556} 8557 8558/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 8559/// return a DAG expression to select that will generate the same value by 8560/// multiplying by a magic number. 
/// See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  // Delegate to the target to build the magic-number multiply sequence; the
  // target records every node it creates in Built.
  SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built);

  // Queue all newly created nodes so the combiner revisits them.
  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}

/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number.  See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
  std::vector<SDNode*> Built;
  // Same pattern as BuildSDIV, but for the unsigned magic-number expansion.
  SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built);

  // Queue all newly created nodes so the combiner revisits them.
  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
       ii != ee; ++ii)
    AddToWorkList(*ii);
  return S;
}

/// FindBaseOffset - Decompose a pointer into a base node plus a constant
/// offset, and classify the base.  On return exactly one of the following
/// holds: GV is the underlying GlobalValue (return value false), CV is the
/// underlying constant-pool entry (return value false), or the return value
/// says whether Base is a FrameIndex.  A true return means the base is a frame
/// index, which is known not to alias with anything but itself.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
                           const GlobalValue *&GV, void *&CV) {
  // Assume it is a primitive operation.
  Base = Ptr; Offset = 0; GV = 0; CV = 0;

  // If it's adding a simple constant then integrate the offset.
  // NOTE(review): only one ADD layer is peeled here; a chain of ADDs is left
  // as-is — presumably rare after other combines, but confirm.
  if (Base.getOpcode() == ISD::ADD) {
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
      Base = Base.getOperand(0);
      Offset += C->getZExtValue();
    }
  }

  // Return the underlying GlobalValue, and update the Offset.  Return false
  // for GlobalAddressSDNode since the same GlobalAddress may be represented
  // by multiple nodes with different offsets.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
    GV = G->getGlobal();
    Offset += G->getOffset();
    return false;
  }

  // Return the underlying Constant value, and update the Offset.  Return false
  // for ConstantSDNodes since the same constant pool entry may be represented
  // by multiple nodes with different offsets.
  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
                                         : (void *)C->getConstVal();
    Offset += C->getOffset();
    return false;
  }

  // If it's any of the following then it can't alias with anything but itself.
  return isa<FrameIndexSDNode>(Base);
}

/// isAlias - Return true if there is any possibility that the two addresses
/// overlap.  Conservative: returns true whenever no-alias cannot be proven.
/// Sizes are in bytes; SrcValue*/TBAAInfo* carry the IR-level memory-operand
/// information used for the alignment check and the AliasAnalysis query.
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
                          const Value *SrcValue1, int SrcValueOffset1,
                          unsigned SrcValueAlign1,
                          const MDNode *TBAAInfo1,
                          SDValue Ptr2, int64_t Size2,
                          const Value *SrcValue2, int SrcValueOffset2,
                          unsigned SrcValueAlign2,
                          const MDNode *TBAAInfo2) const {
  // If they are the same then they must be aliases.
  if (Ptr1 == Ptr2) return true;

  // Gather base node and offset information.
  SDValue Base1, Base2;
  int64_t Offset1, Offset2;
  const GlobalValue *GV1, *GV2;
  void *CV1, *CV2;
  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);

  // If they have the same base address then check to see if they overlap.
  // Same base means the offsets are directly comparable: alias iff the byte
  // ranges [Offset, Offset+Size) intersect.
  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.  (Each side is a distinct frame index, global,
  // or constant-pool entry at this point.)
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias.  This check is conservative for now to catch cases created by
  // splitting vector types.
  if ((SrcValueAlign1 == SrcValueAlign2) &&
      (SrcValueOffset1 != SrcValueOffset2) &&
      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
    // Reduce both offsets modulo the common alignment; disjointness of the
    // residues proves the equally-sized accesses can never overlap.
    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;

    // There is no overlap between these relatively aligned accesses of similar
    // size, return no alias.
    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
      return false;
  }

  if (CombinerGlobalAA) {
    // Use alias analysis information.  The query sizes are widened so both
    // Locations start at the common minimum offset and still cover each
    // original access.
    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
    AliasAnalysis::AliasResult AAResult =
      AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
               AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
    if (AAResult == AliasAnalysis::NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}

/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node (which must be a load or store).  Fills in the pointer, access size in
/// bytes, and IR-level memory-operand data.  Returns true if the operand was a
/// load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
                                SDValue &Ptr, int64_t &Size,
                                const Value *&SrcValue,
                                int &SrcValueOffset,
                                unsigned &SrcValueAlign,
                                const MDNode *&TBAAInfo) const {
  LSBaseSDNode *LS = cast<LSBaseSDNode>(N);

  Ptr = LS->getBasePtr();
  Size = LS->getMemoryVT().getSizeInBits() >> 3; // bits -> bytes
  SrcValue = LS->getSrcValue();
  SrcValueOffset = LS->getSrcValueOffset();
  SrcValueAlign = LS->getOriginalAlignment();
  TBAAInfo = LS->getTBAAInfo();
  return isa<LoadSDNode>(LS);
}

/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.  If the
/// walk gets too deep or finds more than two aliases, it gives up and reports
/// just OriginalChain.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVector<SDValue, 8> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // Worklist (stack) of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.
  SDValue Ptr;
  int64_t Size;
  const Value *SrcValue;
  int SrcValueOffset;
  unsigned SrcValueAlign;
  const MDNode *SrcTBAAInfo;
  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
                              SrcValueAlign, SrcTBAAInfo);

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.back();
    Chains.pop_back();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we find two aliases.  If we've seen two aliases, assume we'll
    // find more and revert to original chain since the xform is unlikely to be
    // profitable.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > 6 || Aliases.size() == 2) {
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      break;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()))
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      SDValue OpPtr;
      int64_t OpSize;
      const Value *OpSrcValue;
      int OpSrcValueOffset;
      unsigned OpSrcValueAlign;
      const MDNode *OpSrcTBAAInfo;
      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
                                    OpSrcValue, OpSrcValueOffset,
                                    OpSrcValueAlign,
                                    OpSrcTBAAInfo);

      // If chain is alias then stop here.  Two loads never need ordering
      // between each other, so a load/load pair is skipped over regardless of
      // the address check.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
                  SrcTBAAInfo,
                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
                  OpSrcValueAlign, OpSrcTBAAInfo)) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to be worth exploring; treat the whole factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}

/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node.)  Returns the entry node, the single
/// aliasing chain, or a fresh TokenFactor over all aliasing chains.
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
                     &Aliases[0], Aliases.size());
}

// SelectionDAG::Combine - This is the entry point for the file: it constructs
// a DAGCombiner over this DAG and runs it at the given combine level.
//
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  // Run is the main entry point of the DAGCombiner class; the temporary
  // combiner lives only for the duration of this call.
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}