DAGCombiner.cpp revision ac7eae5b89d82603c4cf315458380c6a401a8fb2
1//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
11// both before and after the DAG is legalized.
12//
13// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
14// primarily intended to handle simplification opportunities that are implicit
15// in the LLVM IR and exposed by the various codegen lowering phases.
16//
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "dagcombine"
20#include "llvm/CodeGen/SelectionDAG.h"
21#include "llvm/DerivedTypes.h"
22#include "llvm/LLVMContext.h"
23#include "llvm/CodeGen/MachineFunction.h"
24#include "llvm/CodeGen/MachineFrameInfo.h"
25#include "llvm/CodeGen/PseudoSourceValue.h"
26#include "llvm/Analysis/AliasAnalysis.h"
27#include "llvm/Target/TargetData.h"
28#include "llvm/Target/TargetFrameInfo.h"
29#include "llvm/Target/TargetLowering.h"
30#include "llvm/Target/TargetMachine.h"
31#include "llvm/Target/TargetOptions.h"
32#include "llvm/ADT/SmallPtrSet.h"
33#include "llvm/ADT/Statistic.h"
34#include "llvm/Support/CommandLine.h"
35#include "llvm/Support/Debug.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/MathExtras.h"
38#include "llvm/Support/raw_ostream.h"
39#include <algorithm>
40using namespace llvm;
41
42STATISTIC(NodesCombined   , "Number of dag nodes combined");
43STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
44STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
45STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
46
47namespace {
48  static cl::opt<bool>
49    CombinerAA("combiner-alias-analysis", cl::Hidden,
50               cl::desc("Turn on alias analysis during testing"));
51
52  static cl::opt<bool>
53    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
54               cl::desc("Include global information in alias analysis"));
55
56//------------------------------ DAGCombiner ---------------------------------//
57
58  class DAGCombiner {
59    SelectionDAG &DAG;
60    const TargetLowering &TLI;
61    CombineLevel Level;
62    CodeGenOpt::Level OptLevel;
63    bool LegalOperations;
64    bool LegalTypes;
65
66    // Worklist of all of the nodes that need to be simplified.
67    std::vector<SDNode*> WorkList;
68
69    // AA - Used for DAG load/store alias analysis.
70    AliasAnalysis &AA;
71
72    /// AddUsersToWorkList - When an instruction is simplified, add all users of
73    /// the instruction to the work lists because they might get more simplified
74    /// now.
75    ///
76    void AddUsersToWorkList(SDNode *N) {
77      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
78           UI != UE; ++UI)
79        AddToWorkList(*UI);
80    }
81
82    /// visit - call the node-specific routine that knows how to fold each
83    /// particular type of node.
84    SDValue visit(SDNode *N);
85
86  public:
87    /// AddToWorkList - Add to the work list making sure it's instance is at the
88    /// the back (next to be processed.)
89    void AddToWorkList(SDNode *N) {
90      removeFromWorkList(N);
91      WorkList.push_back(N);
92    }
93
94    /// removeFromWorkList - remove all instances of N from the worklist.
95    ///
96    void removeFromWorkList(SDNode *N) {
97      WorkList.erase(std::remove(WorkList.begin(), WorkList.end(), N),
98                     WorkList.end());
99    }
100
101    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
102                      bool AddTo = true);
103
104    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
105      return CombineTo(N, &Res, 1, AddTo);
106    }
107
108    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
109                      bool AddTo = true) {
110      SDValue To[] = { Res0, Res1 };
111      return CombineTo(N, To, 2, AddTo);
112    }
113
114    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
115
116  private:
117
118    /// SimplifyDemandedBits - Check the specified integer node value to see if
119    /// it can be simplified or if things it uses can be simplified by bit
120    /// propagation.  If so, return true.
121    bool SimplifyDemandedBits(SDValue Op) {
122      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
123      APInt Demanded = APInt::getAllOnesValue(BitWidth);
124      return SimplifyDemandedBits(Op, Demanded);
125    }
126
127    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
128
129    bool CombineToPreIndexedLoadStore(SDNode *N);
130    bool CombineToPostIndexedLoadStore(SDNode *N);
131
132    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
133    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
134    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
135    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
136    SDValue PromoteIntBinOp(SDValue Op);
137    SDValue PromoteIntShiftOp(SDValue Op);
138    SDValue PromoteExtend(SDValue Op);
139    bool PromoteLoad(SDValue Op);
140
141    /// combine - call the node-specific routine that knows how to fold each
142    /// particular type of node. If that doesn't do anything, try the
143    /// target-specific DAG combines.
144    SDValue combine(SDNode *N);
145
146    // Visitation implementation - Implement dag node combining for different
147    // node types.  The semantics are as follows:
148    // Return Value:
149    //   SDValue.getNode() == 0 - No change was made
150    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
151    //   otherwise              - N should be replaced by the returned Operand.
152    //
153    SDValue visitTokenFactor(SDNode *N);
154    SDValue visitMERGE_VALUES(SDNode *N);
155    SDValue visitADD(SDNode *N);
156    SDValue visitSUB(SDNode *N);
157    SDValue visitADDC(SDNode *N);
158    SDValue visitADDE(SDNode *N);
159    SDValue visitMUL(SDNode *N);
160    SDValue visitSDIV(SDNode *N);
161    SDValue visitUDIV(SDNode *N);
162    SDValue visitSREM(SDNode *N);
163    SDValue visitUREM(SDNode *N);
164    SDValue visitMULHU(SDNode *N);
165    SDValue visitMULHS(SDNode *N);
166    SDValue visitSMUL_LOHI(SDNode *N);
167    SDValue visitUMUL_LOHI(SDNode *N);
168    SDValue visitSDIVREM(SDNode *N);
169    SDValue visitUDIVREM(SDNode *N);
170    SDValue visitAND(SDNode *N);
171    SDValue visitOR(SDNode *N);
172    SDValue visitXOR(SDNode *N);
173    SDValue SimplifyVBinOp(SDNode *N);
174    SDValue visitSHL(SDNode *N);
175    SDValue visitSRA(SDNode *N);
176    SDValue visitSRL(SDNode *N);
177    SDValue visitCTLZ(SDNode *N);
178    SDValue visitCTTZ(SDNode *N);
179    SDValue visitCTPOP(SDNode *N);
180    SDValue visitSELECT(SDNode *N);
181    SDValue visitSELECT_CC(SDNode *N);
182    SDValue visitSETCC(SDNode *N);
183    SDValue visitSIGN_EXTEND(SDNode *N);
184    SDValue visitZERO_EXTEND(SDNode *N);
185    SDValue visitANY_EXTEND(SDNode *N);
186    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
187    SDValue visitTRUNCATE(SDNode *N);
188    SDValue visitBIT_CONVERT(SDNode *N);
189    SDValue visitBUILD_PAIR(SDNode *N);
190    SDValue visitFADD(SDNode *N);
191    SDValue visitFSUB(SDNode *N);
192    SDValue visitFMUL(SDNode *N);
193    SDValue visitFDIV(SDNode *N);
194    SDValue visitFREM(SDNode *N);
195    SDValue visitFCOPYSIGN(SDNode *N);
196    SDValue visitSINT_TO_FP(SDNode *N);
197    SDValue visitUINT_TO_FP(SDNode *N);
198    SDValue visitFP_TO_SINT(SDNode *N);
199    SDValue visitFP_TO_UINT(SDNode *N);
200    SDValue visitFP_ROUND(SDNode *N);
201    SDValue visitFP_ROUND_INREG(SDNode *N);
202    SDValue visitFP_EXTEND(SDNode *N);
203    SDValue visitFNEG(SDNode *N);
204    SDValue visitFABS(SDNode *N);
205    SDValue visitBRCOND(SDNode *N);
206    SDValue visitBR_CC(SDNode *N);
207    SDValue visitLOAD(SDNode *N);
208    SDValue visitSTORE(SDNode *N);
209    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
210    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
211    SDValue visitBUILD_VECTOR(SDNode *N);
212    SDValue visitCONCAT_VECTORS(SDNode *N);
213    SDValue visitVECTOR_SHUFFLE(SDNode *N);
214
215    SDValue XformToShuffleWithZero(SDNode *N);
216    SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS);
217
218    SDValue visitShiftByConstant(SDNode *N, unsigned Amt);
219
220    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
221    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
222    SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2);
223    SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2,
224                             SDValue N3, ISD::CondCode CC,
225                             bool NotExtCompare = false);
226    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
227                          DebugLoc DL, bool foldBooleans = true);
228    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
229                                         unsigned HiOp);
230    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
231    SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
232    SDValue BuildSDIV(SDNode *N);
233    SDValue BuildUDIV(SDNode *N);
234    SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
235    SDValue ReduceLoadWidth(SDNode *N);
236    SDValue ReduceLoadOpStoreWidth(SDNode *N);
237
238    SDValue GetDemandedBits(SDValue V, const APInt &Mask);
239
240    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
241    /// looking for aliasing nodes and adding them to the Aliases vector.
242    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
243                          SmallVector<SDValue, 8> &Aliases);
244
245    /// isAlias - Return true if there is any possibility that the two addresses
246    /// overlap.
247    bool isAlias(SDValue Ptr1, int64_t Size1,
248                 const Value *SrcValue1, int SrcValueOffset1,
249                 unsigned SrcValueAlign1,
250                 SDValue Ptr2, int64_t Size2,
251                 const Value *SrcValue2, int SrcValueOffset2,
252                 unsigned SrcValueAlign2) const;
253
254    /// FindAliasInfo - Extracts the relevant alias information from the memory
255    /// node.  Returns true if the operand was a load.
256    bool FindAliasInfo(SDNode *N,
257                       SDValue &Ptr, int64_t &Size,
258                       const Value *&SrcValue, int &SrcValueOffset,
259                       unsigned &SrcValueAlignment) const;
260
261    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
262    /// looking for a better chain (aliasing node.)
263    SDValue FindBetterChain(SDNode *N, SDValue Chain);
264
265  public:
266    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
267      : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted),
268        OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {}
269
270    /// Run - runs the dag combiner on all nodes in the work list
271    void Run(CombineLevel AtLevel);
272
273    SelectionDAG &getDAG() const { return DAG; }
274
275    /// getShiftAmountTy - Returns a type large enough to hold any valid
276    /// shift amount - before type legalization these can be huge.
277    EVT getShiftAmountTy() {
278      return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
279    }
280
281    /// isTypeLegal - This method returns true if we are running before type
282    /// legalization or if the specified VT is legal.
283    bool isTypeLegal(const EVT &VT) {
284      if (!LegalTypes) return true;
285      return TLI.isTypeLegal(VT);
286    }
287  };
288}
289
290
291namespace {
292/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
293/// nodes from the worklist.
294class WorkListRemover : public SelectionDAG::DAGUpdateListener {
295  DAGCombiner &DC;
296public:
297  explicit WorkListRemover(DAGCombiner &dc) : DC(dc) {}
298
299  virtual void NodeDeleted(SDNode *N, SDNode *E) {
300    DC.removeFromWorkList(N);
301  }
302
303  virtual void NodeUpdated(SDNode *N) {
304    // Ignore updates.
305  }
306};
307}
308
309//===----------------------------------------------------------------------===//
310//  TargetLowering::DAGCombinerInfo implementation
311//===----------------------------------------------------------------------===//
312
313void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
314  ((DAGCombiner*)DC)->AddToWorkList(N);
315}
316
317SDValue TargetLowering::DAGCombinerInfo::
318CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) {
319  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
320}
321
322SDValue TargetLowering::DAGCombinerInfo::
323CombineTo(SDNode *N, SDValue Res, bool AddTo) {
324  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
325}
326
327
328SDValue TargetLowering::DAGCombinerInfo::
329CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
330  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
331}
332
333void TargetLowering::DAGCombinerInfo::
334CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
335  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
336}
337
338//===----------------------------------------------------------------------===//
339// Helper Functions
340//===----------------------------------------------------------------------===//
341
342/// isNegatibleForFree - Return 1 if we can compute the negated form of the
343/// specified expression for the same cost as the expression itself, or 2 if we
344/// can compute the negated form more cheaply than the expression itself.
345static char isNegatibleForFree(SDValue Op, bool LegalOperations,
346                               unsigned Depth = 0) {
347  // No compile time optimizations on this type.
348  if (Op.getValueType() == MVT::ppcf128)
349    return 0;
350
351  // fneg is removable even if it has multiple uses.
352  if (Op.getOpcode() == ISD::FNEG) return 2;
353
354  // Don't allow anything with multiple uses.
355  if (!Op.hasOneUse()) return 0;
356
357  // Don't recurse exponentially.
358  if (Depth > 6) return 0;
359
360  switch (Op.getOpcode()) {
361  default: return false;
362  case ISD::ConstantFP:
363    // Don't invert constant FP values after legalize.  The negated constant
364    // isn't necessarily legal.
365    return LegalOperations ? 0 : 1;
366  case ISD::FADD:
367    // FIXME: determine better conditions for this xform.
368    if (!UnsafeFPMath) return 0;
369
370    // fold (fsub (fadd A, B)) -> (fsub (fneg A), B)
371    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
372      return V;
373    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
374    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
375  case ISD::FSUB:
376    // We can't turn -(A-B) into B-A when we honor signed zeros.
377    if (!UnsafeFPMath) return 0;
378
379    // fold (fneg (fsub A, B)) -> (fsub B, A)
380    return 1;
381
382  case ISD::FMUL:
383  case ISD::FDIV:
384    if (HonorSignDependentRoundingFPMath()) return 0;
385
386    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
387    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
388      return V;
389
390    return isNegatibleForFree(Op.getOperand(1), LegalOperations, Depth+1);
391
392  case ISD::FP_EXTEND:
393  case ISD::FP_ROUND:
394  case ISD::FSIN:
395    return isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1);
396  }
397}
398
399/// GetNegatedExpression - If isNegatibleForFree returns true, this function
400/// returns the newly negated expression.
401static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
402                                    bool LegalOperations, unsigned Depth = 0) {
403  // fneg is removable even if it has multiple uses.
404  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
405
406  // Don't allow anything with multiple uses.
407  assert(Op.hasOneUse() && "Unknown reuse!");
408
409  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
410  switch (Op.getOpcode()) {
411  default: llvm_unreachable("Unknown code");
412  case ISD::ConstantFP: {
413    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
414    V.changeSign();
415    return DAG.getConstantFP(V, Op.getValueType());
416  }
417  case ISD::FADD:
418    // FIXME: determine better conditions for this xform.
419    assert(UnsafeFPMath);
420
421    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
422    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
423      return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
424                         GetNegatedExpression(Op.getOperand(0), DAG,
425                                              LegalOperations, Depth+1),
426                         Op.getOperand(1));
427    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
428    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
429                       GetNegatedExpression(Op.getOperand(1), DAG,
430                                            LegalOperations, Depth+1),
431                       Op.getOperand(0));
432  case ISD::FSUB:
433    // We can't turn -(A-B) into B-A when we honor signed zeros.
434    assert(UnsafeFPMath);
435
436    // fold (fneg (fsub 0, B)) -> B
437    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
438      if (N0CFP->getValueAPF().isZero())
439        return Op.getOperand(1);
440
441    // fold (fneg (fsub A, B)) -> (fsub B, A)
442    return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(),
443                       Op.getOperand(1), Op.getOperand(0));
444
445  case ISD::FMUL:
446  case ISD::FDIV:
447    assert(!HonorSignDependentRoundingFPMath());
448
449    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
450    if (isNegatibleForFree(Op.getOperand(0), LegalOperations, Depth+1))
451      return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
452                         GetNegatedExpression(Op.getOperand(0), DAG,
453                                              LegalOperations, Depth+1),
454                         Op.getOperand(1));
455
456    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
457    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
458                       Op.getOperand(0),
459                       GetNegatedExpression(Op.getOperand(1), DAG,
460                                            LegalOperations, Depth+1));
461
462  case ISD::FP_EXTEND:
463  case ISD::FSIN:
464    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(),
465                       GetNegatedExpression(Op.getOperand(0), DAG,
466                                            LegalOperations, Depth+1));
467  case ISD::FP_ROUND:
468      return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(),
469                         GetNegatedExpression(Op.getOperand(0), DAG,
470                                              LegalOperations, Depth+1),
471                         Op.getOperand(1));
472  }
473}
474
475
476// isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc
477// that selects between the values 1 and 0, making it equivalent to a setcc.
478// Also, set the incoming LHS, RHS, and CC references to the appropriate
479// nodes based on the type of node we are checking.  This simplifies life a
480// bit for the callers.
481static bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
482                              SDValue &CC) {
483  if (N.getOpcode() == ISD::SETCC) {
484    LHS = N.getOperand(0);
485    RHS = N.getOperand(1);
486    CC  = N.getOperand(2);
487    return true;
488  }
489  if (N.getOpcode() == ISD::SELECT_CC &&
490      N.getOperand(2).getOpcode() == ISD::Constant &&
491      N.getOperand(3).getOpcode() == ISD::Constant &&
492      cast<ConstantSDNode>(N.getOperand(2))->getAPIntValue() == 1 &&
493      cast<ConstantSDNode>(N.getOperand(3))->isNullValue()) {
494    LHS = N.getOperand(0);
495    RHS = N.getOperand(1);
496    CC  = N.getOperand(4);
497    return true;
498  }
499  return false;
500}
501
502// isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only
503// one use.  If this is true, it allows the users to invert the operation for
504// free when it is profitable to do so.
505static bool isOneUseSetCC(SDValue N) {
506  SDValue N0, N1, N2;
507  if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
508    return true;
509  return false;
510}
511
512SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL,
513                                    SDValue N0, SDValue N1) {
514  EVT VT = N0.getValueType();
515  if (N0.getOpcode() == Opc && isa<ConstantSDNode>(N0.getOperand(1))) {
516    if (isa<ConstantSDNode>(N1)) {
517      // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
518      SDValue OpNode =
519        DAG.FoldConstantArithmetic(Opc, VT,
520                                   cast<ConstantSDNode>(N0.getOperand(1)),
521                                   cast<ConstantSDNode>(N1));
522      return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
523    } else if (N0.hasOneUse()) {
524      // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use
525      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
526                                   N0.getOperand(0), N1);
527      AddToWorkList(OpNode.getNode());
528      return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
529    }
530  }
531
532  if (N1.getOpcode() == Opc && isa<ConstantSDNode>(N1.getOperand(1))) {
533    if (isa<ConstantSDNode>(N0)) {
534      // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
535      SDValue OpNode =
536        DAG.FoldConstantArithmetic(Opc, VT,
537                                   cast<ConstantSDNode>(N1.getOperand(1)),
538                                   cast<ConstantSDNode>(N0));
539      return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
540    } else if (N1.hasOneUse()) {
541      // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use
542      SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT,
543                                   N1.getOperand(0), N0);
544      AddToWorkList(OpNode.getNode());
545      return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
546    }
547  }
548
549  return SDValue();
550}
551
552SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
553                               bool AddTo) {
554  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
555  ++NodesCombined;
556  DEBUG(dbgs() << "\nReplacing.1 ";
557        N->dump(&DAG);
558        dbgs() << "\nWith: ";
559        To[0].getNode()->dump(&DAG);
560        dbgs() << " and " << NumTo-1 << " other values\n";
561        for (unsigned i = 0, e = NumTo; i != e; ++i)
562          assert((!To[i].getNode() ||
563                  N->getValueType(i) == To[i].getValueType()) &&
564                 "Cannot combine value to value of different type!"));
565  WorkListRemover DeadNodes(*this);
566  DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
567
568  if (AddTo) {
569    // Push the new nodes and any users onto the worklist
570    for (unsigned i = 0, e = NumTo; i != e; ++i) {
571      if (To[i].getNode()) {
572        AddToWorkList(To[i].getNode());
573        AddUsersToWorkList(To[i].getNode());
574      }
575    }
576  }
577
578  // Finally, if the node is now dead, remove it from the graph.  The node
579  // may not be dead if the replacement process recursively simplified to
580  // something else needing this node.
581  if (N->use_empty()) {
582    // Nodes can be reintroduced into the worklist.  Make sure we do not
583    // process a node that has been replaced.
584    removeFromWorkList(N);
585
586    // Finally, since the node is now dead, remove it from the graph.
587    DAG.DeleteNode(N);
588  }
589  return SDValue(N, 0);
590}
591
592void DAGCombiner::
593CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
594  // Replace all uses.  If any nodes become isomorphic to other nodes and
595  // are deleted, make sure to remove them from our worklist.
596  WorkListRemover DeadNodes(*this);
597  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes);
598
599  // Push the new node and any (possibly new) users onto the worklist.
600  AddToWorkList(TLO.New.getNode());
601  AddUsersToWorkList(TLO.New.getNode());
602
603  // Finally, if the node is now dead, remove it from the graph.  The node
604  // may not be dead if the replacement process recursively simplified to
605  // something else needing this node.
606  if (TLO.Old.getNode()->use_empty()) {
607    removeFromWorkList(TLO.Old.getNode());
608
609    // If the operands of this node are only used by the node, they will now
610    // be dead.  Make sure to visit them first to delete dead nodes early.
611    for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i)
612      if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse())
613        AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode());
614
615    DAG.DeleteNode(TLO.Old.getNode());
616  }
617}
618
619/// SimplifyDemandedBits - Check the specified integer node value to see if
620/// it can be simplified or if things it uses can be simplified by bit
621/// propagation.  If so, return true.
622bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
623  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
624  APInt KnownZero, KnownOne;
625  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
626    return false;
627
628  // Revisit the node.
629  AddToWorkList(Op.getNode());
630
631  // Replace the old value with the new one.
632  ++NodesCombined;
633  DEBUG(dbgs() << "\nReplacing.2 ";
634        TLO.Old.getNode()->dump(&DAG);
635        dbgs() << "\nWith: ";
636        TLO.New.getNode()->dump(&DAG);
637        dbgs() << '\n');
638
639  CommitTargetLoweringOpt(TLO);
640  return true;
641}
642
643void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
644  DebugLoc dl = Load->getDebugLoc();
645  EVT VT = Load->getValueType(0);
646  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
647
648  DEBUG(dbgs() << "\nReplacing.9 ";
649        Load->dump(&DAG);
650        dbgs() << "\nWith: ";
651        Trunc.getNode()->dump(&DAG);
652        dbgs() << '\n');
653  WorkListRemover DeadNodes(*this);
654  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc, &DeadNodes);
655  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1),
656                                &DeadNodes);
657  removeFromWorkList(Load);
658  DAG.DeleteNode(Load);
659  AddToWorkList(Trunc.getNode());
660}
661
662SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
663  Replace = false;
664  DebugLoc dl = Op.getDebugLoc();
665  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
666    EVT MemVT = LD->getMemoryVT();
667    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
668      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
669      : LD->getExtensionType();
670    Replace = true;
671    return DAG.getExtLoad(ExtType, dl, PVT,
672                          LD->getChain(), LD->getBasePtr(),
673                          LD->getSrcValue(), LD->getSrcValueOffset(),
674                          MemVT, LD->isVolatile(),
675                          LD->isNonTemporal(), LD->getAlignment());
676  }
677
678  unsigned Opc = Op.getOpcode();
679  switch (Opc) {
680  default: break;
681  case ISD::AssertSext:
682    return DAG.getNode(ISD::AssertSext, dl, PVT,
683                       SExtPromoteOperand(Op.getOperand(0), PVT),
684                       Op.getOperand(1));
685  case ISD::AssertZext:
686    return DAG.getNode(ISD::AssertZext, dl, PVT,
687                       ZExtPromoteOperand(Op.getOperand(0), PVT),
688                       Op.getOperand(1));
689  case ISD::Constant: {
690    unsigned ExtOpc =
691      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
692    return DAG.getNode(ExtOpc, dl, PVT, Op);
693  }
694  }
695
696  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
697    return SDValue();
698  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
699}
700
701SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
702  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
703    return SDValue();
704  EVT OldVT = Op.getValueType();
705  DebugLoc dl = Op.getDebugLoc();
706  bool Replace = false;
707  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
708  if (NewOp.getNode() == 0)
709    return SDValue();
710  AddToWorkList(NewOp.getNode());
711
712  if (Replace)
713    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
714  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
715                     DAG.getValueType(OldVT));
716}
717
718SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
719  EVT OldVT = Op.getValueType();
720  DebugLoc dl = Op.getDebugLoc();
721  bool Replace = false;
722  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
723  if (NewOp.getNode() == 0)
724    return SDValue();
725  AddToWorkList(NewOp.getNode());
726
727  if (Replace)
728    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
729  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
730}
731
732/// PromoteIntBinOp - Promote the specified integer binary operation if the
733/// target indicates it is beneficial. e.g. On x86, it's usually better to
734/// promote i16 operations to i32 since i16 instructions are longer.
735SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
736  if (!LegalOperations)
737    return SDValue();
738
739  EVT VT = Op.getValueType();
740  if (VT.isVector() || !VT.isInteger())
741    return SDValue();
742
743  // If operation type is 'undesirable', e.g. i16 on x86, consider
744  // promoting it.
745  unsigned Opc = Op.getOpcode();
746  if (TLI.isTypeDesirableForOp(Opc, VT))
747    return SDValue();
748
749  EVT PVT = VT;
750  // Consult target whether it is a good idea to promote this operation and
751  // what's the right type to promote it to.
752  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
753    assert(PVT != VT && "Don't know what type to promote to!");
754
755    bool Replace0 = false;
756    SDValue N0 = Op.getOperand(0);
757    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
758    if (NN0.getNode() == 0)
759      return SDValue();
760
761    bool Replace1 = false;
762    SDValue N1 = Op.getOperand(1);
763    SDValue NN1 = PromoteOperand(N1, PVT, Replace1);
764    if (NN1.getNode() == 0)
765      return SDValue();
766
767    AddToWorkList(NN0.getNode());
768    AddToWorkList(NN1.getNode());
769
770    if (Replace0)
771      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
772    if (Replace1)
773      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
774
775    DEBUG(dbgs() << "\nPromoting ";
776          Op.getNode()->dump(&DAG));
777    DebugLoc dl = Op.getDebugLoc();
778    return DAG.getNode(ISD::TRUNCATE, dl, VT,
779                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
780  }
781  return SDValue();
782}
783
784/// PromoteIntShiftOp - Promote the specified integer shift operation if the
785/// target indicates it is beneficial. e.g. On x86, it's usually better to
786/// promote i16 operations to i32 since i16 instructions are longer.
787SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
788  if (!LegalOperations)
789    return SDValue();
790
791  EVT VT = Op.getValueType();
792  if (VT.isVector() || !VT.isInteger())
793    return SDValue();
794
795  // If operation type is 'undesirable', e.g. i16 on x86, consider
796  // promoting it.
797  unsigned Opc = Op.getOpcode();
798  if (TLI.isTypeDesirableForOp(Opc, VT))
799    return SDValue();
800
801  EVT PVT = VT;
802  // Consult target whether it is a good idea to promote this operation and
803  // what's the right type to promote it to.
804  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
805    assert(PVT != VT && "Don't know what type to promote to!");
806
807    bool Replace = false;
808    SDValue N0 = Op.getOperand(0);
809    if (Opc == ISD::SRA)
810      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
811    else if (Opc == ISD::SRL)
812      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
813    else
814      N0 = PromoteOperand(N0, PVT, Replace);
815    if (N0.getNode() == 0)
816      return SDValue();
817
818    AddToWorkList(N0.getNode());
819    if (Replace)
820      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
821
822    DEBUG(dbgs() << "\nPromoting ";
823          Op.getNode()->dump(&DAG));
824    DebugLoc dl = Op.getDebugLoc();
825    return DAG.getNode(ISD::TRUNCATE, dl, VT,
826                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
827  }
828  return SDValue();
829}
830
831SDValue DAGCombiner::PromoteExtend(SDValue Op) {
832  if (!LegalOperations)
833    return SDValue();
834
835  EVT VT = Op.getValueType();
836  if (VT.isVector() || !VT.isInteger())
837    return SDValue();
838
839  // If operation type is 'undesirable', e.g. i16 on x86, consider
840  // promoting it.
841  unsigned Opc = Op.getOpcode();
842  if (TLI.isTypeDesirableForOp(Opc, VT))
843    return SDValue();
844
845  EVT PVT = VT;
846  // Consult target whether it is a good idea to promote this operation and
847  // what's the right type to promote it to.
848  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
849    assert(PVT != VT && "Don't know what type to promote to!");
850    // fold (aext (aext x)) -> (aext x)
851    // fold (aext (zext x)) -> (zext x)
852    // fold (aext (sext x)) -> (sext x)
853    DEBUG(dbgs() << "\nPromoting ";
854          Op.getNode()->dump(&DAG));
855    return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0));
856  }
857  return SDValue();
858}
859
860bool DAGCombiner::PromoteLoad(SDValue Op) {
861  if (!LegalOperations)
862    return false;
863
864  EVT VT = Op.getValueType();
865  if (VT.isVector() || !VT.isInteger())
866    return false;
867
868  // If operation type is 'undesirable', e.g. i16 on x86, consider
869  // promoting it.
870  unsigned Opc = Op.getOpcode();
871  if (TLI.isTypeDesirableForOp(Opc, VT))
872    return false;
873
874  EVT PVT = VT;
875  // Consult target whether it is a good idea to promote this operation and
876  // what's the right type to promote it to.
877  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
878    assert(PVT != VT && "Don't know what type to promote to!");
879
880    DebugLoc dl = Op.getDebugLoc();
881    SDNode *N = Op.getNode();
882    LoadSDNode *LD = cast<LoadSDNode>(N);
883    EVT MemVT = LD->getMemoryVT();
884    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
885      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
886      : LD->getExtensionType();
887    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
888                                   LD->getChain(), LD->getBasePtr(),
889                                   LD->getSrcValue(), LD->getSrcValueOffset(),
890                                   MemVT, LD->isVolatile(),
891                                   LD->isNonTemporal(), LD->getAlignment());
892    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
893
894    DEBUG(dbgs() << "\nPromoting ";
895          N->dump(&DAG);
896          dbgs() << "\nTo: ";
897          Result.getNode()->dump(&DAG);
898          dbgs() << '\n');
899    WorkListRemover DeadNodes(*this);
900    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result, &DeadNodes);
901    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1), &DeadNodes);
902    removeFromWorkList(N);
903    DAG.DeleteNode(N);
904    AddToWorkList(Result.getNode());
905    return true;
906  }
907  return false;
908}
909
910
911//===----------------------------------------------------------------------===//
912//  Main DAG Combiner implementation
913//===----------------------------------------------------------------------===//
914
/// Run - Drive the combiner over the whole DAG at combine level AtLevel.
///
/// Every node currently in the DAG is seeded onto the worklist.  Nodes are
/// then popped off the back one at a time: nodes without uses are deleted
/// (after re-queueing their operands), all others are handed to combine().
/// When combine() produces a different node, the uses of the old node are
/// redirected to it and the affected nodes are re-queued.  The DAG root is
/// held through a HandleSDNode for the duration and restored at the end.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= NoIllegalOperations;
  LegalTypes = Level >= NoIllegalTypes;

  // Add all the dag nodes to the worklist.
  WorkList.reserve(DAG.allnodes_size());
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    WorkList.push_back(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done.  Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // while the worklist isn't empty, inspect the node on the end of it and
  // try and combine it.
  while (!WorkList.empty()) {
    SDNode *N = WorkList.back();
    WorkList.pop_back();

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    // A null result means no combine applied to N.
    if (RV.getNode() == 0)
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorkListRemover DeadNodes(*this);
    // With matching result counts the whole node is replaced; otherwise N
    // must define a single value, which is replaced by the value RV.
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode(), &DeadNodes);
    else {
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV, &DeadNodes);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them.  They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist.  Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
}
1012
/// visit - Dispatch N to the visitXXX routine that matches its opcode and
/// return whatever that routine returns.  Opcodes not listed in the switch
/// yield a null SDValue.
SDValue DAGCombiner::visit(SDNode *N) {
  switch(N->getOpcode()) {
  default: break;   // No generic combine for this opcode.
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BIT_CONVERT:        return visitBIT_CONVERT(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  }
  return SDValue();
}
1079
1080SDValue DAGCombiner::combine(SDNode *N) {
1081  SDValue RV = visit(N);
1082
1083  // If nothing happened, try a target-specific DAG combine.
1084  if (RV.getNode() == 0) {
1085    assert(N->getOpcode() != ISD::DELETED_NODE &&
1086           "Node was deleted but visit returned NULL!");
1087
1088    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
1089        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
1090
1091      // Expose the DAG combiner to the target combiner impls.
1092      TargetLowering::DAGCombinerInfo
1093        DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
1094
1095      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
1096    }
1097  }
1098
1099  // If N is a commutative binary node, try commuting it to enable more
1100  // sdisel CSE.
1101  if (RV.getNode() == 0 &&
1102      SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
1103      N->getNumValues() == 1) {
1104    SDValue N0 = N->getOperand(0);
1105    SDValue N1 = N->getOperand(1);
1106
1107    // Constant operands are canonicalized to RHS.
1108    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
1109      SDValue Ops[] = { N1, N0 };
1110      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(),
1111                                            Ops, 2);
1112      if (CSENode)
1113        return SDValue(CSENode, 0);
1114    }
1115  }
1116
1117  return RV;
1118}
1119
1120/// getInputChainForNode - Given a node, return its input chain if it has one,
1121/// otherwise return a null sd operand.
1122static SDValue getInputChainForNode(SDNode *N) {
1123  if (unsigned NumOps = N->getNumOperands()) {
1124    if (N->getOperand(0).getValueType() == MVT::Other)
1125      return N->getOperand(0);
1126    else if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
1127      return N->getOperand(NumOps-1);
1128    for (unsigned i = 1; i < NumOps-1; ++i)
1129      if (N->getOperand(i).getValueType() == MVT::Other)
1130        return N->getOperand(i);
1131  }
1132  return SDValue();
1133}
1134
1135SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
1136  // If N has two operands, where one has an input chain equal to the other,
1137  // the 'other' chain is redundant.
1138  if (N->getNumOperands() == 2) {
1139    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
1140      return N->getOperand(0);
1141    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
1142      return N->getOperand(1);
1143  }
1144
1145  SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
1146  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
1147  SmallPtrSet<SDNode*, 16> SeenOps;
1148  bool Changed = false;             // If we should replace this token factor.
1149
1150  // Start out with this token factor.
1151  TFs.push_back(N);
1152
1153  // Iterate through token factors.  The TFs grows when new token factors are
1154  // encountered.
1155  for (unsigned i = 0; i < TFs.size(); ++i) {
1156    SDNode *TF = TFs[i];
1157
1158    // Check each of the operands.
1159    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
1160      SDValue Op = TF->getOperand(i);
1161
1162      switch (Op.getOpcode()) {
1163      case ISD::EntryToken:
1164        // Entry tokens don't need to be added to the list. They are
1165        // rededundant.
1166        Changed = true;
1167        break;
1168
1169      case ISD::TokenFactor:
1170        if (Op.hasOneUse() &&
1171            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
1172          // Queue up for processing.
1173          TFs.push_back(Op.getNode());
1174          // Clean up in case the token factor is removed.
1175          AddToWorkList(Op.getNode());
1176          Changed = true;
1177          break;
1178        }
1179        // Fall thru
1180
1181      default:
1182        // Only add if it isn't already in the list.
1183        if (SeenOps.insert(Op.getNode()))
1184          Ops.push_back(Op);
1185        else
1186          Changed = true;
1187        break;
1188      }
1189    }
1190  }
1191
1192  SDValue Result;
1193
1194  // If we've change things around then replace token factor.
1195  if (Changed) {
1196    if (Ops.empty()) {
1197      // The entry token is the only possible outcome.
1198      Result = DAG.getEntryNode();
1199    } else {
1200      // New and improved token factor.
1201      Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
1202                           MVT::Other, &Ops[0], Ops.size());
1203    }
1204
1205    // Don't add users to work list.
1206    return CombineTo(N, Result, false);
1207  }
1208
1209  return Result;
1210}
1211
1212/// MERGE_VALUES can always be eliminated.
1213SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
1214  WorkListRemover DeadNodes(*this);
1215  // Replacing results may cause a different MERGE_VALUES to suddenly
1216  // be CSE'd with N, and carry its uses with it. Iterate until no
1217  // uses remain, to ensure that the node can be safely deleted.
1218  do {
1219    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
1220      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i),
1221                                    &DeadNodes);
1222  } while (!N->use_empty());
1223  removeFromWorkList(N);
1224  DAG.DeleteNode(N);
1225  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
1226}
1227
1228static
1229SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1,
1230                              SelectionDAG &DAG) {
1231  EVT VT = N0.getValueType();
1232  SDValue N00 = N0.getOperand(0);
1233  SDValue N01 = N0.getOperand(1);
1234  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);
1235
1236  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
1237      isa<ConstantSDNode>(N00.getOperand(1))) {
1238    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
1239    N0 = DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT,
1240                     DAG.getNode(ISD::SHL, N00.getDebugLoc(), VT,
1241                                 N00.getOperand(0), N01),
1242                     DAG.getNode(ISD::SHL, N01.getDebugLoc(), VT,
1243                                 N00.getOperand(1), N01));
1244    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
1245  }
1246
1247  return SDValue();
1248}
1249
/// visitADD - Try to simplify the ADD node N.
///
/// The folds below are attempted in order and the first one that applies
/// determines the result: undef and constant handling, symbol+offset
/// folding, reassociation, cancellation against SUB operands,
/// demanded-bits simplification, add -> or when the operands share no bits,
/// and shl/add/sub rewrites.  If none applies, the node is offered to
/// PromoteIntBinOp and its result is returned.
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  // Only before operation legalization, and only when the target allows
  // folding offsets into this global address.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                  GA->getOffset() +
                                    (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // Note: the N0C declared in the condition below shadows the outer N0C and
  // refers to the constant c1 inside the SUB.
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1);
  if (RADD.getNode() != 0)
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                         DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10),
                         DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11));
  }

  // Scalar only: if demanded-bits simplification reports success, return N
  // itself rather than a new node.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
    DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);

    // Only analyze the RHS if at least one LHS bit is known to be zero.
    if (LHSZero.getBoolValue()) {
      DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
          (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
        return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1);
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  // Tried with the shift on either side of the add.
  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG);
    if (Result.getNode()) return Result;
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  // Same fold with the operands commuted:
  // (add shl(0 - y, n), x) -> sub(x, shl(y, n))
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1,
                           DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  // No fold applied; let the target-driven promotion have a go.
  return PromoteIntBinOp(SDValue(N, 0));
}
1392
1393SDValue DAGCombiner::visitADDC(SDNode *N) {
1394  SDValue N0 = N->getOperand(0);
1395  SDValue N1 = N->getOperand(1);
1396  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1397  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1398  EVT VT = N0.getValueType();
1399
1400  // If the flag result is dead, turn this into an ADD.
1401  if (N->hasNUsesOfValue(0, 1))
1402    return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
1403                     DAG.getNode(ISD::CARRY_FALSE,
1404                                 N->getDebugLoc(), MVT::Flag));
1405
1406  // canonicalize constant to RHS.
1407  if (N0C && !N1C)
1408    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
1409
1410  // fold (addc x, 0) -> x + no carry out
1411  if (N1C && N1C->isNullValue())
1412    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
1413                                        N->getDebugLoc(), MVT::Flag));
1414
1415  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
1416  APInt LHSZero, LHSOne;
1417  APInt RHSZero, RHSOne;
1418  APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
1419  DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
1420
1421  if (LHSZero.getBoolValue()) {
1422    DAG.ComputeMaskedBits(N1, Mask, RHSZero, RHSOne);
1423
1424    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
1425    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
1426    if ((RHSZero & (~LHSZero & Mask)) == (~LHSZero & Mask) ||
1427        (LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
1428      return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
1429                       DAG.getNode(ISD::CARRY_FALSE,
1430                                   N->getDebugLoc(), MVT::Flag));
1431  }
1432
1433  return SDValue();
1434}
1435
1436SDValue DAGCombiner::visitADDE(SDNode *N) {
1437  SDValue N0 = N->getOperand(0);
1438  SDValue N1 = N->getOperand(1);
1439  SDValue CarryIn = N->getOperand(2);
1440  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1441  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1442
1443  // canonicalize constant to RHS
1444  if (N0C && !N1C)
1445    return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(),
1446                       N1, N0, CarryIn);
1447
1448  // fold (adde x, y, false) -> (addc x, y)
1449  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
1450    return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0);
1451
1452  return SDValue();
1453}
1454
/// visitSUB - Try to simplify the SUB node N.
///
/// The folds below are attempted in order and the first one that applies
/// determines the result: x-x, constant folding, canonicalization of
/// (sub x, c) to an add and of (sub -1, x) to an xor, cancellation against
/// ADD/SUB operands, undef propagation and symbol offset folding.  If none
/// applies, the node is offered to PromoteIntBinOp and its result is
/// returned.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (sub x, x) -> 0
  if (N0 == N1)
    return DAG.getConstant(0, N->getValueType(0));
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  // Only done before operation legalization.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  // No fold applied; let the target-driven promotion have a go.
  return PromoteIntBinOp(SDValue(N, 0));
}
1530
1531SDValue DAGCombiner::visitMUL(SDNode *N) {
1532  SDValue N0 = N->getOperand(0);
1533  SDValue N1 = N->getOperand(1);
1534  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1535  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1536  EVT VT = N0.getValueType();
1537
1538  // fold vector ops
1539  if (VT.isVector()) {
1540    SDValue FoldedVOp = SimplifyVBinOp(N);
1541    if (FoldedVOp.getNode()) return FoldedVOp;
1542  }
1543
1544  // fold (mul x, undef) -> 0
1545  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
1546    return DAG.getConstant(0, VT);
1547  // fold (mul c1, c2) -> c1*c2
1548  if (N0C && N1C)
1549    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
1550  // canonicalize constant to RHS
1551  if (N0C && !N1C)
1552    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
1553  // fold (mul x, 0) -> 0
1554  if (N1C && N1C->isNullValue())
1555    return N1;
1556  // fold (mul x, -1) -> 0-x
1557  if (N1C && N1C->isAllOnesValue())
1558    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
1559                       DAG.getConstant(0, VT), N0);
1560  // fold (mul x, (1 << c)) -> x << c
1561  if (N1C && N1C->getAPIntValue().isPowerOf2())
1562    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
1563                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
1564                                       getShiftAmountTy()));
1565  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
1566  if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) {
1567    unsigned Log2Val = (-N1C->getAPIntValue()).logBase2();
1568    // FIXME: If the input is something that is easily negated (e.g. a
1569    // single-use add), we should put the negate there.
1570    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
1571                       DAG.getConstant(0, VT),
1572                       DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
1573                            DAG.getConstant(Log2Val, getShiftAmountTy())));
1574  }
1575  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
1576  if (N1C && N0.getOpcode() == ISD::SHL &&
1577      isa<ConstantSDNode>(N0.getOperand(1))) {
1578    SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
1579                             N1, N0.getOperand(1));
1580    AddToWorkList(C3.getNode());
1581    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
1582                       N0.getOperand(0), C3);
1583  }
1584
1585  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
1586  // use.
1587  {
1588    SDValue Sh(0,0), Y(0,0);
1589    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
1590    if (N0.getOpcode() == ISD::SHL && isa<ConstantSDNode>(N0.getOperand(1)) &&
1591        N0.getNode()->hasOneUse()) {
1592      Sh = N0; Y = N1;
1593    } else if (N1.getOpcode() == ISD::SHL &&
1594               isa<ConstantSDNode>(N1.getOperand(1)) &&
1595               N1.getNode()->hasOneUse()) {
1596      Sh = N1; Y = N0;
1597    }
1598
1599    if (Sh.getNode()) {
1600      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
1601                                Sh.getOperand(0), Y);
1602      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT,
1603                         Mul, Sh.getOperand(1));
1604    }
1605  }
1606
1607  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
1608  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
1609      isa<ConstantSDNode>(N0.getOperand(1)))
1610    return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT,
1611                       DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT,
1612                                   N0.getOperand(0), N1),
1613                       DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT,
1614                                   N0.getOperand(1), N1));
1615
1616  // reassociate mul
1617  SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1);
1618  if (RMUL.getNode() != 0)
1619    return RMUL;
1620
1621  return PromoteIntBinOp(SDValue(N, 0));
1622}
1623
1624SDValue DAGCombiner::visitSDIV(SDNode *N) {
1625  SDValue N0 = N->getOperand(0);
1626  SDValue N1 = N->getOperand(1);
1627  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
1628  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
1629  EVT VT = N->getValueType(0);
1630
1631  // fold vector ops
1632  if (VT.isVector()) {
1633    SDValue FoldedVOp = SimplifyVBinOp(N);
1634    if (FoldedVOp.getNode()) return FoldedVOp;
1635  }
1636
1637  // fold (sdiv c1, c2) -> c1/c2
1638  if (N0C && N1C && !N1C->isNullValue())
1639    return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C);
1640  // fold (sdiv X, 1) -> X
1641  if (N1C && N1C->getSExtValue() == 1LL)
1642    return N0;
1643  // fold (sdiv X, -1) -> 0-X
1644  if (N1C && N1C->isAllOnesValue())
1645    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
1646                       DAG.getConstant(0, VT), N0);
1647  // If we know the sign bits of both operands are zero, strength reduce to a
1648  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
1649  if (!VT.isVector()) {
1650    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
1651      return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(),
1652                         N0, N1);
1653  }
1654  // fold (sdiv X, pow2) -> simple ops after legalize
1655  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() &&
1656      (isPowerOf2_64(N1C->getSExtValue()) ||
1657       isPowerOf2_64(-N1C->getSExtValue()))) {
1658    // If dividing by powers of two is cheap, then don't perform the following
1659    // fold.
1660    if (TLI.isPow2DivCheap())
1661      return SDValue();
1662
1663    int64_t pow2 = N1C->getSExtValue();
1664    int64_t abs2 = pow2 > 0 ? pow2 : -pow2;
1665    unsigned lg2 = Log2_64(abs2);
1666
1667    // Splat the sign bit into the register
1668    SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
1669                              DAG.getConstant(VT.getSizeInBits()-1,
1670                                              getShiftAmountTy()));
1671    AddToWorkList(SGN.getNode());
1672
1673    // Add (N0 < 0) ? abs2 - 1 : 0;
1674    SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN,
1675                              DAG.getConstant(VT.getSizeInBits() - lg2,
1676                                              getShiftAmountTy()));
1677    SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL);
1678    AddToWorkList(SRL.getNode());
1679    AddToWorkList(ADD.getNode());    // Divide by pow2
1680    SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD,
1681                              DAG.getConstant(lg2, getShiftAmountTy()));
1682
1683    // If we're dividing by a positive value, we're done.  Otherwise, we must
1684    // negate the result.
1685    if (pow2 > 0)
1686      return SRA;
1687
1688    AddToWorkList(SRA.getNode());
1689    return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT,
1690                       DAG.getConstant(0, VT), SRA);
1691  }
1692
1693  // if integer divide is expensive and we satisfy the requirements, emit an
1694  // alternate sequence.
1695  if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) &&
1696      !TLI.isIntDivCheap()) {
1697    SDValue Op = BuildSDIV(N);
1698    if (Op.getNode()) return Op;
1699  }
1700
1701  // undef / X -> 0
1702  if (N0.getOpcode() == ISD::UNDEF)
1703    return DAG.getConstant(0, VT);
1704  // X / undef -> undef
1705  if (N1.getOpcode() == ISD::UNDEF)
1706    return N1;
1707
1708  return SDValue();
1709}
1710
1711SDValue DAGCombiner::visitUDIV(SDNode *N) {
1712  SDValue N0 = N->getOperand(0);
1713  SDValue N1 = N->getOperand(1);
1714  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
1715  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
1716  EVT VT = N->getValueType(0);
1717
1718  // fold vector ops
1719  if (VT.isVector()) {
1720    SDValue FoldedVOp = SimplifyVBinOp(N);
1721    if (FoldedVOp.getNode()) return FoldedVOp;
1722  }
1723
1724  // fold (udiv c1, c2) -> c1/c2
1725  if (N0C && N1C && !N1C->isNullValue())
1726    return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C);
1727  // fold (udiv x, (1 << c)) -> x >>u c
1728  if (N1C && N1C->getAPIntValue().isPowerOf2())
1729    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
1730                       DAG.getConstant(N1C->getAPIntValue().logBase2(),
1731                                       getShiftAmountTy()));
1732  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
1733  if (N1.getOpcode() == ISD::SHL) {
1734    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
1735      if (SHC->getAPIntValue().isPowerOf2()) {
1736        EVT ADDVT = N1.getOperand(1).getValueType();
1737        SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT,
1738                                  N1.getOperand(1),
1739                                  DAG.getConstant(SHC->getAPIntValue()
1740                                                                  .logBase2(),
1741                                                  ADDVT));
1742        AddToWorkList(Add.getNode());
1743        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add);
1744      }
1745    }
1746  }
1747  // fold (udiv x, c) -> alternate
1748  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) {
1749    SDValue Op = BuildUDIV(N);
1750    if (Op.getNode()) return Op;
1751  }
1752
1753  // undef / X -> 0
1754  if (N0.getOpcode() == ISD::UNDEF)
1755    return DAG.getConstant(0, VT);
1756  // X / undef -> undef
1757  if (N1.getOpcode() == ISD::UNDEF)
1758    return N1;
1759
1760  return SDValue();
1761}
1762
1763SDValue DAGCombiner::visitSREM(SDNode *N) {
1764  SDValue N0 = N->getOperand(0);
1765  SDValue N1 = N->getOperand(1);
1766  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1767  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1768  EVT VT = N->getValueType(0);
1769
1770  // fold (srem c1, c2) -> c1%c2
1771  if (N0C && N1C && !N1C->isNullValue())
1772    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
1773  // If we know the sign bits of both operands are zero, strength reduce to a
1774  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
1775  if (!VT.isVector()) {
1776    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
1777      return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1);
1778  }
1779
1780  // If X/C can be simplified by the division-by-constant logic, lower
1781  // X%C to the equivalent of X-X/C*C.
1782  if (N1C && !N1C->isNullValue()) {
1783    SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1);
1784    AddToWorkList(Div.getNode());
1785    SDValue OptimizedDiv = combine(Div.getNode());
1786    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
1787      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
1788                                OptimizedDiv, N1);
1789      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
1790      AddToWorkList(Mul.getNode());
1791      return Sub;
1792    }
1793  }
1794
1795  // undef % X -> 0
1796  if (N0.getOpcode() == ISD::UNDEF)
1797    return DAG.getConstant(0, VT);
1798  // X % undef -> undef
1799  if (N1.getOpcode() == ISD::UNDEF)
1800    return N1;
1801
1802  return SDValue();
1803}
1804
1805SDValue DAGCombiner::visitUREM(SDNode *N) {
1806  SDValue N0 = N->getOperand(0);
1807  SDValue N1 = N->getOperand(1);
1808  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
1809  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1810  EVT VT = N->getValueType(0);
1811
1812  // fold (urem c1, c2) -> c1%c2
1813  if (N0C && N1C && !N1C->isNullValue())
1814    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
1815  // fold (urem x, pow2) -> (and x, pow2-1)
1816  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
1817    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0,
1818                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
1819  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
1820  if (N1.getOpcode() == ISD::SHL) {
1821    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
1822      if (SHC->getAPIntValue().isPowerOf2()) {
1823        SDValue Add =
1824          DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1,
1825                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
1826                                 VT));
1827        AddToWorkList(Add.getNode());
1828        return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add);
1829      }
1830    }
1831  }
1832
1833  // If X/C can be simplified by the division-by-constant logic, lower
1834  // X%C to the equivalent of X-X/C*C.
1835  if (N1C && !N1C->isNullValue()) {
1836    SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1);
1837    AddToWorkList(Div.getNode());
1838    SDValue OptimizedDiv = combine(Div.getNode());
1839    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
1840      SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT,
1841                                OptimizedDiv, N1);
1842      SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul);
1843      AddToWorkList(Mul.getNode());
1844      return Sub;
1845    }
1846  }
1847
1848  // undef % X -> 0
1849  if (N0.getOpcode() == ISD::UNDEF)
1850    return DAG.getConstant(0, VT);
1851  // X % undef -> undef
1852  if (N1.getOpcode() == ISD::UNDEF)
1853    return N1;
1854
1855  return SDValue();
1856}
1857
1858SDValue DAGCombiner::visitMULHS(SDNode *N) {
1859  SDValue N0 = N->getOperand(0);
1860  SDValue N1 = N->getOperand(1);
1861  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1862  EVT VT = N->getValueType(0);
1863
1864  // fold (mulhs x, 0) -> 0
1865  if (N1C && N1C->isNullValue())
1866    return N1;
1867  // fold (mulhs x, 1) -> (sra x, size(x)-1)
1868  if (N1C && N1C->getAPIntValue() == 1)
1869    return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0,
1870                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
1871                                       getShiftAmountTy()));
1872  // fold (mulhs x, undef) -> 0
1873  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
1874    return DAG.getConstant(0, VT);
1875
1876  return SDValue();
1877}
1878
1879SDValue DAGCombiner::visitMULHU(SDNode *N) {
1880  SDValue N0 = N->getOperand(0);
1881  SDValue N1 = N->getOperand(1);
1882  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
1883  EVT VT = N->getValueType(0);
1884
1885  // fold (mulhu x, 0) -> 0
1886  if (N1C && N1C->isNullValue())
1887    return N1;
1888  // fold (mulhu x, 1) -> 0
1889  if (N1C && N1C->getAPIntValue() == 1)
1890    return DAG.getConstant(0, N0.getValueType());
1891  // fold (mulhu x, undef) -> 0
1892  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
1893    return DAG.getConstant(0, VT);
1894
1895  return SDValue();
1896}
1897
1898/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
1899/// compute two values. LoOp and HiOp give the opcodes for the two computations
1900/// that are being performed. Return true if a simplification was made.
1901///
1902SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
1903                                                unsigned HiOp) {
1904  // If the high half is not needed, just compute the low half.
1905  bool HiExists = N->hasAnyUseOfValue(1);
1906  if (!HiExists &&
1907      (!LegalOperations ||
1908       TLI.isOperationLegal(LoOp, N->getValueType(0)))) {
1909    SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
1910                              N->op_begin(), N->getNumOperands());
1911    return CombineTo(N, Res, Res);
1912  }
1913
1914  // If the low half is not needed, just compute the high half.
1915  bool LoExists = N->hasAnyUseOfValue(0);
1916  if (!LoExists &&
1917      (!LegalOperations ||
1918       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
1919    SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
1920                              N->op_begin(), N->getNumOperands());
1921    return CombineTo(N, Res, Res);
1922  }
1923
1924  // If both halves are used, return as it is.
1925  if (LoExists && HiExists)
1926    return SDValue();
1927
1928  // If the two computed results can be simplified separately, separate them.
1929  if (LoExists) {
1930    SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0),
1931                             N->op_begin(), N->getNumOperands());
1932    AddToWorkList(Lo.getNode());
1933    SDValue LoOpt = combine(Lo.getNode());
1934    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
1935        (!LegalOperations ||
1936         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
1937      return CombineTo(N, LoOpt, LoOpt);
1938  }
1939
1940  if (HiExists) {
1941    SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1),
1942                             N->op_begin(), N->getNumOperands());
1943    AddToWorkList(Hi.getNode());
1944    SDValue HiOpt = combine(Hi.getNode());
1945    if (HiOpt.getNode() && HiOpt != Hi &&
1946        (!LegalOperations ||
1947         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
1948      return CombineTo(N, HiOpt, HiOpt);
1949  }
1950
1951  return SDValue();
1952}
1953
1954SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
1955  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
1956  if (Res.getNode()) return Res;
1957
1958  return SDValue();
1959}
1960
1961SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
1962  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
1963  if (Res.getNode()) return Res;
1964
1965  return SDValue();
1966}
1967
1968SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
1969  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
1970  if (Res.getNode()) return Res;
1971
1972  return SDValue();
1973}
1974
1975SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
1976  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
1977  if (Res.getNode()) return Res;
1978
1979  return SDValue();
1980}
1981
1982/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
1983/// two operands of the same opcode, try to simplify it.
1984SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
1985  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
1986  EVT VT = N0.getValueType();
1987  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
1988
1989  // Bail early if none of these transforms apply.
1990  if (N0.getNode()->getNumOperands() == 0) return SDValue();
1991
1992  // For each of OP in AND/OR/XOR:
1993  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
1994  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
1995  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
1996  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y))
1997  //
1998  // do not sink logical op inside of a vector extend, since it may combine
1999  // into a vsetcc.
2000  EVT Op0VT = N0.getOperand(0).getValueType();
2001  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
2002       N0.getOpcode() == ISD::SIGN_EXTEND ||
2003       // Avoid infinite looping with PromoteIntBinOp.
2004       (N0.getOpcode() == ISD::ANY_EXTEND &&
2005        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
2006       (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
2007      !VT.isVector() &&
2008      Op0VT == N1.getOperand(0).getValueType() &&
2009      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
2010    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
2011                                 N0.getOperand(0).getValueType(),
2012                                 N0.getOperand(0), N1.getOperand(0));
2013    AddToWorkList(ORNode.getNode());
2014    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode);
2015  }
2016
2017  // For each of OP in SHL/SRL/SRA/AND...
2018  //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
2019  //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
2020  //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
2021  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
2022       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
2023      N0.getOperand(1) == N1.getOperand(1)) {
2024    SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(),
2025                                 N0.getOperand(0).getValueType(),
2026                                 N0.getOperand(0), N1.getOperand(0));
2027    AddToWorkList(ORNode.getNode());
2028    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
2029                       ORNode, N0.getOperand(1));
2030  }
2031
2032  return SDValue();
2033}
2034
2035SDValue DAGCombiner::visitAND(SDNode *N) {
2036  SDValue N0 = N->getOperand(0);
2037  SDValue N1 = N->getOperand(1);
2038  SDValue LL, LR, RL, RR, CC0, CC1;
2039  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2040  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2041  EVT VT = N1.getValueType();
2042  unsigned BitWidth = VT.getScalarType().getSizeInBits();
2043
2044  // fold vector ops
2045  if (VT.isVector()) {
2046    SDValue FoldedVOp = SimplifyVBinOp(N);
2047    if (FoldedVOp.getNode()) return FoldedVOp;
2048  }
2049
2050  // fold (and x, undef) -> 0
2051  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
2052    return DAG.getConstant(0, VT);
2053  // fold (and c1, c2) -> c1&c2
2054  if (N0C && N1C)
2055    return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C);
2056  // canonicalize constant to RHS
2057  if (N0C && !N1C)
2058    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0);
2059  // fold (and x, -1) -> x
2060  if (N1C && N1C->isAllOnesValue())
2061    return N0;
2062  // if (and x, c) is known to be zero, return 0
2063  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
2064                                   APInt::getAllOnesValue(BitWidth)))
2065    return DAG.getConstant(0, VT);
2066  // reassociate and
2067  SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
2068  if (RAND.getNode() != 0)
2069    return RAND;
2070  // fold (and (or x, C), D) -> D if (C & D) == D
2071  if (N1C && N0.getOpcode() == ISD::OR)
2072    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
2073      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
2074        return N1;
2075  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
2076  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
2077    SDValue N0Op0 = N0.getOperand(0);
2078    APInt Mask = ~N1C->getAPIntValue();
2079    Mask.trunc(N0Op0.getValueSizeInBits());
2080    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
2081      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
2082                                 N0.getValueType(), N0Op0);
2083
2084      // Replace uses of the AND with uses of the Zero extend node.
2085      CombineTo(N, Zext);
2086
2087      // We actually want to replace all uses of the any_extend with the
2088      // zero_extend, to avoid duplicating things.  This will later cause this
2089      // AND to be folded.
2090      CombineTo(N0.getNode(), Zext);
2091      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2092    }
2093  }
2094  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
2095  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2096    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2097    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2098
2099    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2100        LL.getValueType().isInteger()) {
2101      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
2102      if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) {
2103        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
2104                                     LR.getValueType(), LL, RL);
2105        AddToWorkList(ORNode.getNode());
2106        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
2107      }
2108      // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
2109      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) {
2110        SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(),
2111                                      LR.getValueType(), LL, RL);
2112        AddToWorkList(ANDNode.getNode());
2113        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
2114      }
2115      // fold (and (setgt X,  -1), (setgt Y,  -1)) -> (setgt (or X, Y), -1)
2116      if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) {
2117        SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(),
2118                                     LR.getValueType(), LL, RL);
2119        AddToWorkList(ORNode.getNode());
2120        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
2121      }
2122    }
2123    // canonicalize equivalent to ll == rl
2124    if (LL == RR && LR == RL) {
2125      Op1 = ISD::getSetCCSwappedOperands(Op1);
2126      std::swap(RL, RR);
2127    }
2128    if (LL == RL && LR == RR) {
2129      bool isInteger = LL.getValueType().isInteger();
2130      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
2131      if (Result != ISD::SETCC_INVALID &&
2132          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
2133        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
2134                            LL, LR, Result);
2135    }
2136  }
2137
2138  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
2139  if (N0.getOpcode() == N1.getOpcode()) {
2140    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
2141    if (Tmp.getNode()) return Tmp;
2142  }
2143
2144  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
2145  // fold (and (sra)) -> (and (srl)) when possible.
2146  if (!VT.isVector() &&
2147      SimplifyDemandedBits(SDValue(N, 0)))
2148    return SDValue(N, 0);
2149
2150  // fold (zext_inreg (extload x)) -> (zextload x)
2151  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
2152    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2153    EVT MemVT = LN0->getMemoryVT();
2154    // If we zero all the possible extended bits, then we can turn this into
2155    // a zextload if we are running before legalize or the operation is legal.
2156    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2157    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2158                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2159        ((!LegalOperations && !LN0->isVolatile()) ||
2160         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
2161      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
2162                                       LN0->getChain(), LN0->getBasePtr(),
2163                                       LN0->getSrcValue(),
2164                                       LN0->getSrcValueOffset(), MemVT,
2165                                       LN0->isVolatile(), LN0->isNonTemporal(),
2166                                       LN0->getAlignment());
2167      AddToWorkList(N);
2168      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2169      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2170    }
2171  }
2172  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
2173  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
2174      N0.hasOneUse()) {
2175    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
2176    EVT MemVT = LN0->getMemoryVT();
2177    // If we zero all the possible extended bits, then we can turn this into
2178    // a zextload if we are running before legalize or the operation is legal.
2179    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
2180    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
2181                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
2182        ((!LegalOperations && !LN0->isVolatile()) ||
2183         TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
2184      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
2185                                       LN0->getChain(),
2186                                       LN0->getBasePtr(), LN0->getSrcValue(),
2187                                       LN0->getSrcValueOffset(), MemVT,
2188                                       LN0->isVolatile(), LN0->isNonTemporal(),
2189                                       LN0->getAlignment());
2190      AddToWorkList(N);
2191      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
2192      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2193    }
2194  }
2195
2196  // fold (and (load x), 255) -> (zextload x, i8)
2197  // fold (and (extload x, i16), 255) -> (zextload x, i8)
2198  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
2199  if (N1C && (N0.getOpcode() == ISD::LOAD ||
2200              (N0.getOpcode() == ISD::ANY_EXTEND &&
2201               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
2202    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
2203    LoadSDNode *LN0 = HasAnyExt
2204      ? cast<LoadSDNode>(N0.getOperand(0))
2205      : cast<LoadSDNode>(N0);
2206    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
2207        LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) {
2208      uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits();
2209      if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){
2210        EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
2211        EVT LoadedVT = LN0->getMemoryVT();
2212
2213        if (ExtVT == LoadedVT &&
2214            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
2215          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
2216
2217          SDValue NewLoad =
2218            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
2219                           LN0->getChain(), LN0->getBasePtr(),
2220                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
2221                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
2222                           LN0->getAlignment());
2223          AddToWorkList(N);
2224          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
2225          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2226        }
2227
2228        // Do not change the width of a volatile load.
2229        // Do not generate loads of non-round integer types since these can
2230        // be expensive (and would be wrong if the type is not byte sized).
2231        if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() &&
2232            (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
2233          EVT PtrType = LN0->getOperand(1).getValueType();
2234
2235          unsigned Alignment = LN0->getAlignment();
2236          SDValue NewPtr = LN0->getBasePtr();
2237
2238          // For big endian targets, we need to add an offset to the pointer
2239          // to load the correct bytes.  For little endian systems, we merely
2240          // need to read fewer bytes from the same pointer.
2241          if (TLI.isBigEndian()) {
2242            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
2243            unsigned EVTStoreBytes = ExtVT.getStoreSize();
2244            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
2245            NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType,
2246                                 NewPtr, DAG.getConstant(PtrOff, PtrType));
2247            Alignment = MinAlign(Alignment, PtrOff);
2248          }
2249
2250          AddToWorkList(NewPtr.getNode());
2251
2252          EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
2253          SDValue Load =
2254            DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
2255                           LN0->getChain(), NewPtr,
2256                           LN0->getSrcValue(), LN0->getSrcValueOffset(),
2257                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
2258                           Alignment);
2259          AddToWorkList(N);
2260          CombineTo(LN0, Load, Load.getValue(1));
2261          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
2262        }
2263      }
2264    }
2265  }
2266
2267  return PromoteIntBinOp(SDValue(N, 0));
2268}
2269
2270SDValue DAGCombiner::visitOR(SDNode *N) {
2271  SDValue N0 = N->getOperand(0);
2272  SDValue N1 = N->getOperand(1);
2273  SDValue LL, LR, RL, RR, CC0, CC1;
2274  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2275  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2276  EVT VT = N1.getValueType();
2277
2278  // fold vector ops
2279  if (VT.isVector()) {
2280    SDValue FoldedVOp = SimplifyVBinOp(N);
2281    if (FoldedVOp.getNode()) return FoldedVOp;
2282  }
2283
2284  // fold (or x, undef) -> -1
2285  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
2286    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
2287    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
2288  }
2289  // fold (or c1, c2) -> c1|c2
2290  if (N0C && N1C)
2291    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
2292  // canonicalize constant to RHS
2293  if (N0C && !N1C)
2294    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0);
2295  // fold (or x, 0) -> x
2296  if (N1C && N1C->isNullValue())
2297    return N0;
2298  // fold (or x, -1) -> -1
2299  if (N1C && N1C->isAllOnesValue())
2300    return N1;
2301  // fold (or x, c) -> c iff (x & ~c) == 0
2302  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
2303    return N1;
2304  // reassociate or
2305  SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1);
2306  if (ROR.getNode() != 0)
2307    return ROR;
2308  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
2309  // iff (c1 & c2) == 0.
2310  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
2311             isa<ConstantSDNode>(N0.getOperand(1))) {
2312    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
2313    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
2314      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
2315                         DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
2316                                     N0.getOperand(0), N1),
2317                         DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
2318  }
2319  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
2320  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
2321    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
2322    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
2323
2324    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
2325        LL.getValueType().isInteger()) {
2326      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
2327      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
2328      if (cast<ConstantSDNode>(LR)->isNullValue() &&
2329          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
2330        SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(),
2331                                     LR.getValueType(), LL, RL);
2332        AddToWorkList(ORNode.getNode());
2333        return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1);
2334      }
2335      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
2336      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
2337      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
2338          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
2339        SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(),
2340                                      LR.getValueType(), LL, RL);
2341        AddToWorkList(ANDNode.getNode());
2342        return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1);
2343      }
2344    }
2345    // canonicalize equivalent to ll == rl
2346    if (LL == RR && LR == RL) {
2347      Op1 = ISD::getSetCCSwappedOperands(Op1);
2348      std::swap(RL, RR);
2349    }
2350    if (LL == RL && LR == RR) {
2351      bool isInteger = LL.getValueType().isInteger();
2352      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
2353      if (Result != ISD::SETCC_INVALID &&
2354          (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType())))
2355        return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(),
2356                            LL, LR, Result);
2357    }
2358  }
2359
2360  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
2361  if (N0.getOpcode() == N1.getOpcode()) {
2362    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
2363    if (Tmp.getNode()) return Tmp;
2364  }
2365
2366  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
2367  if (N0.getOpcode() == ISD::AND &&
2368      N1.getOpcode() == ISD::AND &&
2369      N0.getOperand(1).getOpcode() == ISD::Constant &&
2370      N1.getOperand(1).getOpcode() == ISD::Constant &&
2371      // Don't increase # computations.
2372      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
2373    // We can only do this xform if we know that bits from X that are set in C2
2374    // but not in C1 are already zero.  Likewise for Y.
2375    const APInt &LHSMask =
2376      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
2377    const APInt &RHSMask =
2378      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();
2379
2380    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
2381        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
2382      SDValue X = DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
2383                              N0.getOperand(0), N1.getOperand(0));
2384      return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X,
2385                         DAG.getConstant(LHSMask | RHSMask, VT));
2386    }
2387  }
2388
2389  // See if this is some rotate idiom.
2390  if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc()))
2391    return SDValue(Rot, 0);
2392
2393  return PromoteIntBinOp(SDValue(N, 0));
2394}
2395
2396/// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present.
2397static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
2398  if (Op.getOpcode() == ISD::AND) {
2399    if (isa<ConstantSDNode>(Op.getOperand(1))) {
2400      Mask = Op.getOperand(1);
2401      Op = Op.getOperand(0);
2402    } else {
2403      return false;
2404    }
2405  }
2406
2407  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
2408    Shift = Op;
2409    return true;
2410  }
2411
2412  return false;
2413}
2414
2415// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
2416// idioms for rotate, and if the target supports rotation instructions, generate
2417// a rot[lr].
2418SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) {
2419  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
2420  EVT VT = LHS.getValueType();
2421  if (!TLI.isTypeLegal(VT)) return 0;
2422
2423  // The target must have at least one rotate flavor.
2424  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
2425  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
2426  if (!HasROTL && !HasROTR) return 0;
2427
2428  // Match "(X shl/srl V1) & V2" where V2 may not be present.
2429  SDValue LHSShift;   // The shift.
2430  SDValue LHSMask;    // AND value if any.
2431  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
2432    return 0; // Not part of a rotate.
2433
2434  SDValue RHSShift;   // The shift.
2435  SDValue RHSMask;    // AND value if any.
2436  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
2437    return 0; // Not part of a rotate.
2438
2439  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
2440    return 0;   // Not shifting the same value.
2441
2442  if (LHSShift.getOpcode() == RHSShift.getOpcode())
2443    return 0;   // Shifts must disagree.
2444
2445  // Canonicalize shl to left side in a shl/srl pair.
2446  if (RHSShift.getOpcode() == ISD::SHL) {
2447    std::swap(LHS, RHS);
2448    std::swap(LHSShift, RHSShift);
2449    std::swap(LHSMask , RHSMask );
2450  }
2451
2452  unsigned OpSizeInBits = VT.getSizeInBits();
2453  SDValue LHSShiftArg = LHSShift.getOperand(0);
2454  SDValue LHSShiftAmt = LHSShift.getOperand(1);
2455  SDValue RHSShiftAmt = RHSShift.getOperand(1);
2456
2457  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
2458  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
2459  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
2460      RHSShiftAmt.getOpcode() == ISD::Constant) {
2461    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
2462    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
2463    if ((LShVal + RShVal) != OpSizeInBits)
2464      return 0;
2465
2466    SDValue Rot;
2467    if (HasROTL)
2468      Rot = DAG.getNode(ISD::ROTL, DL, VT, LHSShiftArg, LHSShiftAmt);
2469    else
2470      Rot = DAG.getNode(ISD::ROTR, DL, VT, LHSShiftArg, RHSShiftAmt);
2471
2472    // If there is an AND of either shifted operand, apply it to the result.
2473    if (LHSMask.getNode() || RHSMask.getNode()) {
2474      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);
2475
2476      if (LHSMask.getNode()) {
2477        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
2478        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
2479      }
2480      if (RHSMask.getNode()) {
2481        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
2482        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
2483      }
2484
2485      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
2486    }
2487
2488    return Rot.getNode();
2489  }
2490
2491  // If there is a mask here, and we have a variable shift, we can't be sure
2492  // that we're masking out the right stuff.
2493  if (LHSMask.getNode() || RHSMask.getNode())
2494    return 0;
2495
2496  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y)
2497  // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y))
2498  if (RHSShiftAmt.getOpcode() == ISD::SUB &&
2499      LHSShiftAmt == RHSShiftAmt.getOperand(1)) {
2500    if (ConstantSDNode *SUBC =
2501          dyn_cast<ConstantSDNode>(RHSShiftAmt.getOperand(0))) {
2502      if (SUBC->getAPIntValue() == OpSizeInBits) {
2503        if (HasROTL)
2504          return DAG.getNode(ISD::ROTL, DL, VT,
2505                             LHSShiftArg, LHSShiftAmt).getNode();
2506        else
2507          return DAG.getNode(ISD::ROTR, DL, VT,
2508                             LHSShiftArg, RHSShiftAmt).getNode();
2509      }
2510    }
2511  }
2512
2513  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y)
2514  // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y))
2515  if (LHSShiftAmt.getOpcode() == ISD::SUB &&
2516      RHSShiftAmt == LHSShiftAmt.getOperand(1)) {
2517    if (ConstantSDNode *SUBC =
2518          dyn_cast<ConstantSDNode>(LHSShiftAmt.getOperand(0))) {
2519      if (SUBC->getAPIntValue() == OpSizeInBits) {
2520        if (HasROTR)
2521          return DAG.getNode(ISD::ROTR, DL, VT,
2522                             LHSShiftArg, RHSShiftAmt).getNode();
2523        else
2524          return DAG.getNode(ISD::ROTL, DL, VT,
2525                             LHSShiftArg, LHSShiftAmt).getNode();
2526      }
2527    }
2528  }
2529
2530  // Look for sign/zext/any-extended or truncate cases:
2531  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
2532       || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
2533       || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
2534       || LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
2535      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND
2536       || RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND
2537       || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND
2538       || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
2539    SDValue LExtOp0 = LHSShiftAmt.getOperand(0);
2540    SDValue RExtOp0 = RHSShiftAmt.getOperand(0);
2541    if (RExtOp0.getOpcode() == ISD::SUB &&
2542        RExtOp0.getOperand(1) == LExtOp0) {
2543      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
2544      //   (rotl x, y)
2545      // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) ->
2546      //   (rotr x, (sub 32, y))
2547      if (ConstantSDNode *SUBC =
2548            dyn_cast<ConstantSDNode>(RExtOp0.getOperand(0))) {
2549        if (SUBC->getAPIntValue() == OpSizeInBits) {
2550          return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
2551                             LHSShiftArg,
2552                             HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode();
2553        }
2554      }
2555    } else if (LExtOp0.getOpcode() == ISD::SUB &&
2556               RExtOp0 == LExtOp0.getOperand(1)) {
2557      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
2558      //   (rotr x, y)
2559      // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) ->
2560      //   (rotl x, (sub 32, y))
2561      if (ConstantSDNode *SUBC =
2562            dyn_cast<ConstantSDNode>(LExtOp0.getOperand(0))) {
2563        if (SUBC->getAPIntValue() == OpSizeInBits) {
2564          return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT,
2565                             LHSShiftArg,
2566                             HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode();
2567        }
2568      }
2569    }
2570  }
2571
2572  return 0;
2573}
2574
2575SDValue DAGCombiner::visitXOR(SDNode *N) {
2576  SDValue N0 = N->getOperand(0);
2577  SDValue N1 = N->getOperand(1);
2578  SDValue LHS, RHS, CC;
2579  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2580  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2581  EVT VT = N0.getValueType();
2582
2583  // fold vector ops
2584  if (VT.isVector()) {
2585    SDValue FoldedVOp = SimplifyVBinOp(N);
2586    if (FoldedVOp.getNode()) return FoldedVOp;
2587  }
2588
2589  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
2590  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
2591    return DAG.getConstant(0, VT);
2592  // fold (xor x, undef) -> undef
2593  if (N0.getOpcode() == ISD::UNDEF)
2594    return N0;
2595  if (N1.getOpcode() == ISD::UNDEF)
2596    return N1;
2597  // fold (xor c1, c2) -> c1^c2
2598  if (N0C && N1C)
2599    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
2600  // canonicalize constant to RHS
2601  if (N0C && !N1C)
2602    return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
2603  // fold (xor x, 0) -> x
2604  if (N1C && N1C->isNullValue())
2605    return N0;
2606  // reassociate xor
2607  SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1);
2608  if (RXOR.getNode() != 0)
2609    return RXOR;
2610
2611  // fold !(x cc y) -> (x !cc y)
2612  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
2613    bool isInt = LHS.getValueType().isInteger();
2614    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
2615                                               isInt);
2616
2617    if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) {
2618      switch (N0.getOpcode()) {
2619      default:
2620        llvm_unreachable("Unhandled SetCC Equivalent!");
2621      case ISD::SETCC:
2622        return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC);
2623      case ISD::SELECT_CC:
2624        return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2),
2625                               N0.getOperand(3), NotCC);
2626      }
2627    }
2628  }
2629
2630  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
2631  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
2632      N0.getNode()->hasOneUse() &&
2633      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
2634    SDValue V = N0.getOperand(0);
2635    V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V,
2636                    DAG.getConstant(1, V.getValueType()));
2637    AddToWorkList(V.getNode());
2638    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V);
2639  }
2640
2641  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
2642  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
2643      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
2644    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
2645    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
2646      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
2647      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
2648      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
2649      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
2650      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
2651    }
2652  }
2653  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
2654  if (N1C && N1C->isAllOnesValue() &&
2655      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
2656    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
2657    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
2658      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
2659      LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS
2660      RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS
2661      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
2662      return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS);
2663    }
2664  }
2665  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
2666  if (N1C && N0.getOpcode() == ISD::XOR) {
2667    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
2668    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
2669    if (N00C)
2670      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1),
2671                         DAG.getConstant(N1C->getAPIntValue() ^
2672                                         N00C->getAPIntValue(), VT));
2673    if (N01C)
2674      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0),
2675                         DAG.getConstant(N1C->getAPIntValue() ^
2676                                         N01C->getAPIntValue(), VT));
2677  }
2678  // fold (xor x, x) -> 0
2679  if (N0 == N1) {
2680    if (!VT.isVector()) {
2681      return DAG.getConstant(0, VT);
2682    } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
2683      // Produce a vector of zeros.
2684      SDValue El = DAG.getConstant(0, VT.getVectorElementType());
2685      std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
2686      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
2687                         &Ops[0], Ops.size());
2688    }
2689  }
2690
2691  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
2692  if (N0.getOpcode() == N1.getOpcode()) {
2693    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
2694    if (Tmp.getNode()) return Tmp;
2695  }
2696
2697  // Simplify the expression using non-local knowledge.
2698  if (!VT.isVector() &&
2699      SimplifyDemandedBits(SDValue(N, 0)))
2700    return SDValue(N, 0);
2701
2702  return PromoteIntBinOp(SDValue(N, 0));
2703}
2704
2705/// visitShiftByConstant - Handle transforms common to the three shifts, when
2706/// the shift amount is a constant.
2707SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
2708  SDNode *LHS = N->getOperand(0).getNode();
2709  if (!LHS->hasOneUse()) return SDValue();
2710
2711  // We want to pull some binops through shifts, so that we have (and (shift))
2712  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
2713  // thing happens with address calculations, so it's important to canonicalize
2714  // it.
2715  bool HighBitSet = false;  // Can we transform this if the high bit is set?
2716
2717  switch (LHS->getOpcode()) {
2718  default: return SDValue();
2719  case ISD::OR:
2720  case ISD::XOR:
2721    HighBitSet = false; // We can only transform sra if the high bit is clear.
2722    break;
2723  case ISD::AND:
2724    HighBitSet = true;  // We can only transform sra if the high bit is set.
2725    break;
2726  case ISD::ADD:
2727    if (N->getOpcode() != ISD::SHL)
2728      return SDValue(); // only shl(add) not sr[al](add).
2729    HighBitSet = false; // We can only transform sra if the high bit is clear.
2730    break;
2731  }
2732
2733  // We require the RHS of the binop to be a constant as well.
2734  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
2735  if (!BinOpCst) return SDValue();
2736
2737  // FIXME: disable this unless the input to the binop is a shift by a constant.
2738  // If it is not a shift, it pessimizes some common cases like:
2739  //
2740  //    void foo(int *X, int i) { X[i & 1235] = 1; }
2741  //    int bar(int *X, int i) { return X[i & 255]; }
2742  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
2743  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
2744       BinOpLHSVal->getOpcode() != ISD::SRA &&
2745       BinOpLHSVal->getOpcode() != ISD::SRL) ||
2746      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
2747    return SDValue();
2748
2749  EVT VT = N->getValueType(0);
2750
2751  // If this is a signed shift right, and the high bit is modified by the
2752  // logical operation, do not perform the transformation. The highBitSet
2753  // boolean indicates the value of the high bit of the constant which would
2754  // cause it to be modified for this operation.
2755  if (N->getOpcode() == ISD::SRA) {
2756    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
2757    if (BinOpRHSSignSet != HighBitSet)
2758      return SDValue();
2759  }
2760
2761  // Fold the constants, shifting the binop RHS by the shift amount.
2762  SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(),
2763                               N->getValueType(0),
2764                               LHS->getOperand(1), N->getOperand(1));
2765
2766  // Create the new shift.
2767  SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
2768                                 VT, LHS->getOperand(0), N->getOperand(1));
2769
2770  // Create the new binop.
2771  return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS);
2772}
2773
2774SDValue DAGCombiner::visitSHL(SDNode *N) {
2775  SDValue N0 = N->getOperand(0);
2776  SDValue N1 = N->getOperand(1);
2777  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2778  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2779  EVT VT = N0.getValueType();
2780  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
2781
2782  // fold (shl c1, c2) -> c1<<c2
2783  if (N0C && N1C)
2784    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
2785  // fold (shl 0, x) -> 0
2786  if (N0C && N0C->isNullValue())
2787    return N0;
2788  // fold (shl x, c >= size(x)) -> undef
2789  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
2790    return DAG.getUNDEF(VT);
2791  // fold (shl x, 0) -> x
2792  if (N1C && N1C->isNullValue())
2793    return N0;
2794  // if (shl x, c) is known to be zero, return 0
2795  if (DAG.MaskedValueIsZero(SDValue(N, 0),
2796                            APInt::getAllOnesValue(OpSizeInBits)))
2797    return DAG.getConstant(0, VT);
2798  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
2799  if (N1.getOpcode() == ISD::TRUNCATE &&
2800      N1.getOperand(0).getOpcode() == ISD::AND &&
2801      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
2802    SDValue N101 = N1.getOperand(0).getOperand(1);
2803    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
2804      EVT TruncVT = N1.getValueType();
2805      SDValue N100 = N1.getOperand(0).getOperand(0);
2806      APInt TruncC = N101C->getAPIntValue();
2807      TruncC.trunc(TruncVT.getSizeInBits());
2808      return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
2809                         DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
2810                                     DAG.getNode(ISD::TRUNCATE,
2811                                                 N->getDebugLoc(),
2812                                                 TruncVT, N100),
2813                                     DAG.getConstant(TruncC, TruncVT)));
2814    }
2815  }
2816
2817  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
2818    return SDValue(N, 0);
2819
2820  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
2821  if (N1C && N0.getOpcode() == ISD::SHL &&
2822      N0.getOperand(1).getOpcode() == ISD::Constant) {
2823    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
2824    uint64_t c2 = N1C->getZExtValue();
2825    if (c1 + c2 > OpSizeInBits)
2826      return DAG.getConstant(0, VT);
2827    return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
2828                       DAG.getConstant(c1 + c2, N1.getValueType()));
2829  }
2830  // fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
2831  //                               (srl (and x, (shl -1, c1)), (sub c1, c2))
2832  if (N1C && N0.getOpcode() == ISD::SRL &&
2833      N0.getOperand(1).getOpcode() == ISD::Constant) {
2834    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
2835    if (c1 < VT.getSizeInBits()) {
2836      uint64_t c2 = N1C->getZExtValue();
2837      SDValue HiBitsMask =
2838        DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
2839                                              VT.getSizeInBits() - c1),
2840                        VT);
2841      SDValue Mask = DAG.getNode(ISD::AND, N0.getDebugLoc(), VT,
2842                                 N0.getOperand(0),
2843                                 HiBitsMask);
2844      if (c2 > c1)
2845        return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, Mask,
2846                           DAG.getConstant(c2-c1, N1.getValueType()));
2847      else
2848        return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Mask,
2849                           DAG.getConstant(c1-c2, N1.getValueType()));
2850    }
2851  }
2852  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
2853  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
2854    SDValue HiBitsMask =
2855      DAG.getConstant(APInt::getHighBitsSet(VT.getSizeInBits(),
2856                                            VT.getSizeInBits() -
2857                                              N1C->getZExtValue()),
2858                      VT);
2859    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
2860                       HiBitsMask);
2861  }
2862
2863  if (N1C) {
2864    SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
2865    if (NewSHL.getNode())
2866      return NewSHL;
2867  }
2868
2869  return PromoteIntShiftOp(SDValue(N, 0));
2870}
2871
2872SDValue DAGCombiner::visitSRA(SDNode *N) {
2873  SDValue N0 = N->getOperand(0);
2874  SDValue N1 = N->getOperand(1);
2875  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2876  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2877  EVT VT = N0.getValueType();
2878  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
2879
2880  // fold (sra c1, c2) -> (sra c1, c2)
2881  if (N0C && N1C)
2882    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
2883  // fold (sra 0, x) -> 0
2884  if (N0C && N0C->isNullValue())
2885    return N0;
2886  // fold (sra -1, x) -> -1
2887  if (N0C && N0C->isAllOnesValue())
2888    return N0;
2889  // fold (sra x, (setge c, size(x))) -> undef
2890  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
2891    return DAG.getUNDEF(VT);
2892  // fold (sra x, 0) -> x
2893  if (N1C && N1C->isNullValue())
2894    return N0;
2895  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
2896  // sext_inreg.
2897  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
2898    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
2899    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
2900    if (VT.isVector())
2901      ExtVT = EVT::getVectorVT(*DAG.getContext(),
2902                               ExtVT, VT.getVectorNumElements());
2903    if ((!LegalOperations ||
2904         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
2905      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
2906                         N0.getOperand(0), DAG.getValueType(ExtVT));
2907  }
2908
2909  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
2910  if (N1C && N0.getOpcode() == ISD::SRA) {
2911    if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
2912      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
2913      if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
2914      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
2915                         DAG.getConstant(Sum, N1C->getValueType(0)));
2916    }
2917  }
2918
2919  // fold (sra (shl X, m), (sub result_size, n))
2920  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
2921  // result_size - n != m.
2922  // If truncate is free for the target sext(shl) is likely to result in better
2923  // code.
2924  if (N0.getOpcode() == ISD::SHL) {
2925    // Get the two constanst of the shifts, CN0 = m, CN = n.
2926    const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
2927    if (N01C && N1C) {
2928      // Determine what the truncate's result bitsize and type would be.
2929      EVT TruncVT =
2930        EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
2931      // Determine the residual right-shift amount.
2932      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
2933
2934      // If the shift is not a no-op (in which case this should be just a sign
2935      // extend already), the truncated to type is legal, sign_extend is legal
2936      // on that type, and the truncate to that type is both legal and free,
2937      // perform the transform.
2938      if ((ShiftAmt > 0) &&
2939          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
2940          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
2941          TLI.isTruncateFree(VT, TruncVT)) {
2942
2943          SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy());
2944          SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT,
2945                                      N0.getOperand(0), Amt);
2946          SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT,
2947                                      Shift);
2948          return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
2949                             N->getValueType(0), Trunc);
2950      }
2951    }
2952  }
2953
2954  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
2955  if (N1.getOpcode() == ISD::TRUNCATE &&
2956      N1.getOperand(0).getOpcode() == ISD::AND &&
2957      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
2958    SDValue N101 = N1.getOperand(0).getOperand(1);
2959    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
2960      EVT TruncVT = N1.getValueType();
2961      SDValue N100 = N1.getOperand(0).getOperand(0);
2962      APInt TruncC = N101C->getAPIntValue();
2963      TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
2964      return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
2965                         DAG.getNode(ISD::AND, N->getDebugLoc(),
2966                                     TruncVT,
2967                                     DAG.getNode(ISD::TRUNCATE,
2968                                                 N->getDebugLoc(),
2969                                                 TruncVT, N100),
2970                                     DAG.getConstant(TruncC, TruncVT)));
2971    }
2972  }
2973
2974  // Simplify, based on bits shifted out of the LHS.
2975  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
2976    return SDValue(N, 0);
2977
2978
2979  // If the sign bit is known to be zero, switch this to a SRL.
2980  if (DAG.SignBitIsZero(N0))
2981    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
2982
2983  if (N1C) {
2984    SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
2985    if (NewSRA.getNode())
2986      return NewSRA;
2987  }
2988
2989  return PromoteIntShiftOp(SDValue(N, 0));
2990}
2991
2992SDValue DAGCombiner::visitSRL(SDNode *N) {
2993  SDValue N0 = N->getOperand(0);
2994  SDValue N1 = N->getOperand(1);
2995  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
2996  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
2997  EVT VT = N0.getValueType();
2998  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
2999
3000  // fold (srl c1, c2) -> c1 >>u c2
3001  if (N0C && N1C)
3002    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
3003  // fold (srl 0, x) -> 0
3004  if (N0C && N0C->isNullValue())
3005    return N0;
3006  // fold (srl x, c >= size(x)) -> undef
3007  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
3008    return DAG.getUNDEF(VT);
3009  // fold (srl x, 0) -> x
3010  if (N1C && N1C->isNullValue())
3011    return N0;
3012  // if (srl x, c) is known to be zero, return 0
3013  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
3014                                   APInt::getAllOnesValue(OpSizeInBits)))
3015    return DAG.getConstant(0, VT);
3016
3017  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
3018  if (N1C && N0.getOpcode() == ISD::SRL &&
3019      N0.getOperand(1).getOpcode() == ISD::Constant) {
3020    uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
3021    uint64_t c2 = N1C->getZExtValue();
3022    if (c1 + c2 > OpSizeInBits)
3023      return DAG.getConstant(0, VT);
3024    return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
3025                       DAG.getConstant(c1 + c2, N1.getValueType()));
3026  }
3027
3028  // fold (srl (shl x, c), c) -> (and x, cst2)
3029  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
3030      N0.getValueSizeInBits() <= 64) {
3031    uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits();
3032    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
3033                       DAG.getConstant(~0ULL >> ShAmt, VT));
3034  }
3035
3036
3037  // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
3038  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
3039    // Shifting in all undef bits?
3040    EVT SmallVT = N0.getOperand(0).getValueType();
3041    if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
3042      return DAG.getUNDEF(VT);
3043
3044    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
3045      SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
3046                                       N0.getOperand(0), N1);
3047      AddToWorkList(SmallShift.getNode());
3048      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
3049    }
3050  }
3051
3052  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
3053  // bit, which is unmodified by sra.
3054  if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) {
3055    if (N0.getOpcode() == ISD::SRA)
3056      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1);
3057  }
3058
3059  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
3060  if (N1C && N0.getOpcode() == ISD::CTLZ &&
3061      N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
3062    APInt KnownZero, KnownOne;
3063    APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
3064    DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
3065
3066    // If any of the input bits are KnownOne, then the input couldn't be all
3067    // zeros, thus the result of the srl will always be zero.
3068    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);
3069
3070    // If all of the bits input the to ctlz node are known to be zero, then
3071    // the result of the ctlz is "32" and the result of the shift is one.
3072    APInt UnknownBits = ~KnownZero & Mask;
3073    if (UnknownBits == 0) return DAG.getConstant(1, VT);
3074
3075    // Otherwise, check to see if there is exactly one bit input to the ctlz.
3076    if ((UnknownBits & (UnknownBits - 1)) == 0) {
3077      // Okay, we know that only that the single bit specified by UnknownBits
3078      // could be set on input to the CTLZ node. If this bit is set, the SRL
3079      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
3080      // to an SRL/XOR pair, which is likely to simplify more.
3081      unsigned ShAmt = UnknownBits.countTrailingZeros();
3082      SDValue Op = N0.getOperand(0);
3083
3084      if (ShAmt) {
3085        Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op,
3086                         DAG.getConstant(ShAmt, getShiftAmountTy()));
3087        AddToWorkList(Op.getNode());
3088      }
3089
3090      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
3091                         Op, DAG.getConstant(1, VT));
3092    }
3093  }
3094
3095  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
3096  if (N1.getOpcode() == ISD::TRUNCATE &&
3097      N1.getOperand(0).getOpcode() == ISD::AND &&
3098      N1.hasOneUse() && N1.getOperand(0).hasOneUse()) {
3099    SDValue N101 = N1.getOperand(0).getOperand(1);
3100    if (ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N101)) {
3101      EVT TruncVT = N1.getValueType();
3102      SDValue N100 = N1.getOperand(0).getOperand(0);
3103      APInt TruncC = N101C->getAPIntValue();
3104      TruncC.trunc(TruncVT.getSizeInBits());
3105      return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
3106                         DAG.getNode(ISD::AND, N->getDebugLoc(),
3107                                     TruncVT,
3108                                     DAG.getNode(ISD::TRUNCATE,
3109                                                 N->getDebugLoc(),
3110                                                 TruncVT, N100),
3111                                     DAG.getConstant(TruncC, TruncVT)));
3112    }
3113  }
3114
3115  // fold operands of srl based on knowledge that the low bits are not
3116  // demanded.
3117  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
3118    return SDValue(N, 0);
3119
3120  if (N1C) {
3121    SDValue NewSRL = visitShiftByConstant(N, N1C->getZExtValue());
3122    if (NewSRL.getNode())
3123      return NewSRL;
3124  }
3125
3126  // Here is a common situation. We want to optimize:
3127  //
3128  //   %a = ...
3129  //   %b = and i32 %a, 2
3130  //   %c = srl i32 %b, 1
3131  //   brcond i32 %c ...
3132  //
3133  // into
3134  //
3135  //   %a = ...
3136  //   %b = and %a, 2
3137  //   %c = setcc eq %b, 0
3138  //   brcond %c ...
3139  //
3140  // However when after the source operand of SRL is optimized into AND, the SRL
3141  // itself may not be optimized further. Look for it and add the BRCOND into
3142  // the worklist.
3143  if (N->hasOneUse()) {
3144    SDNode *Use = *N->use_begin();
3145    if (Use->getOpcode() == ISD::BRCOND)
3146      AddToWorkList(Use);
3147    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
3148      // Also look pass the truncate.
3149      Use = *Use->use_begin();
3150      if (Use->getOpcode() == ISD::BRCOND)
3151        AddToWorkList(Use);
3152    }
3153  }
3154
3155  return PromoteIntShiftOp(SDValue(N, 0));
3156}
3157
3158SDValue DAGCombiner::visitCTLZ(SDNode *N) {
3159  SDValue N0 = N->getOperand(0);
3160  EVT VT = N->getValueType(0);
3161
3162  // fold (ctlz c1) -> c2
3163  if (isa<ConstantSDNode>(N0))
3164    return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0);
3165  return SDValue();
3166}
3167
3168SDValue DAGCombiner::visitCTTZ(SDNode *N) {
3169  SDValue N0 = N->getOperand(0);
3170  EVT VT = N->getValueType(0);
3171
3172  // fold (cttz c1) -> c2
3173  if (isa<ConstantSDNode>(N0))
3174    return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0);
3175  return SDValue();
3176}
3177
3178SDValue DAGCombiner::visitCTPOP(SDNode *N) {
3179  SDValue N0 = N->getOperand(0);
3180  EVT VT = N->getValueType(0);
3181
3182  // fold (ctpop c1) -> c2
3183  if (isa<ConstantSDNode>(N0))
3184    return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0);
3185  return SDValue();
3186}
3187
3188SDValue DAGCombiner::visitSELECT(SDNode *N) {
3189  SDValue N0 = N->getOperand(0);
3190  SDValue N1 = N->getOperand(1);
3191  SDValue N2 = N->getOperand(2);
3192  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
3193  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
3194  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3195  EVT VT = N->getValueType(0);
3196  EVT VT0 = N0.getValueType();
3197
3198  // fold (select C, X, X) -> X
3199  if (N1 == N2)
3200    return N1;
3201  // fold (select true, X, Y) -> X
3202  if (N0C && !N0C->isNullValue())
3203    return N1;
3204  // fold (select false, X, Y) -> Y
3205  if (N0C && N0C->isNullValue())
3206    return N2;
3207  // fold (select C, 1, X) -> (or C, X)
3208  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
3209    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
3210  // fold (select C, 0, 1) -> (xor C, 1)
3211  if (VT.isInteger() &&
3212      (VT0 == MVT::i1 ||
3213       (VT0.isInteger() &&
3214        TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent)) &&
3215      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
3216    SDValue XORNode;
3217    if (VT == VT0)
3218      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0,
3219                         N0, DAG.getConstant(1, VT0));
3220    XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0,
3221                          N0, DAG.getConstant(1, VT0));
3222    AddToWorkList(XORNode.getNode());
3223    if (VT.bitsGT(VT0))
3224      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode);
3225    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode);
3226  }
3227  // fold (select C, 0, X) -> (and (not C), X)
3228  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
3229    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
3230    AddToWorkList(NOTNode.getNode());
3231    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2);
3232  }
3233  // fold (select C, X, 1) -> (or (not C), X)
3234  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
3235    SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT);
3236    AddToWorkList(NOTNode.getNode());
3237    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1);
3238  }
3239  // fold (select C, X, 0) -> (and C, X)
3240  if (VT == MVT::i1 && N2C && N2C->isNullValue())
3241    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
3242  // fold (select X, X, Y) -> (or X, Y)
3243  // fold (select X, 1, Y) -> (or X, Y)
3244  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
3245    return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2);
3246  // fold (select X, Y, X) -> (and X, Y)
3247  // fold (select X, Y, 0) -> (and X, Y)
3248  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
3249    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1);
3250
3251  // If we can fold this based on the true/false value, do so.
3252  if (SimplifySelectOps(N, N1, N2))
3253    return SDValue(N, 0);  // Don't revisit N.
3254
3255  // fold selects based on a setcc into other things, such as min/max/abs
3256  if (N0.getOpcode() == ISD::SETCC) {
3257    // FIXME:
3258    // Check against MVT::Other for SELECT_CC, which is a workaround for targets
3259    // having to say they don't support SELECT_CC on every type the DAG knows
3260    // about, since there is no way to mark an opcode illegal at all value types
3261    if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) &&
3262        TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))
3263      return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT,
3264                         N0.getOperand(0), N0.getOperand(1),
3265                         N1, N2, N0.getOperand(2));
3266    return SimplifySelect(N->getDebugLoc(), N0, N1, N2);
3267  }
3268
3269  return SDValue();
3270}
3271
3272SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
3273  SDValue N0 = N->getOperand(0);
3274  SDValue N1 = N->getOperand(1);
3275  SDValue N2 = N->getOperand(2);
3276  SDValue N3 = N->getOperand(3);
3277  SDValue N4 = N->getOperand(4);
3278  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
3279
3280  // fold select_cc lhs, rhs, x, x, cc -> x
3281  if (N2 == N3)
3282    return N2;
3283
3284  // Determine if the condition we're dealing with is constant
3285  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
3286                              N0, N1, CC, N->getDebugLoc(), false);
3287  if (SCC.getNode()) AddToWorkList(SCC.getNode());
3288
3289  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
3290    if (!SCCC->isNullValue())
3291      return N2;    // cond always true -> true val
3292    else
3293      return N3;    // cond always false -> false val
3294  }
3295
3296  // Fold to a simpler select_cc
3297  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
3298    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
3299                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
3300                       SCC.getOperand(2));
3301
3302  // If we can fold this based on the true/false value, do so.
3303  if (SimplifySelectOps(N, N2, N3))
3304    return SDValue(N, 0);  // Don't revisit N.
3305
3306  // fold select_cc into other things, such as min/max/abs
3307  return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC);
3308}
3309
3310SDValue DAGCombiner::visitSETCC(SDNode *N) {
3311  return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
3312                       cast<CondCodeSDNode>(N->getOperand(2))->get(),
3313                       N->getDebugLoc());
3314}
3315
3316// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
3317// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
3318// transformation. Returns true if extension are possible and the above
3319// mentioned transformation is profitable.
3320static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
3321                                    unsigned ExtOpc,
3322                                    SmallVector<SDNode*, 4> &ExtendNodes,
3323                                    const TargetLowering &TLI) {
3324  bool HasCopyToRegUses = false;
3325  bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
3326  for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
3327                            UE = N0.getNode()->use_end();
3328       UI != UE; ++UI) {
3329    SDNode *User = *UI;
3330    if (User == N)
3331      continue;
3332    if (UI.getUse().getResNo() != N0.getResNo())
3333      continue;
3334    // FIXME: Only extend SETCC N, N and SETCC N, c for now.
3335    if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
3336      ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
3337      if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
3338        // Sign bits will be lost after a zext.
3339        return false;
3340      bool Add = false;
3341      for (unsigned i = 0; i != 2; ++i) {
3342        SDValue UseOp = User->getOperand(i);
3343        if (UseOp == N0)
3344          continue;
3345        if (!isa<ConstantSDNode>(UseOp))
3346          return false;
3347        Add = true;
3348      }
3349      if (Add)
3350        ExtendNodes.push_back(User);
3351      continue;
3352    }
3353    // If truncates aren't free and there are users we can't
3354    // extend, it isn't worthwhile.
3355    if (!isTruncFree)
3356      return false;
3357    // Remember if this value is live-out.
3358    if (User->getOpcode() == ISD::CopyToReg)
3359      HasCopyToRegUses = true;
3360  }
3361
3362  if (HasCopyToRegUses) {
3363    bool BothLiveOut = false;
3364    for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
3365         UI != UE; ++UI) {
3366      SDUse &Use = UI.getUse();
3367      if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
3368        BothLiveOut = true;
3369        break;
3370      }
3371    }
3372    if (BothLiveOut)
3373      // Both unextended and extended values are live out. There had better be
3374      // good a reason for the transformation.
3375      return ExtendNodes.size();
3376  }
3377  return true;
3378}
3379
3380SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
3381  SDValue N0 = N->getOperand(0);
3382  EVT VT = N->getValueType(0);
3383
3384  // fold (sext c1) -> c1
3385  if (isa<ConstantSDNode>(N0))
3386    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0);
3387
3388  // fold (sext (sext x)) -> (sext x)
3389  // fold (sext (aext x)) -> (sext x)
3390  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
3391    return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT,
3392                       N0.getOperand(0));
3393
3394  if (N0.getOpcode() == ISD::TRUNCATE) {
3395    // fold (sext (truncate (load x))) -> (sext (smaller load x))
3396    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
3397    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
3398    if (NarrowLoad.getNode()) {
3399      if (NarrowLoad.getNode() != N0.getNode())
3400        CombineTo(N0.getNode(), NarrowLoad);
3401      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3402    }
3403
3404    // See if the value being truncated is already sign extended.  If so, just
3405    // eliminate the trunc/sext pair.
3406    SDValue Op = N0.getOperand(0);
3407    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
3408    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
3409    unsigned DestBits = VT.getScalarType().getSizeInBits();
3410    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
3411
3412    if (OpBits == DestBits) {
3413      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
3414      // bits, it is already ready.
3415      if (NumSignBits > DestBits-MidBits)
3416        return Op;
3417    } else if (OpBits < DestBits) {
3418      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
3419      // bits, just sext from i32.
3420      if (NumSignBits > OpBits-MidBits)
3421        return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op);
3422    } else {
3423      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
3424      // bits, just truncate to i32.
3425      if (NumSignBits > OpBits-MidBits)
3426        return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
3427    }
3428
3429    // fold (sext (truncate x)) -> (sextinreg x).
3430    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
3431                                                 N0.getValueType())) {
3432      if (OpBits < DestBits)
3433        Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op);
3434      else if (OpBits > DestBits)
3435        Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
3436      return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
3437                         DAG.getValueType(N0.getValueType()));
3438    }
3439  }
3440
3441  // fold (sext (load x)) -> (sext (truncate (sextload x)))
3442  if (ISD::isNON_EXTLoad(N0.getNode()) &&
3443      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
3444       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
3445    bool DoXform = true;
3446    SmallVector<SDNode*, 4> SetCCs;
3447    if (!N0.hasOneUse())
3448      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
3449    if (DoXform) {
3450      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3451      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
3452                                       LN0->getChain(),
3453                                       LN0->getBasePtr(), LN0->getSrcValue(),
3454                                       LN0->getSrcValueOffset(),
3455                                       N0.getValueType(),
3456                                       LN0->isVolatile(), LN0->isNonTemporal(),
3457                                       LN0->getAlignment());
3458      CombineTo(N, ExtLoad);
3459      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
3460                                  N0.getValueType(), ExtLoad);
3461      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
3462
3463      // Extend SetCC uses if necessary.
3464      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
3465        SDNode *SetCC = SetCCs[i];
3466        SmallVector<SDValue, 4> Ops;
3467
3468        for (unsigned j = 0; j != 2; ++j) {
3469          SDValue SOp = SetCC->getOperand(j);
3470          if (SOp == Trunc)
3471            Ops.push_back(ExtLoad);
3472          else
3473            Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
3474                                      N->getDebugLoc(), VT, SOp));
3475        }
3476
3477        Ops.push_back(SetCC->getOperand(2));
3478        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
3479                                     SetCC->getValueType(0),
3480                                     &Ops[0], Ops.size()));
3481      }
3482
3483      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3484    }
3485  }
3486
3487  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
3488  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
3489  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
3490      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
3491    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3492    EVT MemVT = LN0->getMemoryVT();
3493    if ((!LegalOperations && !LN0->isVolatile()) ||
3494        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
3495      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
3496                                       LN0->getChain(),
3497                                       LN0->getBasePtr(), LN0->getSrcValue(),
3498                                       LN0->getSrcValueOffset(), MemVT,
3499                                       LN0->isVolatile(), LN0->isNonTemporal(),
3500                                       LN0->getAlignment());
3501      CombineTo(N, ExtLoad);
3502      CombineTo(N0.getNode(),
3503                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
3504                            N0.getValueType(), ExtLoad),
3505                ExtLoad.getValue(1));
3506      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3507    }
3508  }
3509
3510  if (N0.getOpcode() == ISD::SETCC) {
3511    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
3512    if (VT.isVector() &&
3513        // We know that the # elements of the results is the same as the
3514        // # elements of the compare (and the # elements of the compare result
3515        // for that matter).  Check to see that they are the same size.  If so,
3516        // we know that the element size of the sext'd result matches the
3517        // element size of the compare operands.
3518        VT.getSizeInBits() == N0.getOperand(0).getValueType().getSizeInBits() &&
3519
3520        // Only do this before legalize for now.
3521        !LegalOperations) {
3522      return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0),
3523                           N0.getOperand(1),
3524                           cast<CondCodeSDNode>(N0.getOperand(2))->get());
3525    }
3526
3527    // sext(setcc x, y, cc) -> (select_cc x, y, -1, 0, cc)
3528    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
3529    SDValue NegOne =
3530      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
3531    SDValue SCC =
3532      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
3533                       NegOne, DAG.getConstant(0, VT),
3534                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
3535    if (SCC.getNode()) return SCC;
3536    if (!LegalOperations ||
3537        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))
3538      return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT,
3539                         DAG.getSetCC(N->getDebugLoc(),
3540                                      TLI.getSetCCResultType(VT),
3541                                      N0.getOperand(0), N0.getOperand(1),
3542                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
3543                         NegOne, DAG.getConstant(0, VT));
3544  }
3545
3546
3547
3548  // fold (sext x) -> (zext x) if the sign bit is known zero.
3549  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
3550      DAG.SignBitIsZero(N0))
3551    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
3552
3553  return PromoteExtend(SDValue(N, 0));
3554}
3555
3556SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
3557  SDValue N0 = N->getOperand(0);
3558  EVT VT = N->getValueType(0);
3559
3560  // fold (zext c1) -> c1
3561  if (isa<ConstantSDNode>(N0))
3562    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0);
3563  // fold (zext (zext x)) -> (zext x)
3564  // fold (zext (aext x)) -> (zext x)
3565  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
3566    return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
3567                       N0.getOperand(0));
3568
3569  // fold (zext (truncate (load x))) -> (zext (smaller load x))
3570  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
3571  if (N0.getOpcode() == ISD::TRUNCATE) {
3572    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
3573    if (NarrowLoad.getNode()) {
3574      if (NarrowLoad.getNode() != N0.getNode())
3575        CombineTo(N0.getNode(), NarrowLoad);
3576      return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
3577    }
3578  }
3579
3580  // fold (zext (truncate x)) -> (and x, mask)
3581  if (N0.getOpcode() == ISD::TRUNCATE &&
3582      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) &&
3583      (!TLI.isTruncateFree(N0.getOperand(0).getValueType(),
3584                           N0.getValueType()) ||
3585       !TLI.isZExtFree(N0.getValueType(), VT))) {
3586    SDValue Op = N0.getOperand(0);
3587    if (Op.getValueType().bitsLT(VT)) {
3588      Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
3589    } else if (Op.getValueType().bitsGT(VT)) {
3590      Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
3591    }
3592    return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
3593                                  N0.getValueType().getScalarType());
3594  }
3595
3596  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
3597  // if either of the casts is not free.
3598  if (N0.getOpcode() == ISD::AND &&
3599      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
3600      N0.getOperand(1).getOpcode() == ISD::Constant &&
3601      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
3602                           N0.getValueType()) ||
3603       !TLI.isZExtFree(N0.getValueType(), VT))) {
3604    SDValue X = N0.getOperand(0).getOperand(0);
3605    if (X.getValueType().bitsLT(VT)) {
3606      X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X);
3607    } else if (X.getValueType().bitsGT(VT)) {
3608      X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
3609    }
3610    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
3611    Mask.zext(VT.getSizeInBits());
3612    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
3613                       X, DAG.getConstant(Mask, VT));
3614  }
3615
3616  // fold (zext (load x)) -> (zext (truncate (zextload x)))
3617  if (ISD::isNON_EXTLoad(N0.getNode()) &&
3618      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
3619       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
3620    bool DoXform = true;
3621    SmallVector<SDNode*, 4> SetCCs;
3622    if (!N0.hasOneUse())
3623      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
3624    if (DoXform) {
3625      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3626      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
3627                                       LN0->getChain(),
3628                                       LN0->getBasePtr(), LN0->getSrcValue(),
3629                                       LN0->getSrcValueOffset(),
3630                                       N0.getValueType(),
3631                                       LN0->isVolatile(), LN0->isNonTemporal(),
3632                                       LN0->getAlignment());
3633      CombineTo(N, ExtLoad);
3634      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
3635                                  N0.getValueType(), ExtLoad);
3636      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
3637
3638      // Extend SetCC uses if necessary.
3639      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
3640        SDNode *SetCC = SetCCs[i];
3641        SmallVector<SDValue, 4> Ops;
3642
3643        for (unsigned j = 0; j != 2; ++j) {
3644          SDValue SOp = SetCC->getOperand(j);
3645          if (SOp == Trunc)
3646            Ops.push_back(ExtLoad);
3647          else
3648            Ops.push_back(DAG.getNode(ISD::ZERO_EXTEND,
3649                                      N->getDebugLoc(), VT, SOp));
3650        }
3651
3652        Ops.push_back(SetCC->getOperand(2));
3653        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
3654                                     SetCC->getValueType(0),
3655                                     &Ops[0], Ops.size()));
3656      }
3657
3658      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3659    }
3660  }
3661
3662  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
3663  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
3664  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
3665      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
3666    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3667    EVT MemVT = LN0->getMemoryVT();
3668    if ((!LegalOperations && !LN0->isVolatile()) ||
3669        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
3670      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
3671                                       LN0->getChain(),
3672                                       LN0->getBasePtr(), LN0->getSrcValue(),
3673                                       LN0->getSrcValueOffset(), MemVT,
3674                                       LN0->isVolatile(), LN0->isNonTemporal(),
3675                                       LN0->getAlignment());
3676      CombineTo(N, ExtLoad);
3677      CombineTo(N0.getNode(),
3678                DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(),
3679                            ExtLoad),
3680                ExtLoad.getValue(1));
3681      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3682    }
3683  }
3684
3685  // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
3686  if (N0.getOpcode() == ISD::SETCC) {
3687    SDValue SCC =
3688      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
3689                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
3690                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
3691    if (SCC.getNode()) return SCC;
3692  }
3693
3694  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
3695  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
3696      isa<ConstantSDNode>(N0.getOperand(1)) &&
3697      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
3698      N0.hasOneUse()) {
3699    if (N0.getOpcode() == ISD::SHL) {
3700      // If the original shl may be shifting out bits, do not perform this
3701      // transformation.
3702      unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
3703      unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
3704        N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
3705      if (ShAmt > KnownZeroBits)
3706        return SDValue();
3707    }
3708    DebugLoc dl = N->getDebugLoc();
3709    return DAG.getNode(N0.getOpcode(), dl, VT,
3710                       DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
3711                       DAG.getNode(ISD::ZERO_EXTEND, dl,
3712                                   N0.getOperand(1).getValueType(),
3713                                   N0.getOperand(1)));
3714  }
3715
3716  return PromoteExtend(SDValue(N, 0));
3717}
3718
3719SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
3720  SDValue N0 = N->getOperand(0);
3721  EVT VT = N->getValueType(0);
3722
3723  // fold (aext c1) -> c1
3724  if (isa<ConstantSDNode>(N0))
3725    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0);
3726  // fold (aext (aext x)) -> (aext x)
3727  // fold (aext (zext x)) -> (zext x)
3728  // fold (aext (sext x)) -> (sext x)
3729  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
3730      N0.getOpcode() == ISD::ZERO_EXTEND ||
3731      N0.getOpcode() == ISD::SIGN_EXTEND)
3732    return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0));
3733
3734  // fold (aext (truncate (load x))) -> (aext (smaller load x))
3735  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
3736  if (N0.getOpcode() == ISD::TRUNCATE) {
3737    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
3738    if (NarrowLoad.getNode()) {
3739      if (NarrowLoad.getNode() != N0.getNode())
3740        CombineTo(N0.getNode(), NarrowLoad);
3741      return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, NarrowLoad);
3742    }
3743  }
3744
3745  // fold (aext (truncate x))
3746  if (N0.getOpcode() == ISD::TRUNCATE) {
3747    SDValue TruncOp = N0.getOperand(0);
3748    if (TruncOp.getValueType() == VT)
3749      return TruncOp; // x iff x size == zext size.
3750    if (TruncOp.getValueType().bitsGT(VT))
3751      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp);
3752    return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp);
3753  }
3754
3755  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
3756  // if the trunc is not free.
3757  if (N0.getOpcode() == ISD::AND &&
3758      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
3759      N0.getOperand(1).getOpcode() == ISD::Constant &&
3760      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
3761                          N0.getValueType())) {
3762    SDValue X = N0.getOperand(0).getOperand(0);
3763    if (X.getValueType().bitsLT(VT)) {
3764      X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X);
3765    } else if (X.getValueType().bitsGT(VT)) {
3766      X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
3767    }
3768    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
3769    Mask.zext(VT.getSizeInBits());
3770    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
3771                       X, DAG.getConstant(Mask, VT));
3772  }
3773
3774  // fold (aext (load x)) -> (aext (truncate (extload x)))
3775  if (ISD::isNON_EXTLoad(N0.getNode()) &&
3776      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
3777       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
3778    bool DoXform = true;
3779    SmallVector<SDNode*, 4> SetCCs;
3780    if (!N0.hasOneUse())
3781      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
3782    if (DoXform) {
3783      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3784      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
3785                                       LN0->getChain(),
3786                                       LN0->getBasePtr(), LN0->getSrcValue(),
3787                                       LN0->getSrcValueOffset(),
3788                                       N0.getValueType(),
3789                                       LN0->isVolatile(), LN0->isNonTemporal(),
3790                                       LN0->getAlignment());
3791      CombineTo(N, ExtLoad);
3792      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
3793                                  N0.getValueType(), ExtLoad);
3794      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
3795
3796      // Extend SetCC uses if necessary.
3797      for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
3798        SDNode *SetCC = SetCCs[i];
3799        SmallVector<SDValue, 4> Ops;
3800
3801        for (unsigned j = 0; j != 2; ++j) {
3802          SDValue SOp = SetCC->getOperand(j);
3803          if (SOp == Trunc)
3804            Ops.push_back(ExtLoad);
3805          else
3806            Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
3807                                      N->getDebugLoc(), VT, SOp));
3808        }
3809
3810        Ops.push_back(SetCC->getOperand(2));
3811        CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
3812                                     SetCC->getValueType(0),
3813                                     &Ops[0], Ops.size()));
3814      }
3815
3816      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3817    }
3818  }
3819
3820  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
3821  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
3822  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
3823  if (N0.getOpcode() == ISD::LOAD &&
3824      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
3825      N0.hasOneUse()) {
3826    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3827    EVT MemVT = LN0->getMemoryVT();
3828    SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
3829                                     VT, LN0->getChain(), LN0->getBasePtr(),
3830                                     LN0->getSrcValue(),
3831                                     LN0->getSrcValueOffset(), MemVT,
3832                                     LN0->isVolatile(), LN0->isNonTemporal(),
3833                                     LN0->getAlignment());
3834    CombineTo(N, ExtLoad);
3835    CombineTo(N0.getNode(),
3836              DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
3837                          N0.getValueType(), ExtLoad),
3838              ExtLoad.getValue(1));
3839    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
3840  }
3841
3842  // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
3843  if (N0.getOpcode() == ISD::SETCC) {
3844    SDValue SCC =
3845      SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1),
3846                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
3847                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
3848    if (SCC.getNode())
3849      return SCC;
3850  }
3851
3852  return PromoteExtend(SDValue(N, 0));
3853}
3854
3855/// GetDemandedBits - See if the specified operand can be simplified with the
3856/// knowledge that only the bits specified by Mask are used.  If so, return the
3857/// simpler operand, otherwise return a null SDValue.
3858SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
3859  switch (V.getOpcode()) {
3860  default: break;
3861  case ISD::OR:
3862  case ISD::XOR:
3863    // If the LHS or RHS don't contribute bits to the or, drop them.
3864    if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
3865      return V.getOperand(1);
3866    if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
3867      return V.getOperand(0);
3868    break;
3869  case ISD::SRL:
3870    // Only look at single-use SRLs.
3871    if (!V.getNode()->hasOneUse())
3872      break;
3873    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
3874      // See if we can recursively simplify the LHS.
3875      unsigned Amt = RHSC->getZExtValue();
3876
3877      // Watch out for shift count overflow though.
3878      if (Amt >= Mask.getBitWidth()) break;
3879      APInt NewMask = Mask << Amt;
3880      SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask);
3881      if (SimplifyLHS.getNode())
3882        return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(),
3883                           SimplifyLHS, V.getOperand(1));
3884    }
3885  }
3886  return SDValue();
3887}
3888
3889/// ReduceLoadWidth - If the result of a wider load is shifted to right of N
3890/// bits and then truncated to a narrower type and where N is a multiple
3891/// of number of bits of the narrower type, transform it to a narrower load
3892/// from address + N / num of bits of new type. If the result is to be
3893/// extended, also fold the extension to form a extending load.
3894SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
3895  unsigned Opc = N->getOpcode();
3896  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
3897  SDValue N0 = N->getOperand(0);
3898  EVT VT = N->getValueType(0);
3899  EVT ExtVT = VT;
3900
3901  // This transformation isn't valid for vector loads.
3902  if (VT.isVector())
3903    return SDValue();
3904
3905  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
3906  // extended to VT.
3907  if (Opc == ISD::SIGN_EXTEND_INREG) {
3908    ExtType = ISD::SEXTLOAD;
3909    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
3910    if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
3911      return SDValue();
3912  }
3913
3914  unsigned EVTBits = ExtVT.getSizeInBits();
3915  unsigned ShAmt = 0;
3916  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
3917    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
3918      ShAmt = N01->getZExtValue();
3919      // Is the shift amount a multiple of size of VT?
3920      if ((ShAmt & (EVTBits-1)) == 0) {
3921        N0 = N0.getOperand(0);
3922        // Is the load width a multiple of size of VT?
3923        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
3924          return SDValue();
3925      }
3926    }
3927  }
3928
3929  // Do not generate loads of non-round integer types since these can
3930  // be expensive (and would be wrong if the type is not byte sized).
3931  if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
3932      cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
3933      // Do not change the width of a volatile load.
3934      !cast<LoadSDNode>(N0)->isVolatile()) {
3935    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
3936    EVT PtrType = N0.getOperand(1).getValueType();
3937
3938    // For big endian targets, we need to adjust the offset to the pointer to
3939    // load the correct bytes.
3940    if (TLI.isBigEndian()) {
3941      unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
3942      unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
3943      ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
3944    }
3945
3946    uint64_t PtrOff =  ShAmt / 8;
3947    unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
3948    SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
3949                                 PtrType, LN0->getBasePtr(),
3950                                 DAG.getConstant(PtrOff, PtrType));
3951    AddToWorkList(NewPtr.getNode());
3952
3953    SDValue Load = (ExtType == ISD::NON_EXTLOAD)
3954      ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
3955                    LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
3956                    LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
3957      : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr,
3958                       LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
3959                       ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
3960                       NewAlign);
3961
3962    // Replace the old load's chain with the new load's chain.
3963    WorkListRemover DeadNodes(*this);
3964    DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
3965                                  &DeadNodes);
3966
3967    // Return the new loaded value.
3968    return Load;
3969  }
3970
3971  return SDValue();
3972}
3973
3974SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
3975  SDValue N0 = N->getOperand(0);
3976  SDValue N1 = N->getOperand(1);
3977  EVT VT = N->getValueType(0);
3978  EVT EVT = cast<VTSDNode>(N1)->getVT();
3979  unsigned VTBits = VT.getScalarType().getSizeInBits();
3980  unsigned EVTBits = EVT.getScalarType().getSizeInBits();
3981
3982  // fold (sext_in_reg c1) -> c1
3983  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
3984    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
3985
3986  // If the input is already sign extended, just drop the extension.
3987  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
3988    return N0;
3989
3990  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
3991  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
3992      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
3993    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
3994                       N0.getOperand(0), N1);
3995  }
3996
3997  // fold (sext_in_reg (sext x)) -> (sext x)
3998  // fold (sext_in_reg (aext x)) -> (sext x)
3999  // if x is small enough.
4000  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
4001    SDValue N00 = N0.getOperand(0);
4002    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
4003        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
4004      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
4005  }
4006
4007  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
4008  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
4009    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
4010
4011  // fold operands of sext_in_reg based on knowledge that the top bits are not
4012  // demanded.
4013  if (SimplifyDemandedBits(SDValue(N, 0)))
4014    return SDValue(N, 0);
4015
4016  // fold (sext_in_reg (load x)) -> (smaller sextload x)
4017  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
4018  SDValue NarrowLoad = ReduceLoadWidth(N);
4019  if (NarrowLoad.getNode())
4020    return NarrowLoad;
4021
4022  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
4023  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
4024  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
4025  if (N0.getOpcode() == ISD::SRL) {
4026    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
4027      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
4028        // We can turn this into an SRA iff the input to the SRL is already sign
4029        // extended enough.
4030        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
4031        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
4032          return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
4033                             N0.getOperand(0), N0.getOperand(1));
4034      }
4035  }
4036
4037  // fold (sext_inreg (extload x)) -> (sextload x)
4038  if (ISD::isEXTLoad(N0.getNode()) &&
4039      ISD::isUNINDEXEDLoad(N0.getNode()) &&
4040      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
4041      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
4042       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
4043    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4044    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
4045                                     LN0->getChain(),
4046                                     LN0->getBasePtr(), LN0->getSrcValue(),
4047                                     LN0->getSrcValueOffset(), EVT,
4048                                     LN0->isVolatile(), LN0->isNonTemporal(),
4049                                     LN0->getAlignment());
4050    CombineTo(N, ExtLoad);
4051    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4052    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4053  }
4054  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
4055  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
4056      N0.hasOneUse() &&
4057      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
4058      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
4059       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
4060    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4061    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
4062                                     LN0->getChain(),
4063                                     LN0->getBasePtr(), LN0->getSrcValue(),
4064                                     LN0->getSrcValueOffset(), EVT,
4065                                     LN0->isVolatile(), LN0->isNonTemporal(),
4066                                     LN0->getAlignment());
4067    CombineTo(N, ExtLoad);
4068    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
4069    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4070  }
4071  return SDValue();
4072}
4073
4074SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
4075  SDValue N0 = N->getOperand(0);
4076  EVT VT = N->getValueType(0);
4077
4078  // noop truncate
4079  if (N0.getValueType() == N->getValueType(0))
4080    return N0;
4081  // fold (truncate c1) -> c1
4082  if (isa<ConstantSDNode>(N0))
4083    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0);
4084  // fold (truncate (truncate x)) -> (truncate x)
4085  if (N0.getOpcode() == ISD::TRUNCATE)
4086    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
4087  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
4088  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
4089      N0.getOpcode() == ISD::SIGN_EXTEND ||
4090      N0.getOpcode() == ISD::ANY_EXTEND) {
4091    if (N0.getOperand(0).getValueType().bitsLT(VT))
4092      // if the source is smaller than the dest, we still need an extend
4093      return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT,
4094                         N0.getOperand(0));
4095    else if (N0.getOperand(0).getValueType().bitsGT(VT))
4096      // if the source is larger than the dest, than we just need the truncate
4097      return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0));
4098    else
4099      // if the source and dest are the same type, we can drop both the extend
4100      // and the truncate.
4101      return N0.getOperand(0);
4102  }
4103
4104  // See if we can simplify the input to this truncate through knowledge that
4105  // only the low bits are being used.  For example "trunc (or (shl x, 8), y)"
4106  // -> trunc y
4107  SDValue Shorter =
4108    GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
4109                                             VT.getSizeInBits()));
4110  if (Shorter.getNode())
4111    return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter);
4112
4113  // fold (truncate (load x)) -> (smaller load x)
4114  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
4115  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
4116    return ReduceLoadWidth(N);
4117  return SDValue();
4118}
4119
4120static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
4121  SDValue Elt = N->getOperand(i);
4122  if (Elt.getOpcode() != ISD::MERGE_VALUES)
4123    return Elt.getNode();
4124  return Elt.getOperand(Elt.getResNo()).getNode();
4125}
4126
4127/// CombineConsecutiveLoads - build_pair (load, load) -> load
4128/// if load locations are consecutive.
4129SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
4130  assert(N->getOpcode() == ISD::BUILD_PAIR);
4131
4132  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
4133  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
4134  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
4135    return SDValue();
4136  EVT LD1VT = LD1->getValueType(0);
4137
4138  if (ISD::isNON_EXTLoad(LD2) &&
4139      LD2->hasOneUse() &&
4140      // If both are volatile this would reduce the number of volatile loads.
4141      // If one is volatile it might be ok, but play conservative and bail out.
4142      !LD1->isVolatile() &&
4143      !LD2->isVolatile() &&
4144      DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
4145    unsigned Align = LD1->getAlignment();
4146    unsigned NewAlign = TLI.getTargetData()->
4147      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
4148
4149    if (NewAlign <= Align &&
4150        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
4151      return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
4152                         LD1->getBasePtr(), LD1->getSrcValue(),
4153                         LD1->getSrcValueOffset(), false, false, Align);
4154  }
4155
4156  return SDValue();
4157}
4158
4159SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
4160  SDValue N0 = N->getOperand(0);
4161  EVT VT = N->getValueType(0);
4162
4163  // If the input is a BUILD_VECTOR with all constant elements, fold this now.
4164  // Only do this before legalize, since afterward the target may be depending
4165  // on the bitconvert.
4166  // First check to see if this is all constant.
4167  if (!LegalTypes &&
4168      N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
4169      VT.isVector()) {
4170    bool isSimple = true;
4171    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i)
4172      if (N0.getOperand(i).getOpcode() != ISD::UNDEF &&
4173          N0.getOperand(i).getOpcode() != ISD::Constant &&
4174          N0.getOperand(i).getOpcode() != ISD::ConstantFP) {
4175        isSimple = false;
4176        break;
4177      }
4178
4179    EVT DestEltVT = N->getValueType(0).getVectorElementType();
4180    assert(!DestEltVT.isVector() &&
4181           "Element type of vector ValueType must not be vector!");
4182    if (isSimple)
4183      return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
4184  }
4185
4186  // If the input is a constant, let getNode fold it.
4187  if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
4188    SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
4189    if (Res.getNode() != N) {
4190      if (!LegalOperations ||
4191          TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
4192        return Res;
4193
4194      // Folding it resulted in an illegal node, and it's too late to
4195      // do that. Clean up the old node and forego the transformation.
4196      // Ideally this won't happen very often, because instcombine
4197      // and the earlier dagcombine runs (where illegal nodes are
4198      // permitted) should have folded most of them already.
4199      DAG.DeleteNode(Res.getNode());
4200    }
4201  }
4202
4203  // (conv (conv x, t1), t2) -> (conv x, t2)
4204  if (N0.getOpcode() == ISD::BIT_CONVERT)
4205    return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
4206                       N0.getOperand(0));
4207
4208  // fold (conv (load x)) -> (load (conv*)x)
4209  // If the resultant load doesn't need a higher alignment than the original!
4210  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
4211      // Do not change the width of a volatile load.
4212      !cast<LoadSDNode>(N0)->isVolatile() &&
4213      (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) {
4214    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4215    unsigned Align = TLI.getTargetData()->
4216      getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
4217    unsigned OrigAlign = LN0->getAlignment();
4218
4219    if (Align <= OrigAlign) {
4220      SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
4221                                 LN0->getBasePtr(),
4222                                 LN0->getSrcValue(), LN0->getSrcValueOffset(),
4223                                 LN0->isVolatile(), LN0->isNonTemporal(),
4224                                 OrigAlign);
4225      AddToWorkList(N);
4226      CombineTo(N0.getNode(),
4227                DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
4228                            N0.getValueType(), Load),
4229                Load.getValue(1));
4230      return Load;
4231    }
4232  }
4233
4234  // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
4235  // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
4236  // This often reduces constant pool loads.
4237  if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
4238      N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
4239    SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
4240                                  N0.getOperand(0));
4241    AddToWorkList(NewConv.getNode());
4242
4243    APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
4244    if (N0.getOpcode() == ISD::FNEG)
4245      return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
4246                         NewConv, DAG.getConstant(SignBit, VT));
4247    assert(N0.getOpcode() == ISD::FABS);
4248    return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
4249                       NewConv, DAG.getConstant(~SignBit, VT));
4250  }
4251
4252  // fold (bitconvert (fcopysign cst, x)) ->
4253  //         (or (and (bitconvert x), sign), (and cst, (not sign)))
4254  // Note that we don't handle (copysign x, cst) because this can always be
4255  // folded to an fneg or fabs.
4256  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
4257      isa<ConstantFPSDNode>(N0.getOperand(0)) &&
4258      VT.isInteger() && !VT.isVector()) {
4259    unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
4260    EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
4261    if (isTypeLegal(IntXVT)) {
4262      SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
4263                              IntXVT, N0.getOperand(1));
4264      AddToWorkList(X.getNode());
4265
4266      // If X has a different width than the result/lhs, sext it or truncate it.
4267      unsigned VTWidth = VT.getSizeInBits();
4268      if (OrigXWidth < VTWidth) {
4269        X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X);
4270        AddToWorkList(X.getNode());
4271      } else if (OrigXWidth > VTWidth) {
4272        // To get the sign bit in the right place, we have to shift it right
4273        // before truncating.
4274        X = DAG.getNode(ISD::SRL, X.getDebugLoc(),
4275                        X.getValueType(), X,
4276                        DAG.getConstant(OrigXWidth-VTWidth, X.getValueType()));
4277        AddToWorkList(X.getNode());
4278        X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
4279        AddToWorkList(X.getNode());
4280      }
4281
4282      APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
4283      X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT,
4284                      X, DAG.getConstant(SignBit, VT));
4285      AddToWorkList(X.getNode());
4286
4287      SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
4288                                VT, N0.getOperand(0));
4289      Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
4290                        Cst, DAG.getConstant(~SignBit, VT));
4291      AddToWorkList(Cst.getNode());
4292
4293      return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst);
4294    }
4295  }
4296
4297  // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
4298  if (N0.getOpcode() == ISD::BUILD_PAIR) {
4299    SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT);
4300    if (CombineLD.getNode())
4301      return CombineLD;
4302  }
4303
4304  return SDValue();
4305}
4306
4307SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
4308  EVT VT = N->getValueType(0);
4309  return CombineConsecutiveLoads(N, VT);
4310}
4311
4312/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
4313/// node with Constant, ConstantFP or Undef operands.  DstEltVT indicates the
4314/// destination element value type.
4315SDValue DAGCombiner::
4316ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
4317  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
4318
4319  // If this is already the right type, we're done.
4320  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
4321
4322  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
4323  unsigned DstBitSize = DstEltVT.getSizeInBits();
4324
4325  // If this is a conversion of N elements of one type to N elements of another
4326  // type, convert each element.  This handles FP<->INT cases.
4327  if (SrcBitSize == DstBitSize) {
4328    SmallVector<SDValue, 8> Ops;
4329    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
4330      SDValue Op = BV->getOperand(i);
4331      // If the vector element type is not legal, the BUILD_VECTOR operands
4332      // are promoted and implicitly truncated.  Make that explicit here.
4333      if (Op.getValueType() != SrcEltVT)
4334        Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
4335      Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
4336                                DstEltVT, Op));
4337      AddToWorkList(Ops.back().getNode());
4338    }
4339    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
4340                              BV->getValueType(0).getVectorNumElements());
4341    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
4342                       &Ops[0], Ops.size());
4343  }
4344
4345  // Otherwise, we're growing or shrinking the elements.  To avoid having to
4346  // handle annoying details of growing/shrinking FP values, we convert them to
4347  // int first.
4348  if (SrcEltVT.isFloatingPoint()) {
4349    // Convert the input float vector to a int vector where the elements are the
4350    // same sizes.
4351    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
4352    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
4353    BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
4354    SrcEltVT = IntVT;
4355  }
4356
4357  // Now we know the input is an integer vector.  If the output is a FP type,
4358  // convert to integer first, then to FP of the right size.
4359  if (DstEltVT.isFloatingPoint()) {
4360    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
4361    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
4362    SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
4363
4364    // Next, convert to FP elements of the same size.
4365    return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
4366  }
4367
4368  // Okay, we know the src/dst types are both integers of differing types.
4369  // Handling growing first.
4370  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
4371  if (SrcBitSize < DstBitSize) {
4372    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
4373
4374    SmallVector<SDValue, 8> Ops;
4375    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
4376         i += NumInputsPerOutput) {
4377      bool isLE = TLI.isLittleEndian();
4378      APInt NewBits = APInt(DstBitSize, 0);
4379      bool EltIsUndef = true;
4380      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
4381        // Shift the previously computed bits over.
4382        NewBits <<= SrcBitSize;
4383        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
4384        if (Op.getOpcode() == ISD::UNDEF) continue;
4385        EltIsUndef = false;
4386
4387        NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
4388                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
4389      }
4390
4391      if (EltIsUndef)
4392        Ops.push_back(DAG.getUNDEF(DstEltVT));
4393      else
4394        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
4395    }
4396
4397    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
4398    return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
4399                       &Ops[0], Ops.size());
4400  }
4401
4402  // Finally, this must be the case where we are shrinking elements: each input
4403  // turns into multiple outputs.
4404  bool isS2V = ISD::isScalarToVector(BV);
4405  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
4406  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
4407                            NumOutputsPerInput*BV->getNumOperands());
4408  SmallVector<SDValue, 8> Ops;
4409
4410  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
4411    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
4412      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
4413        Ops.push_back(DAG.getUNDEF(DstEltVT));
4414      continue;
4415    }
4416
4417    APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
4418                        getAPIntValue()).zextOrTrunc(SrcBitSize);
4419
4420    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
4421      APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
4422      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
4423      if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
4424        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
4425        return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
4426                           Ops[0]);
4427      OpVal = OpVal.lshr(DstBitSize);
4428    }
4429
4430    // For big endian targets, swap the order of the pieces of each element.
4431    if (TLI.isBigEndian())
4432      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
4433  }
4434
4435  return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT,
4436                     &Ops[0], Ops.size());
4437}
4438
4439SDValue DAGCombiner::visitFADD(SDNode *N) {
4440  SDValue N0 = N->getOperand(0);
4441  SDValue N1 = N->getOperand(1);
4442  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4443  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4444  EVT VT = N->getValueType(0);
4445
4446  // fold vector ops
4447  if (VT.isVector()) {
4448    SDValue FoldedVOp = SimplifyVBinOp(N);
4449    if (FoldedVOp.getNode()) return FoldedVOp;
4450  }
4451
4452  // fold (fadd c1, c2) -> (fadd c1, c2)
4453  if (N0CFP && N1CFP && VT != MVT::ppcf128)
4454    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1);
4455  // canonicalize constant to RHS
4456  if (N0CFP && !N1CFP)
4457    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0);
4458  // fold (fadd A, 0) -> A
4459  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
4460    return N0;
4461  // fold (fadd A, (fneg B)) -> (fsub A, B)
4462  if (isNegatibleForFree(N1, LegalOperations) == 2)
4463    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0,
4464                       GetNegatedExpression(N1, DAG, LegalOperations));
4465  // fold (fadd (fneg A), B) -> (fsub B, A)
4466  if (isNegatibleForFree(N0, LegalOperations) == 2)
4467    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1,
4468                       GetNegatedExpression(N0, DAG, LegalOperations));
4469
4470  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
4471  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD &&
4472      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
4473    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0),
4474                       DAG.getNode(ISD::FADD, N->getDebugLoc(), VT,
4475                                   N0.getOperand(1), N1));
4476
4477  return SDValue();
4478}
4479
4480SDValue DAGCombiner::visitFSUB(SDNode *N) {
4481  SDValue N0 = N->getOperand(0);
4482  SDValue N1 = N->getOperand(1);
4483  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4484  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4485  EVT VT = N->getValueType(0);
4486
4487  // fold vector ops
4488  if (VT.isVector()) {
4489    SDValue FoldedVOp = SimplifyVBinOp(N);
4490    if (FoldedVOp.getNode()) return FoldedVOp;
4491  }
4492
4493  // fold (fsub c1, c2) -> c1-c2
4494  if (N0CFP && N1CFP && VT != MVT::ppcf128)
4495    return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1);
4496  // fold (fsub A, 0) -> A
4497  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
4498    return N0;
4499  // fold (fsub 0, B) -> -B
4500  if (UnsafeFPMath && N0CFP && N0CFP->getValueAPF().isZero()) {
4501    if (isNegatibleForFree(N1, LegalOperations))
4502      return GetNegatedExpression(N1, DAG, LegalOperations);
4503    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
4504      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N1);
4505  }
4506  // fold (fsub A, (fneg B)) -> (fadd A, B)
4507  if (isNegatibleForFree(N1, LegalOperations))
4508    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0,
4509                       GetNegatedExpression(N1, DAG, LegalOperations));
4510
4511  return SDValue();
4512}
4513
4514SDValue DAGCombiner::visitFMUL(SDNode *N) {
4515  SDValue N0 = N->getOperand(0);
4516  SDValue N1 = N->getOperand(1);
4517  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4518  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4519  EVT VT = N->getValueType(0);
4520
4521  // fold vector ops
4522  if (VT.isVector()) {
4523    SDValue FoldedVOp = SimplifyVBinOp(N);
4524    if (FoldedVOp.getNode()) return FoldedVOp;
4525  }
4526
4527  // fold (fmul c1, c2) -> c1*c2
4528  if (N0CFP && N1CFP && VT != MVT::ppcf128)
4529    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1);
4530  // canonicalize constant to RHS
4531  if (N0CFP && !N1CFP)
4532    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0);
4533  // fold (fmul A, 0) -> 0
4534  if (UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero())
4535    return N1;
4536  // fold (fmul A, 0) -> 0, vector edition.
4537  if (UnsafeFPMath && ISD::isBuildVectorAllZeros(N1.getNode()))
4538    return N1;
4539  // fold (fmul X, 2.0) -> (fadd X, X)
4540  if (N1CFP && N1CFP->isExactlyValue(+2.0))
4541    return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0);
4542  // fold (fmul X, -1.0) -> (fneg X)
4543  if (N1CFP && N1CFP->isExactlyValue(-1.0))
4544    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
4545      return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0);
4546
4547  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
4548  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
4549    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
4550      // Both can be negated for free, check to see if at least one is cheaper
4551      // negated.
4552      if (LHSNeg == 2 || RHSNeg == 2)
4553        return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
4554                           GetNegatedExpression(N0, DAG, LegalOperations),
4555                           GetNegatedExpression(N1, DAG, LegalOperations));
4556    }
4557  }
4558
4559  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
4560  if (UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL &&
4561      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
4562    return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0),
4563                       DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT,
4564                                   N0.getOperand(1), N1));
4565
4566  return SDValue();
4567}
4568
4569SDValue DAGCombiner::visitFDIV(SDNode *N) {
4570  SDValue N0 = N->getOperand(0);
4571  SDValue N1 = N->getOperand(1);
4572  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4573  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4574  EVT VT = N->getValueType(0);
4575
4576  // fold vector ops
4577  if (VT.isVector()) {
4578    SDValue FoldedVOp = SimplifyVBinOp(N);
4579    if (FoldedVOp.getNode()) return FoldedVOp;
4580  }
4581
4582  // fold (fdiv c1, c2) -> c1/c2
4583  if (N0CFP && N1CFP && VT != MVT::ppcf128)
4584    return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1);
4585
4586
4587  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
4588  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations)) {
4589    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations)) {
4590      // Both can be negated for free, check to see if at least one is cheaper
4591      // negated.
4592      if (LHSNeg == 2 || RHSNeg == 2)
4593        return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT,
4594                           GetNegatedExpression(N0, DAG, LegalOperations),
4595                           GetNegatedExpression(N1, DAG, LegalOperations));
4596    }
4597  }
4598
4599  return SDValue();
4600}
4601
4602SDValue DAGCombiner::visitFREM(SDNode *N) {
4603  SDValue N0 = N->getOperand(0);
4604  SDValue N1 = N->getOperand(1);
4605  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4606  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4607  EVT VT = N->getValueType(0);
4608
4609  // fold (frem c1, c2) -> fmod(c1,c2)
4610  if (N0CFP && N1CFP && VT != MVT::ppcf128)
4611    return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1);
4612
4613  return SDValue();
4614}
4615
4616SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
4617  SDValue N0 = N->getOperand(0);
4618  SDValue N1 = N->getOperand(1);
4619  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4620  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
4621  EVT VT = N->getValueType(0);
4622
4623  if (N0CFP && N1CFP && VT != MVT::ppcf128)  // Constant fold
4624    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1);
4625
4626  if (N1CFP) {
4627    const APFloat& V = N1CFP->getValueAPF();
4628    // copysign(x, c1) -> fabs(x)       iff ispos(c1)
4629    // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
4630    if (!V.isNegative()) {
4631      if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
4632        return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
4633    } else {
4634      if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
4635        return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT,
4636                           DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0));
4637    }
4638  }
4639
4640  // copysign(fabs(x), y) -> copysign(x, y)
4641  // copysign(fneg(x), y) -> copysign(x, y)
4642  // copysign(copysign(x,z), y) -> copysign(x, y)
4643  if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
4644      N0.getOpcode() == ISD::FCOPYSIGN)
4645    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
4646                       N0.getOperand(0), N1);
4647
4648  // copysign(x, abs(y)) -> abs(x)
4649  if (N1.getOpcode() == ISD::FABS)
4650    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
4651
4652  // copysign(x, copysign(y,z)) -> copysign(x, z)
4653  if (N1.getOpcode() == ISD::FCOPYSIGN)
4654    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
4655                       N0, N1.getOperand(1));
4656
4657  // copysign(x, fp_extend(y)) -> copysign(x, y)
4658  // copysign(x, fp_round(y)) -> copysign(x, y)
4659  if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND)
4660    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
4661                       N0, N1.getOperand(0));
4662
4663  return SDValue();
4664}
4665
4666SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
4667  SDValue N0 = N->getOperand(0);
4668  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
4669  EVT VT = N->getValueType(0);
4670  EVT OpVT = N0.getValueType();
4671
4672  // fold (sint_to_fp c1) -> c1fp
4673  if (N0C && OpVT != MVT::ppcf128)
4674    return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
4675
4676  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
4677  // but UINT_TO_FP is legal on this target, try to convert.
4678  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
4679      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
4680    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
4681    if (DAG.SignBitIsZero(N0))
4682      return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
4683  }
4684
4685  return SDValue();
4686}
4687
4688SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
4689  SDValue N0 = N->getOperand(0);
4690  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
4691  EVT VT = N->getValueType(0);
4692  EVT OpVT = N0.getValueType();
4693
4694  // fold (uint_to_fp c1) -> c1fp
4695  if (N0C && OpVT != MVT::ppcf128)
4696    return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0);
4697
4698  // If the input is a legal type, and UINT_TO_FP is not legal on this target,
4699  // but SINT_TO_FP is legal on this target, try to convert.
4700  if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
4701      TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
4702    // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
4703    if (DAG.SignBitIsZero(N0))
4704      return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0);
4705  }
4706
4707  return SDValue();
4708}
4709
4710SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
4711  SDValue N0 = N->getOperand(0);
4712  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4713  EVT VT = N->getValueType(0);
4714
4715  // fold (fp_to_sint c1fp) -> c1
4716  if (N0CFP)
4717    return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0);
4718
4719  return SDValue();
4720}
4721
4722SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
4723  SDValue N0 = N->getOperand(0);
4724  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4725  EVT VT = N->getValueType(0);
4726
4727  // fold (fp_to_uint c1fp) -> c1
4728  if (N0CFP && VT != MVT::ppcf128)
4729    return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0);
4730
4731  return SDValue();
4732}
4733
4734SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
4735  SDValue N0 = N->getOperand(0);
4736  SDValue N1 = N->getOperand(1);
4737  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4738  EVT VT = N->getValueType(0);
4739
4740  // fold (fp_round c1fp) -> c1fp
4741  if (N0CFP && N0.getValueType() != MVT::ppcf128)
4742    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1);
4743
4744  // fold (fp_round (fp_extend x)) -> x
4745  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
4746    return N0.getOperand(0);
4747
4748  // fold (fp_round (fp_round x)) -> (fp_round x)
4749  if (N0.getOpcode() == ISD::FP_ROUND) {
4750    // This is a value preserving truncation if both round's are.
4751    bool IsTrunc = N->getConstantOperandVal(1) == 1 &&
4752                   N0.getNode()->getConstantOperandVal(1) == 1;
4753    return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0),
4754                       DAG.getIntPtrConstant(IsTrunc));
4755  }
4756
4757  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
4758  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
4759    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT,
4760                              N0.getOperand(0), N1);
4761    AddToWorkList(Tmp.getNode());
4762    return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT,
4763                       Tmp, N0.getOperand(1));
4764  }
4765
4766  return SDValue();
4767}
4768
4769SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
4770  SDValue N0 = N->getOperand(0);
4771  EVT VT = N->getValueType(0);
4772  EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
4773  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4774
4775  // fold (fp_round_inreg c1fp) -> c1fp
4776  if (N0CFP && isTypeLegal(EVT)) {
4777    SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT);
4778    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round);
4779  }
4780
4781  return SDValue();
4782}
4783
4784SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
4785  SDValue N0 = N->getOperand(0);
4786  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4787  EVT VT = N->getValueType(0);
4788
4789  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
4790  if (N->hasOneUse() &&
4791      N->use_begin()->getOpcode() == ISD::FP_ROUND)
4792    return SDValue();
4793
4794  // fold (fp_extend c1fp) -> c1fp
4795  if (N0CFP && VT != MVT::ppcf128)
4796    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0);
4797
4798  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
4799  // value of X.
4800  if (N0.getOpcode() == ISD::FP_ROUND
4801      && N0.getNode()->getConstantOperandVal(1) == 1) {
4802    SDValue In = N0.getOperand(0);
4803    if (In.getValueType() == VT) return In;
4804    if (VT.bitsLT(In.getValueType()))
4805      return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT,
4806                         In, N0.getOperand(1));
4807    return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In);
4808  }
4809
4810  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
4811  if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
4812      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
4813       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
4814    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
4815    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
4816                                     LN0->getChain(),
4817                                     LN0->getBasePtr(), LN0->getSrcValue(),
4818                                     LN0->getSrcValueOffset(),
4819                                     N0.getValueType(),
4820                                     LN0->isVolatile(), LN0->isNonTemporal(),
4821                                     LN0->getAlignment());
4822    CombineTo(N, ExtLoad);
4823    CombineTo(N0.getNode(),
4824              DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(),
4825                          N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)),
4826              ExtLoad.getValue(1));
4827    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4828  }
4829
4830  return SDValue();
4831}
4832
4833SDValue DAGCombiner::visitFNEG(SDNode *N) {
4834  SDValue N0 = N->getOperand(0);
4835  EVT VT = N->getValueType(0);
4836
4837  if (isNegatibleForFree(N0, LegalOperations))
4838    return GetNegatedExpression(N0, DAG, LegalOperations);
4839
4840  // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
4841  // constant pool values.
4842  if (N0.getOpcode() == ISD::BIT_CONVERT &&
4843      !VT.isVector() &&
4844      N0.getNode()->hasOneUse() &&
4845      N0.getOperand(0).getValueType().isInteger()) {
4846    SDValue Int = N0.getOperand(0);
4847    EVT IntVT = Int.getValueType();
4848    if (IntVT.isInteger() && !IntVT.isVector()) {
4849      Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
4850              DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
4851      AddToWorkList(Int.getNode());
4852      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
4853                         VT, Int);
4854    }
4855  }
4856
4857  return SDValue();
4858}
4859
4860SDValue DAGCombiner::visitFABS(SDNode *N) {
4861  SDValue N0 = N->getOperand(0);
4862  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
4863  EVT VT = N->getValueType(0);
4864
4865  // fold (fabs c1) -> fabs(c1)
4866  if (N0CFP && VT != MVT::ppcf128)
4867    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0);
4868  // fold (fabs (fabs x)) -> (fabs x)
4869  if (N0.getOpcode() == ISD::FABS)
4870    return N->getOperand(0);
4871  // fold (fabs (fneg x)) -> (fabs x)
4872  // fold (fabs (fcopysign x, y)) -> (fabs x)
4873  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
4874    return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0));
4875
4876  // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
4877  // constant pool values.
4878  if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
4879      N0.getOperand(0).getValueType().isInteger() &&
4880      !N0.getOperand(0).getValueType().isVector()) {
4881    SDValue Int = N0.getOperand(0);
4882    EVT IntVT = Int.getValueType();
4883    if (IntVT.isInteger() && !IntVT.isVector()) {
4884      Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
4885             DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
4886      AddToWorkList(Int.getNode());
4887      return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
4888                         N->getValueType(0), Int);
4889    }
4890  }
4891
4892  return SDValue();
4893}
4894
4895SDValue DAGCombiner::visitBRCOND(SDNode *N) {
4896  SDValue Chain = N->getOperand(0);
4897  SDValue N1 = N->getOperand(1);
4898  SDValue N2 = N->getOperand(2);
4899
4900  // If N is a constant we could fold this into a fallthrough or unconditional
4901  // branch. However that doesn't happen very often in normal code, because
4902  // Instcombine/SimplifyCFG should have handled the available opportunities.
4903  // If we did this folding here, it would be necessary to update the
4904  // MachineBasicBlock CFG, which is awkward.
4905
4906  // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
4907  // on the target.
4908  if (N1.getOpcode() == ISD::SETCC &&
4909      TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) {
4910    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
4911                       Chain, N1.getOperand(2),
4912                       N1.getOperand(0), N1.getOperand(1), N2);
4913  }
4914
4915  SDNode *Trunc = 0;
4916  if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
4917    // Look past truncate.
4918    Trunc = N1.getNode();
4919    N1 = N1.getOperand(0);
4920  }
4921
4922  if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
4923    // Match this pattern so that we can generate simpler code:
4924    //
4925    //   %a = ...
4926    //   %b = and i32 %a, 2
4927    //   %c = srl i32 %b, 1
4928    //   brcond i32 %c ...
4929    //
4930    // into
4931    //
4932    //   %a = ...
4933    //   %b = and i32 %a, 2
4934    //   %c = setcc eq %b, 0
4935    //   brcond %c ...
4936    //
4937    // This applies only when the AND constant value has one bit set and the
4938    // SRL constant is equal to the log2 of the AND constant. The back-end is
4939    // smart enough to convert the result into a TEST/JMP sequence.
4940    SDValue Op0 = N1.getOperand(0);
4941    SDValue Op1 = N1.getOperand(1);
4942
4943    if (Op0.getOpcode() == ISD::AND &&
4944        Op1.getOpcode() == ISD::Constant) {
4945      SDValue AndOp1 = Op0.getOperand(1);
4946
4947      if (AndOp1.getOpcode() == ISD::Constant) {
4948        const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
4949
4950        if (AndConst.isPowerOf2() &&
4951            cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
4952          SDValue SetCC =
4953            DAG.getSetCC(N->getDebugLoc(),
4954                         TLI.getSetCCResultType(Op0.getValueType()),
4955                         Op0, DAG.getConstant(0, Op0.getValueType()),
4956                         ISD::SETNE);
4957
4958          SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
4959                                          MVT::Other, Chain, SetCC, N2);
4960          // Don't add the new BRCond into the worklist or else SimplifySelectCC
4961          // will convert it back to (X & C1) >> C2.
4962          CombineTo(N, NewBRCond, false);
4963          // Truncate is dead.
4964          if (Trunc) {
4965            removeFromWorkList(Trunc);
4966            DAG.DeleteNode(Trunc);
4967          }
4968          // Replace the uses of SRL with SETCC
4969          WorkListRemover DeadNodes(*this);
4970          DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
4971          removeFromWorkList(N1.getNode());
4972          DAG.DeleteNode(N1.getNode());
4973          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
4974        }
4975      }
4976    }
4977  }
4978
4979  // Transform br(xor(x, y)) -> br(x != y)
4980  // Transform br(xor(xor(x,y), 1)) -> br (x == y)
4981  if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
4982    SDNode *TheXor = N1.getNode();
4983    SDValue Op0 = TheXor->getOperand(0);
4984    SDValue Op1 = TheXor->getOperand(1);
4985    if (Op0.getOpcode() == Op1.getOpcode()) {
4986      // Avoid missing important xor optimizations.
4987      SDValue Tmp = visitXOR(TheXor);
4988      if (Tmp.getNode() && Tmp.getNode() != TheXor) {
4989        DEBUG(dbgs() << "\nReplacing.8 ";
4990              TheXor->dump(&DAG);
4991              dbgs() << "\nWith: ";
4992              Tmp.getNode()->dump(&DAG);
4993              dbgs() << '\n');
4994        WorkListRemover DeadNodes(*this);
4995        DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
4996        removeFromWorkList(TheXor);
4997        DAG.DeleteNode(TheXor);
4998        return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
4999                           MVT::Other, Chain, Tmp, N2);
5000      }
5001    }
5002
5003    if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
5004      bool Equal = false;
5005      if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
5006        if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
5007            Op0.getOpcode() == ISD::XOR) {
5008          TheXor = Op0.getNode();
5009          Equal = true;
5010        }
5011
5012      SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
5013
5014      EVT SetCCVT = NodeToReplace.getValueType();
5015      if (LegalTypes)
5016        SetCCVT = TLI.getSetCCResultType(SetCCVT);
5017      SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
5018                                   SetCCVT,
5019                                   Op0, Op1,
5020                                   Equal ? ISD::SETEQ : ISD::SETNE);
5021      // Replace the uses of XOR with SETCC
5022      WorkListRemover DeadNodes(*this);
5023      DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
5024      removeFromWorkList(NodeToReplace.getNode());
5025      DAG.DeleteNode(NodeToReplace.getNode());
5026      return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
5027                         MVT::Other, Chain, SetCC, N2);
5028    }
5029  }
5030
5031  return SDValue();
5032}
5033
5034// Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
5035//
5036SDValue DAGCombiner::visitBR_CC(SDNode *N) {
5037  CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
5038  SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
5039
5040  // If N is a constant we could fold this into a fallthrough or unconditional
5041  // branch. However that doesn't happen very often in normal code, because
5042  // Instcombine/SimplifyCFG should have handled the available opportunities.
5043  // If we did this folding here, it would be necessary to update the
5044  // MachineBasicBlock CFG, which is awkward.
5045
5046  // Use SimplifySetCC to simplify SETCC's.
5047  SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
5048                               CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
5049                               false);
5050  if (Simp.getNode()) AddToWorkList(Simp.getNode());
5051
5052  // fold to a simpler setcc
5053  if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
5054    return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
5055                       N->getOperand(0), Simp.getOperand(2),
5056                       Simp.getOperand(0), Simp.getOperand(1),
5057                       N->getOperand(4));
5058
5059  return SDValue();
5060}
5061
5062/// CombineToPreIndexedLoadStore - Try turning a load / store into a
5063/// pre-indexed load / store when the base pointer is an add or subtract
5064/// and it has other uses besides the load / store. After the
5065/// transformation, the new indexed load / store has effectively folded
5066/// the add / subtract in and all of its other uses are redirected to the
5067/// new load / store.
5068bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
5069  if (!LegalOperations)
5070    return false;
5071
5072  bool isLoad = true;
5073  SDValue Ptr;
5074  EVT VT;
5075  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
5076    if (LD->isIndexed())
5077      return false;
5078    VT = LD->getMemoryVT();
5079    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
5080        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
5081      return false;
5082    Ptr = LD->getBasePtr();
5083  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
5084    if (ST->isIndexed())
5085      return false;
5086    VT = ST->getMemoryVT();
5087    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
5088        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
5089      return false;
5090    Ptr = ST->getBasePtr();
5091    isLoad = false;
5092  } else {
5093    return false;
5094  }
5095
5096  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
5097  // out.  There is no reason to make this a preinc/predec.
5098  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
5099      Ptr.getNode()->hasOneUse())
5100    return false;
5101
5102  // Ask the target to do addressing mode selection.
5103  SDValue BasePtr;
5104  SDValue Offset;
5105  ISD::MemIndexedMode AM = ISD::UNINDEXED;
5106  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
5107    return false;
5108  // Don't create a indexed load / store with zero offset.
5109  if (isa<ConstantSDNode>(Offset) &&
5110      cast<ConstantSDNode>(Offset)->isNullValue())
5111    return false;
5112
5113  // Try turning it into a pre-indexed load / store except when:
5114  // 1) The new base ptr is a frame index.
5115  // 2) If N is a store and the new base ptr is either the same as or is a
5116  //    predecessor of the value being stored.
5117  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
5118  //    that would create a cycle.
5119  // 4) All uses are load / store ops that use it as old base ptr.
5120
5121  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
5122  // (plus the implicit offset) to a register to preinc anyway.
5123  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
5124    return false;
5125
5126  // Check #2.
5127  if (!isLoad) {
5128    SDValue Val = cast<StoreSDNode>(N)->getValue();
5129    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
5130      return false;
5131  }
5132
5133  // Now check for #3 and #4.
5134  bool RealUse = false;
5135  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
5136         E = Ptr.getNode()->use_end(); I != E; ++I) {
5137    SDNode *Use = *I;
5138    if (Use == N)
5139      continue;
5140    if (Use->isPredecessorOf(N))
5141      return false;
5142
5143    if (!((Use->getOpcode() == ISD::LOAD &&
5144           cast<LoadSDNode>(Use)->getBasePtr() == Ptr) ||
5145          (Use->getOpcode() == ISD::STORE &&
5146           cast<StoreSDNode>(Use)->getBasePtr() == Ptr)))
5147      RealUse = true;
5148  }
5149
5150  if (!RealUse)
5151    return false;
5152
5153  SDValue Result;
5154  if (isLoad)
5155    Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
5156                                BasePtr, Offset, AM);
5157  else
5158    Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
5159                                 BasePtr, Offset, AM);
5160  ++PreIndexedNodes;
5161  ++NodesCombined;
5162  DEBUG(dbgs() << "\nReplacing.4 ";
5163        N->dump(&DAG);
5164        dbgs() << "\nWith: ";
5165        Result.getNode()->dump(&DAG);
5166        dbgs() << '\n');
5167  WorkListRemover DeadNodes(*this);
5168  if (isLoad) {
5169    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
5170                                  &DeadNodes);
5171    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
5172                                  &DeadNodes);
5173  } else {
5174    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
5175                                  &DeadNodes);
5176  }
5177
5178  // Finally, since the node is now dead, remove it from the graph.
5179  DAG.DeleteNode(N);
5180
5181  // Replace the uses of Ptr with uses of the updated base value.
5182  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0),
5183                                &DeadNodes);
5184  removeFromWorkList(Ptr.getNode());
5185  DAG.DeleteNode(Ptr.getNode());
5186
5187  return true;
5188}
5189
5190/// CombineToPostIndexedLoadStore - Try to combine a load / store with a
5191/// add / sub of the base pointer node into a post-indexed load / store.
5192/// The transformation folded the add / subtract into the new indexed
5193/// load / store effectively and all of its uses are redirected to the
5194/// new load / store.
5195bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
5196  if (!LegalOperations)
5197    return false;
5198
5199  bool isLoad = true;
5200  SDValue Ptr;
5201  EVT VT;
5202  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
5203    if (LD->isIndexed())
5204      return false;
5205    VT = LD->getMemoryVT();
5206    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
5207        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
5208      return false;
5209    Ptr = LD->getBasePtr();
5210  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
5211    if (ST->isIndexed())
5212      return false;
5213    VT = ST->getMemoryVT();
5214    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
5215        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
5216      return false;
5217    Ptr = ST->getBasePtr();
5218    isLoad = false;
5219  } else {
5220    return false;
5221  }
5222
5223  if (Ptr.getNode()->hasOneUse())
5224    return false;
5225
5226  for (SDNode::use_iterator I = Ptr.getNode()->use_begin(),
5227         E = Ptr.getNode()->use_end(); I != E; ++I) {
5228    SDNode *Op = *I;
5229    if (Op == N ||
5230        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
5231      continue;
5232
5233    SDValue BasePtr;
5234    SDValue Offset;
5235    ISD::MemIndexedMode AM = ISD::UNINDEXED;
5236    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
5237      if (Ptr == Offset && Op->getOpcode() == ISD::ADD)
5238        std::swap(BasePtr, Offset);
5239      if (Ptr != BasePtr)
5240        continue;
5241      // Don't create a indexed load / store with zero offset.
5242      if (isa<ConstantSDNode>(Offset) &&
5243          cast<ConstantSDNode>(Offset)->isNullValue())
5244        continue;
5245
5246      // Try turning it into a post-indexed load / store except when
5247      // 1) All uses are load / store ops that use it as base ptr.
5248      // 2) Op must be independent of N, i.e. Op is neither a predecessor
5249      //    nor a successor of N. Otherwise, if Op is folded that would
5250      //    create a cycle.
5251
5252      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
5253        continue;
5254
5255      // Check for #1.
5256      bool TryNext = false;
5257      for (SDNode::use_iterator II = BasePtr.getNode()->use_begin(),
5258             EE = BasePtr.getNode()->use_end(); II != EE; ++II) {
5259        SDNode *Use = *II;
5260        if (Use == Ptr.getNode())
5261          continue;
5262
5263        // If all the uses are load / store addresses, then don't do the
5264        // transformation.
5265        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
5266          bool RealUse = false;
5267          for (SDNode::use_iterator III = Use->use_begin(),
5268                 EEE = Use->use_end(); III != EEE; ++III) {
5269            SDNode *UseUse = *III;
5270            if (!((UseUse->getOpcode() == ISD::LOAD &&
5271                   cast<LoadSDNode>(UseUse)->getBasePtr().getNode() == Use) ||
5272                  (UseUse->getOpcode() == ISD::STORE &&
5273                   cast<StoreSDNode>(UseUse)->getBasePtr().getNode() == Use)))
5274              RealUse = true;
5275          }
5276
5277          if (!RealUse) {
5278            TryNext = true;
5279            break;
5280          }
5281        }
5282      }
5283
5284      if (TryNext)
5285        continue;
5286
5287      // Check for #2
5288      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
5289        SDValue Result = isLoad
5290          ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(),
5291                               BasePtr, Offset, AM)
5292          : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(),
5293                                BasePtr, Offset, AM);
5294        ++PostIndexedNodes;
5295        ++NodesCombined;
5296        DEBUG(dbgs() << "\nReplacing.5 ";
5297              N->dump(&DAG);
5298              dbgs() << "\nWith: ";
5299              Result.getNode()->dump(&DAG);
5300              dbgs() << '\n');
5301        WorkListRemover DeadNodes(*this);
5302        if (isLoad) {
5303          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0),
5304                                        &DeadNodes);
5305          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2),
5306                                        &DeadNodes);
5307        } else {
5308          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1),
5309                                        &DeadNodes);
5310        }
5311
5312        // Finally, since the node is now dead, remove it from the graph.
5313        DAG.DeleteNode(N);
5314
5315        // Replace the uses of Use with uses of the updated base value.
5316        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
5317                                      Result.getValue(isLoad ? 1 : 0),
5318                                      &DeadNodes);
5319        removeFromWorkList(Op);
5320        DAG.DeleteNode(Op);
5321        return true;
5322      }
5323    }
5324  }
5325
5326  return false;
5327}
5328
5329SDValue DAGCombiner::visitLOAD(SDNode *N) {
5330  LoadSDNode *LD  = cast<LoadSDNode>(N);
5331  SDValue Chain = LD->getChain();
5332  SDValue Ptr   = LD->getBasePtr();
5333
5334  // If load is not volatile and there are no uses of the loaded value (and
5335  // the updated indexed value in case of indexed loads), change uses of the
5336  // chain value into uses of the chain input (i.e. delete the dead load).
5337  if (!LD->isVolatile()) {
5338    if (N->getValueType(1) == MVT::Other) {
5339      // Unindexed loads.
5340      if (N->hasNUsesOfValue(0, 0)) {
5341        // It's not safe to use the two value CombineTo variant here. e.g.
5342        // v1, chain2 = load chain1, loc
5343        // v2, chain3 = load chain2, loc
5344        // v3         = add v2, c
5345        // Now we replace use of chain2 with chain1.  This makes the second load
5346        // isomorphic to the one we are deleting, and thus makes this load live.
5347        DEBUG(dbgs() << "\nReplacing.6 ";
5348              N->dump(&DAG);
5349              dbgs() << "\nWith chain: ";
5350              Chain.getNode()->dump(&DAG);
5351              dbgs() << "\n");
5352        WorkListRemover DeadNodes(*this);
5353        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain, &DeadNodes);
5354
5355        if (N->use_empty()) {
5356          removeFromWorkList(N);
5357          DAG.DeleteNode(N);
5358        }
5359
5360        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5361      }
5362    } else {
5363      // Indexed loads.
5364      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
5365      if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
5366        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
5367        DEBUG(dbgs() << "\nReplacing.7 ";
5368              N->dump(&DAG);
5369              dbgs() << "\nWith: ";
5370              Undef.getNode()->dump(&DAG);
5371              dbgs() << " and 2 other values\n");
5372        WorkListRemover DeadNodes(*this);
5373        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef, &DeadNodes);
5374        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
5375                                      DAG.getUNDEF(N->getValueType(1)),
5376                                      &DeadNodes);
5377        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain, &DeadNodes);
5378        removeFromWorkList(N);
5379        DAG.DeleteNode(N);
5380        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
5381      }
5382    }
5383  }
5384
5385  // If this load is directly stored, replace the load value with the stored
5386  // value.
5387  // TODO: Handle store large -> read small portion.
5388  // TODO: Handle TRUNCSTORE/LOADEXT
5389  if (LD->getExtensionType() == ISD::NON_EXTLOAD &&
5390      !LD->isVolatile()) {
5391    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
5392      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
5393      if (PrevST->getBasePtr() == Ptr &&
5394          PrevST->getValue().getValueType() == N->getValueType(0))
5395      return CombineTo(N, Chain.getOperand(1), Chain);
5396    }
5397  }
5398
5399  // Try to infer better alignment information than the load already has.
5400  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
5401    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5402      if (Align > LD->getAlignment())
5403        return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
5404                              LD->getValueType(0),
5405                              Chain, Ptr, LD->getSrcValue(),
5406                              LD->getSrcValueOffset(), LD->getMemoryVT(),
5407                              LD->isVolatile(), LD->isNonTemporal(), Align);
5408    }
5409  }
5410
5411  if (CombinerAA) {
5412    // Walk up chain skipping non-aliasing memory nodes.
5413    SDValue BetterChain = FindBetterChain(N, Chain);
5414
5415    // If there is a better chain.
5416    if (Chain != BetterChain) {
5417      SDValue ReplLoad;
5418
5419      // Replace the chain to void dependency.
5420      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
5421        ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
5422                               BetterChain, Ptr,
5423                               LD->getSrcValue(), LD->getSrcValueOffset(),
5424                               LD->isVolatile(), LD->isNonTemporal(),
5425                               LD->getAlignment());
5426      } else {
5427        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
5428                                  LD->getValueType(0),
5429                                  BetterChain, Ptr, LD->getSrcValue(),
5430                                  LD->getSrcValueOffset(),
5431                                  LD->getMemoryVT(),
5432                                  LD->isVolatile(),
5433                                  LD->isNonTemporal(),
5434                                  LD->getAlignment());
5435      }
5436
5437      // Create token factor to keep old chain connected.
5438      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
5439                                  MVT::Other, Chain, ReplLoad.getValue(1));
5440
5441      // Make sure the new and old chains are cleaned up.
5442      AddToWorkList(Token.getNode());
5443
5444      // Replace uses with load result and token factor. Don't add users
5445      // to work list.
5446      return CombineTo(N, ReplLoad.getValue(0), Token, false);
5447    }
5448  }
5449
5450  // Try transforming N to an indexed load.
5451  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
5452    return SDValue(N, 0);
5453
5454  if (PromoteLoad(SDValue(N, 0)))
5455    return SDValue(N, 0);
5456  return SDValue();
5457}
5458
5459/// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
5460/// load is having specific bytes cleared out.  If so, return the byte size
5461/// being masked out and the shift amount.
5462static std::pair<unsigned, unsigned>
5463CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
5464  std::pair<unsigned, unsigned> Result(0, 0);
5465
5466  // Check for the structure we're looking for.
5467  if (V->getOpcode() != ISD::AND ||
5468      !isa<ConstantSDNode>(V->getOperand(1)) ||
5469      !ISD::isNormalLoad(V->getOperand(0).getNode()))
5470    return Result;
5471
5472  // Check the chain and pointer.
5473  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
5474  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
5475
5476  // The store should be chained directly to the load or be an operand of a
5477  // tokenfactor.
5478  if (LD == Chain.getNode())
5479    ; // ok.
5480  else if (Chain->getOpcode() != ISD::TokenFactor)
5481    return Result; // Fail.
5482  else {
5483    bool isOk = false;
5484    for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i)
5485      if (Chain->getOperand(i).getNode() == LD) {
5486        isOk = true;
5487        break;
5488      }
5489    if (!isOk) return Result;
5490  }
5491
5492  // This only handles simple types.
5493  if (V.getValueType() != MVT::i16 &&
5494      V.getValueType() != MVT::i32 &&
5495      V.getValueType() != MVT::i64)
5496    return Result;
5497
5498  // Check the constant mask.  Invert it so that the bits being masked out are
5499  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
5500  // follow the sign bit for uniformity.
5501  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
5502  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
5503  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
5504  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
5505  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
5506  if (NotMaskLZ == 64) return Result;  // All zero mask.
5507
5508  // See if we have a continuous run of bits.  If so, we have 0*1+0*
5509  if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
5510    return Result;
5511
5512  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
5513  if (V.getValueType() != MVT::i64 && NotMaskLZ)
5514    NotMaskLZ -= 64-V.getValueSizeInBits();
5515
5516  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
5517  switch (MaskedBytes) {
5518  case 1:
5519  case 2:
5520  case 4: break;
5521  default: return Result; // All one mask, or 5-byte mask.
5522  }
5523
5524  // Verify that the first bit starts at a multiple of mask so that the access
5525  // is aligned the same as the access width.
5526  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
5527
5528  Result.first = MaskedBytes;
5529  Result.second = NotMaskTZ/8;
5530  return Result;
5531}
5532
5533
5534/// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that
5535/// provides a value as specified by MaskInfo.  If so, replace the specified
5536/// store with a narrower store of truncated IVal.
5537static SDNode *
5538ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
5539                                SDValue IVal, StoreSDNode *St,
5540                                DAGCombiner *DC) {
5541  unsigned NumBytes = MaskInfo.first;
5542  unsigned ByteShift = MaskInfo.second;
5543  SelectionDAG &DAG = DC->getDAG();
5544
5545  // Check to see if IVal is all zeros in the part being masked in by the 'or'
5546  // that uses this.  If not, this is not a replacement.
5547  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
5548                                  ByteShift*8, (ByteShift+NumBytes)*8);
5549  if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
5550
5551  // Check that it is legal on the target to do this.  It is legal if the new
5552  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
5553  // legalization.
5554  MVT VT = MVT::getIntegerVT(NumBytes*8);
5555  if (!DC->isTypeLegal(VT))
5556    return 0;
5557
5558  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
5559  // shifted by ByteShift and truncated down to NumBytes.
5560  if (ByteShift)
5561    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
5562                       DAG.getConstant(ByteShift*8, DC->getShiftAmountTy()));
5563
5564  // Figure out the offset for the store and the alignment of the access.
5565  unsigned StOffset;
5566  unsigned NewAlign = St->getAlignment();
5567
5568  if (DAG.getTargetLoweringInfo().isLittleEndian())
5569    StOffset = ByteShift;
5570  else
5571    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
5572
5573  SDValue Ptr = St->getBasePtr();
5574  if (StOffset) {
5575    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
5576                      Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
5577    NewAlign = MinAlign(NewAlign, StOffset);
5578  }
5579
5580  // Truncate down to the new size.
5581  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
5582
5583  ++OpsNarrowed;
5584  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
5585                      St->getSrcValue(), St->getSrcValueOffset()+StOffset,
5586                      false, false, NewAlign).getNode();
5587}
5588
5589
5590/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
5591/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
5592/// of the loaded bits, try narrowing the load and store if it would end up
5593/// being a win for performance or code size.
5594SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
5595  StoreSDNode *ST  = cast<StoreSDNode>(N);
5596  if (ST->isVolatile())
5597    return SDValue();
5598
5599  SDValue Chain = ST->getChain();
5600  SDValue Value = ST->getValue();
5601  SDValue Ptr   = ST->getBasePtr();
5602  EVT VT = Value.getValueType();
5603
5604  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
5605    return SDValue();
5606
5607  unsigned Opc = Value.getOpcode();
5608
5609  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
5610  // is a byte mask indicating a consecutive number of bytes, check to see if
5611  // Y is known to provide just those bytes.  If so, we try to replace the
5612  // load + replace + store sequence with a single (narrower) store, which makes
5613  // the load dead.
5614  if (Opc == ISD::OR) {
5615    std::pair<unsigned, unsigned> MaskedLoad;
5616    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
5617    if (MaskedLoad.first)
5618      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
5619                                                  Value.getOperand(1), ST,this))
5620        return SDValue(NewST, 0);
5621
5622    // Or is commutative, so try swapping X and Y.
5623    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
5624    if (MaskedLoad.first)
5625      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
5626                                                  Value.getOperand(0), ST,this))
5627        return SDValue(NewST, 0);
5628  }
5629
5630  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
5631      Value.getOperand(1).getOpcode() != ISD::Constant)
5632    return SDValue();
5633
5634  SDValue N0 = Value.getOperand(0);
5635  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
5636    LoadSDNode *LD = cast<LoadSDNode>(N0);
5637    if (LD->getBasePtr() != Ptr)
5638      return SDValue();
5639
5640    // Find the type to narrow it the load / op / store to.
5641    SDValue N1 = Value.getOperand(1);
5642    unsigned BitWidth = N1.getValueSizeInBits();
5643    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
5644    if (Opc == ISD::AND)
5645      Imm ^= APInt::getAllOnesValue(BitWidth);
5646    if (Imm == 0 || Imm.isAllOnesValue())
5647      return SDValue();
5648    unsigned ShAmt = Imm.countTrailingZeros();
5649    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
5650    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
5651    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
5652    while (NewBW < BitWidth &&
5653           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
5654             TLI.isNarrowingProfitable(VT, NewVT))) {
5655      NewBW = NextPowerOf2(NewBW);
5656      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
5657    }
5658    if (NewBW >= BitWidth)
5659      return SDValue();
5660
5661    // If the lsb changed does not start at the type bitwidth boundary,
5662    // start at the previous one.
5663    if (ShAmt % NewBW)
5664      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
5665    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
5666    if ((Imm & Mask) == Imm) {
5667      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
5668      if (Opc == ISD::AND)
5669        NewImm ^= APInt::getAllOnesValue(NewBW);
5670      uint64_t PtrOff = ShAmt / 8;
5671      // For big endian targets, we need to adjust the offset to the pointer to
5672      // load the correct bytes.
5673      if (TLI.isBigEndian())
5674        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
5675
5676      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
5677      const Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
5678      if (NewAlign < TLI.getTargetData()->getABITypeAlignment(NewVTTy))
5679        return SDValue();
5680
5681      SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
5682                                   Ptr.getValueType(), Ptr,
5683                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
5684      SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
5685                                  LD->getChain(), NewPtr,
5686                                  LD->getSrcValue(), LD->getSrcValueOffset(),
5687                                  LD->isVolatile(), LD->isNonTemporal(),
5688                                  NewAlign);
5689      SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
5690                                   DAG.getConstant(NewImm, NewVT));
5691      SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
5692                                   NewVal, NewPtr,
5693                                   ST->getSrcValue(), ST->getSrcValueOffset(),
5694                                   false, false, NewAlign);
5695
5696      AddToWorkList(NewPtr.getNode());
5697      AddToWorkList(NewLD.getNode());
5698      AddToWorkList(NewVal.getNode());
5699      WorkListRemover DeadNodes(*this);
5700      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
5701                                    &DeadNodes);
5702      ++OpsNarrowed;
5703      return NewST;
5704    }
5705  }
5706
5707  return SDValue();
5708}
5709
5710SDValue DAGCombiner::visitSTORE(SDNode *N) {
5711  StoreSDNode *ST  = cast<StoreSDNode>(N);
5712  SDValue Chain = ST->getChain();
5713  SDValue Value = ST->getValue();
5714  SDValue Ptr   = ST->getBasePtr();
5715
5716  // If this is a store of a bit convert, store the input value if the
5717  // resultant store does not need a higher alignment than the original.
5718  if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
5719      ST->isUnindexed()) {
5720    unsigned OrigAlign = ST->getAlignment();
5721    EVT SVT = Value.getOperand(0).getValueType();
5722    unsigned Align = TLI.getTargetData()->
5723      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
5724    if (Align <= OrigAlign &&
5725        ((!LegalOperations && !ST->isVolatile()) ||
5726         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
5727      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
5728                          Ptr, ST->getSrcValue(),
5729                          ST->getSrcValueOffset(), ST->isVolatile(),
5730                          ST->isNonTemporal(), OrigAlign);
5731  }
5732
5733  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
5734  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
5735    // NOTE: If the original store is volatile, this transform must not increase
5736    // the number of stores.  For example, on x86-32 an f64 can be stored in one
5737    // processor operation but an i64 (which is not legal) requires two.  So the
5738    // transform should not be done in this case.
5739    if (Value.getOpcode() != ISD::TargetConstantFP) {
5740      SDValue Tmp;
5741      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
5742      default: llvm_unreachable("Unknown FP type");
5743      case MVT::f80:    // We don't do this for these yet.
5744      case MVT::f128:
5745      case MVT::ppcf128:
5746        break;
5747      case MVT::f32:
5748        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
5749            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
5750          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
5751                              bitcastToAPInt().getZExtValue(), MVT::i32);
5752          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
5753                              Ptr, ST->getSrcValue(),
5754                              ST->getSrcValueOffset(), ST->isVolatile(),
5755                              ST->isNonTemporal(), ST->getAlignment());
5756        }
5757        break;
5758      case MVT::f64:
5759        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
5760             !ST->isVolatile()) ||
5761            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
5762          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
5763                                getZExtValue(), MVT::i64);
5764          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
5765                              Ptr, ST->getSrcValue(),
5766                              ST->getSrcValueOffset(), ST->isVolatile(),
5767                              ST->isNonTemporal(), ST->getAlignment());
5768        } else if (!ST->isVolatile() &&
5769                   TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
5770          // Many FP stores are not made apparent until after legalize, e.g. for
5771          // argument passing.  Since this is so common, custom legalize the
5772          // 64-bit integer store into two 32-bit stores.
5773          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
5774          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
5775          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
5776          if (TLI.isBigEndian()) std::swap(Lo, Hi);
5777
5778          int SVOffset = ST->getSrcValueOffset();
5779          unsigned Alignment = ST->getAlignment();
5780          bool isVolatile = ST->isVolatile();
5781          bool isNonTemporal = ST->isNonTemporal();
5782
5783          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
5784                                     Ptr, ST->getSrcValue(),
5785                                     ST->getSrcValueOffset(),
5786                                     isVolatile, isNonTemporal,
5787                                     ST->getAlignment());
5788          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
5789                            DAG.getConstant(4, Ptr.getValueType()));
5790          SVOffset += 4;
5791          Alignment = MinAlign(Alignment, 4U);
5792          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
5793                                     Ptr, ST->getSrcValue(),
5794                                     SVOffset, isVolatile, isNonTemporal,
5795                                     Alignment);
5796          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
5797                             St0, St1);
5798        }
5799
5800        break;
5801      }
5802    }
5803  }
5804
5805  // Try to infer better alignment information than the store already has.
5806  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
5807    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
5808      if (Align > ST->getAlignment())
5809        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
5810                                 Ptr, ST->getSrcValue(),
5811                                 ST->getSrcValueOffset(), ST->getMemoryVT(),
5812                                 ST->isVolatile(), ST->isNonTemporal(), Align);
5813    }
5814  }
5815
5816  if (CombinerAA) {
5817    // Walk up chain skipping non-aliasing memory nodes.
5818    SDValue BetterChain = FindBetterChain(N, Chain);
5819
5820    // If there is a better chain.
5821    if (Chain != BetterChain) {
5822      SDValue ReplStore;
5823
5824      // Replace the chain to avoid dependency.
5825      if (ST->isTruncatingStore()) {
5826        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
5827                                      ST->getSrcValue(),ST->getSrcValueOffset(),
5828                                      ST->getMemoryVT(), ST->isVolatile(),
5829                                      ST->isNonTemporal(), ST->getAlignment());
5830      } else {
5831        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
5832                                 ST->getSrcValue(), ST->getSrcValueOffset(),
5833                                 ST->isVolatile(), ST->isNonTemporal(),
5834                                 ST->getAlignment());
5835      }
5836
5837      // Create token to keep both nodes around.
5838      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
5839                                  MVT::Other, Chain, ReplStore);
5840
5841      // Make sure the new and old chains are cleaned up.
5842      AddToWorkList(Token.getNode());
5843
5844      // Don't add users to work list.
5845      return CombineTo(N, Token, false);
5846    }
5847  }
5848
5849  // Try transforming N to an indexed store.
5850  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
5851    return SDValue(N, 0);
5852
5853  // FIXME: is there such a thing as a truncating indexed store?
5854  if (ST->isTruncatingStore() && ST->isUnindexed() &&
5855      Value.getValueType().isInteger()) {
5856    // See if we can simplify the input to this truncstore with knowledge that
5857    // only the low bits are being used.  For example:
5858    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
5859    SDValue Shorter =
5860      GetDemandedBits(Value,
5861                      APInt::getLowBitsSet(Value.getValueSizeInBits(),
5862                                           ST->getMemoryVT().getSizeInBits()));
5863    AddToWorkList(Value.getNode());
5864    if (Shorter.getNode())
5865      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
5866                               Ptr, ST->getSrcValue(),
5867                               ST->getSrcValueOffset(), ST->getMemoryVT(),
5868                               ST->isVolatile(), ST->isNonTemporal(),
5869                               ST->getAlignment());
5870
5871    // Otherwise, see if we can simplify the operation with
5872    // SimplifyDemandedBits, which only works if the value has a single use.
5873    if (SimplifyDemandedBits(Value,
5874                             APInt::getLowBitsSet(
5875                               Value.getValueType().getScalarType().getSizeInBits(),
5876                               ST->getMemoryVT().getScalarType().getSizeInBits())))
5877      return SDValue(N, 0);
5878  }
5879
5880  // If this is a load followed by a store to the same location, then the store
5881  // is dead/noop.
5882  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
5883    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
5884        ST->isUnindexed() && !ST->isVolatile() &&
5885        // There can't be any side effects between the load and store, such as
5886        // a call or store.
5887        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
5888      // The store is dead, remove it.
5889      return Chain;
5890    }
5891  }
5892
5893  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
5894  // truncating store.  We can do this even if this is already a truncstore.
5895  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
5896      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
5897      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
5898                            ST->getMemoryVT())) {
5899    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
5900                             Ptr, ST->getSrcValue(),
5901                             ST->getSrcValueOffset(), ST->getMemoryVT(),
5902                             ST->isVolatile(), ST->isNonTemporal(),
5903                             ST->getAlignment());
5904  }
5905
5906  return ReduceLoadOpStoreWidth(N);
5907}
5908
5909SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
5910  SDValue InVec = N->getOperand(0);
5911  SDValue InVal = N->getOperand(1);
5912  SDValue EltNo = N->getOperand(2);
5913
5914  // If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
5915  // vector with the inserted element.
5916  if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
5917    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
5918    SmallVector<SDValue, 8> Ops(InVec.getNode()->op_begin(),
5919                                InVec.getNode()->op_end());
5920    if (Elt < Ops.size())
5921      Ops[Elt] = InVal;
5922    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
5923                       InVec.getValueType(), &Ops[0], Ops.size());
5924  }
5925  // If the invec is an UNDEF and if EltNo is a constant, create a new
5926  // BUILD_VECTOR with undef elements and the inserted element.
5927  if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
5928      isa<ConstantSDNode>(EltNo)) {
5929    EVT VT = InVec.getValueType();
5930    EVT EltVT = VT.getVectorElementType();
5931    unsigned NElts = VT.getVectorNumElements();
5932    SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
5933
5934    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
5935    if (Elt < Ops.size())
5936      Ops[Elt] = InVal;
5937    return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
5938                       InVec.getValueType(), &Ops[0], Ops.size());
5939  }
5940  return SDValue();
5941}
5942
5943SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
5944  // (vextract (scalar_to_vector val, 0) -> val
5945  SDValue InVec = N->getOperand(0);
5946
5947 if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
5948   // Check if the result type doesn't match the inserted element type. A
5949   // SCALAR_TO_VECTOR may truncate the inserted element and the
5950   // EXTRACT_VECTOR_ELT may widen the extracted vector.
5951   EVT EltVT = InVec.getValueType().getVectorElementType();
5952   SDValue InOp = InVec.getOperand(0);
5953   EVT NVT = N->getValueType(0);
5954   if (InOp.getValueType() != NVT) {
5955     assert(InOp.getValueType().isInteger() && NVT.isInteger());
5956     return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
5957   }
5958   return InOp;
5959 }
5960
5961  // Perform only after legalization to ensure build_vector / vector_shuffle
5962  // optimizations have already been done.
5963  if (!LegalOperations) return SDValue();
5964
5965  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
5966  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
5967  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
5968  SDValue EltNo = N->getOperand(1);
5969
5970  if (isa<ConstantSDNode>(EltNo)) {
5971    unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
5972    bool NewLoad = false;
5973    bool BCNumEltsChanged = false;
5974    EVT VT = InVec.getValueType();
5975    EVT ExtVT = VT.getVectorElementType();
5976    EVT LVT = ExtVT;
5977
5978    if (InVec.getOpcode() == ISD::BIT_CONVERT) {
5979      EVT BCVT = InVec.getOperand(0).getValueType();
5980      if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
5981        return SDValue();
5982      if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
5983        BCNumEltsChanged = true;
5984      InVec = InVec.getOperand(0);
5985      ExtVT = BCVT.getVectorElementType();
5986      NewLoad = true;
5987    }
5988
5989    LoadSDNode *LN0 = NULL;
5990    const ShuffleVectorSDNode *SVN = NULL;
5991    if (ISD::isNormalLoad(InVec.getNode())) {
5992      LN0 = cast<LoadSDNode>(InVec);
5993    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
5994               InVec.getOperand(0).getValueType() == ExtVT &&
5995               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
5996      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
5997    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
5998      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
5999      // =>
6000      // (load $addr+1*size)
6001
6002      // If the bit convert changed the number of elements, it is unsafe
6003      // to examine the mask.
6004      if (BCNumEltsChanged)
6005        return SDValue();
6006
6007      // Select the input vector, guarding against out of range extract vector.
6008      unsigned NumElems = VT.getVectorNumElements();
6009      int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
6010      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
6011
6012      if (InVec.getOpcode() == ISD::BIT_CONVERT)
6013        InVec = InVec.getOperand(0);
6014      if (ISD::isNormalLoad(InVec.getNode())) {
6015        LN0 = cast<LoadSDNode>(InVec);
6016        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
6017      }
6018    }
6019
6020    if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
6021      return SDValue();
6022
6023    unsigned Align = LN0->getAlignment();
6024    if (NewLoad) {
6025      // Check the resultant load doesn't need a higher alignment than the
6026      // original load.
6027      unsigned NewAlign =
6028        TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
6029
6030      if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
6031        return SDValue();
6032
6033      Align = NewAlign;
6034    }
6035
6036    SDValue NewPtr = LN0->getBasePtr();
6037    if (Elt) {
6038      unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
6039      EVT PtrType = NewPtr.getValueType();
6040      if (TLI.isBigEndian())
6041        PtrOff = VT.getSizeInBits() / 8 - PtrOff;
6042      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
6043                           DAG.getConstant(PtrOff, PtrType));
6044    }
6045
6046    return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
6047                       LN0->getSrcValue(), LN0->getSrcValueOffset(),
6048                       LN0->isVolatile(), LN0->isNonTemporal(), Align);
6049  }
6050
6051  return SDValue();
6052}
6053
6054SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
6055  unsigned NumInScalars = N->getNumOperands();
6056  EVT VT = N->getValueType(0);
6057
6058  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
6059  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
6060  // at most two distinct vectors, turn this into a shuffle node.
6061  SDValue VecIn1, VecIn2;
6062  for (unsigned i = 0; i != NumInScalars; ++i) {
6063    // Ignore undef inputs.
6064    if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue;
6065
6066    // If this input is something other than a EXTRACT_VECTOR_ELT with a
6067    // constant index, bail out.
6068    if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
6069        !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) {
6070      VecIn1 = VecIn2 = SDValue(0, 0);
6071      break;
6072    }
6073
6074    // If the input vector type disagrees with the result of the build_vector,
6075    // we can't make a shuffle.
6076    SDValue ExtractedFromVec = N->getOperand(i).getOperand(0);
6077    if (ExtractedFromVec.getValueType() != VT) {
6078      VecIn1 = VecIn2 = SDValue(0, 0);
6079      break;
6080    }
6081
6082    // Otherwise, remember this.  We allow up to two distinct input vectors.
6083    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
6084      continue;
6085
6086    if (VecIn1.getNode() == 0) {
6087      VecIn1 = ExtractedFromVec;
6088    } else if (VecIn2.getNode() == 0) {
6089      VecIn2 = ExtractedFromVec;
6090    } else {
6091      // Too many inputs.
6092      VecIn1 = VecIn2 = SDValue(0, 0);
6093      break;
6094    }
6095  }
6096
6097  // If everything is good, we can make a shuffle operation.
6098  if (VecIn1.getNode()) {
6099    SmallVector<int, 8> Mask;
6100    for (unsigned i = 0; i != NumInScalars; ++i) {
6101      if (N->getOperand(i).getOpcode() == ISD::UNDEF) {
6102        Mask.push_back(-1);
6103        continue;
6104      }
6105
6106      // If extracting from the first vector, just use the index directly.
6107      SDValue Extract = N->getOperand(i);
6108      SDValue ExtVal = Extract.getOperand(1);
6109      if (Extract.getOperand(0) == VecIn1) {
6110        unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
6111        if (ExtIndex > VT.getVectorNumElements())
6112          return SDValue();
6113
6114        Mask.push_back(ExtIndex);
6115        continue;
6116      }
6117
6118      // Otherwise, use InIdx + VecSize
6119      unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue();
6120      Mask.push_back(Idx+NumInScalars);
6121    }
6122
6123    // Add count and size info.
6124    if (!isTypeLegal(VT))
6125      return SDValue();
6126
6127    // Return the new VECTOR_SHUFFLE node.
6128    SDValue Ops[2];
6129    Ops[0] = VecIn1;
6130    Ops[1] = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
6131    return DAG.getVectorShuffle(VT, N->getDebugLoc(), Ops[0], Ops[1], &Mask[0]);
6132  }
6133
6134  return SDValue();
6135}
6136
6137SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
6138  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
6139  // EXTRACT_SUBVECTOR operations.  If so, and if the EXTRACT_SUBVECTOR vector
6140  // inputs come from at most two distinct vectors, turn this into a shuffle
6141  // node.
6142
6143  // If we only have one input vector, we don't need to do any concatenation.
6144  if (N->getNumOperands() == 1)
6145    return N->getOperand(0);
6146
6147  return SDValue();
6148}
6149
6150SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
6151  return SDValue();
6152
6153  EVT VT = N->getValueType(0);
6154  unsigned NumElts = VT.getVectorNumElements();
6155
6156  SDValue N0 = N->getOperand(0);
6157
6158  assert(N0.getValueType().getVectorNumElements() == NumElts &&
6159        "Vector shuffle must be normalized in DAG");
6160
6161  // FIXME: implement canonicalizations from DAG.getVectorShuffle()
6162
6163  // If it is a splat, check if the argument vector is a build_vector with
6164  // all scalar elements the same.
6165  if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
6166    SDNode *V = N0.getNode();
6167
6168
6169    // If this is a bit convert that changes the element type of the vector but
6170    // not the number of vector elements, look through it.  Be careful not to
6171    // look though conversions that change things like v4f32 to v2f64.
6172    if (V->getOpcode() == ISD::BIT_CONVERT) {
6173      SDValue ConvInput = V->getOperand(0);
6174      if (ConvInput.getValueType().isVector() &&
6175          ConvInput.getValueType().getVectorNumElements() == NumElts)
6176        V = ConvInput.getNode();
6177    }
6178
6179    if (V->getOpcode() == ISD::BUILD_VECTOR) {
6180      unsigned NumElems = V->getNumOperands();
6181      unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
6182      if (NumElems > BaseIdx) {
6183        SDValue Base;
6184        bool AllSame = true;
6185        for (unsigned i = 0; i != NumElems; ++i) {
6186          if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
6187            Base = V->getOperand(i);
6188            break;
6189          }
6190        }
6191        // Splat of <u, u, u, u>, return <u, u, u, u>
6192        if (!Base.getNode())
6193          return N0;
6194        for (unsigned i = 0; i != NumElems; ++i) {
6195          if (V->getOperand(i) != Base) {
6196            AllSame = false;
6197            break;
6198          }
6199        }
6200        // Splat of <x, x, x, x>, return <x, x, x, x>
6201        if (AllSame)
6202          return N0;
6203      }
6204    }
6205  }
6206  return SDValue();
6207}
6208
6209/// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform
6210/// an AND to a vector_shuffle with the destination vector and a zero vector.
6211/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
6212///      vector_shuffle V, Zero, <0, 4, 2, 4>
6213SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
6214  EVT VT = N->getValueType(0);
6215  DebugLoc dl = N->getDebugLoc();
6216  SDValue LHS = N->getOperand(0);
6217  SDValue RHS = N->getOperand(1);
6218  if (N->getOpcode() == ISD::AND) {
6219    if (RHS.getOpcode() == ISD::BIT_CONVERT)
6220      RHS = RHS.getOperand(0);
6221    if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
6222      SmallVector<int, 8> Indices;
6223      unsigned NumElts = RHS.getNumOperands();
6224      for (unsigned i = 0; i != NumElts; ++i) {
6225        SDValue Elt = RHS.getOperand(i);
6226        if (!isa<ConstantSDNode>(Elt))
6227          return SDValue();
6228        else if (cast<ConstantSDNode>(Elt)->isAllOnesValue())
6229          Indices.push_back(i);
6230        else if (cast<ConstantSDNode>(Elt)->isNullValue())
6231          Indices.push_back(NumElts);
6232        else
6233          return SDValue();
6234      }
6235
6236      // Let's see if the target supports this vector_shuffle.
6237      EVT RVT = RHS.getValueType();
6238      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
6239        return SDValue();
6240
6241      // Return the new VECTOR_SHUFFLE node.
6242      EVT EltVT = RVT.getVectorElementType();
6243      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
6244                                     DAG.getConstant(0, EltVT));
6245      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
6246                                 RVT, &ZeroOps[0], ZeroOps.size());
6247      LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
6248      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
6249      return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
6250    }
6251  }
6252
6253  return SDValue();
6254}
6255
6256/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
6257SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
6258  // After legalize, the target may be depending on adds and other
6259  // binary ops to provide legal ways to construct constants or other
6260  // things. Simplifying them may result in a loss of legality.
6261  if (LegalOperations) return SDValue();
6262
6263  EVT VT = N->getValueType(0);
6264  assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
6265
6266  EVT EltType = VT.getVectorElementType();
6267  SDValue LHS = N->getOperand(0);
6268  SDValue RHS = N->getOperand(1);
6269  SDValue Shuffle = XformToShuffleWithZero(N);
6270  if (Shuffle.getNode()) return Shuffle;
6271
6272  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
6273  // this operation.
6274  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
6275      RHS.getOpcode() == ISD::BUILD_VECTOR) {
6276    SmallVector<SDValue, 8> Ops;
6277    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
6278      SDValue LHSOp = LHS.getOperand(i);
6279      SDValue RHSOp = RHS.getOperand(i);
6280      // If these two elements can't be folded, bail out.
6281      if ((LHSOp.getOpcode() != ISD::UNDEF &&
6282           LHSOp.getOpcode() != ISD::Constant &&
6283           LHSOp.getOpcode() != ISD::ConstantFP) ||
6284          (RHSOp.getOpcode() != ISD::UNDEF &&
6285           RHSOp.getOpcode() != ISD::Constant &&
6286           RHSOp.getOpcode() != ISD::ConstantFP))
6287        break;
6288
6289      // Can't fold divide by zero.
6290      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
6291          N->getOpcode() == ISD::FDIV) {
6292        if ((RHSOp.getOpcode() == ISD::Constant &&
6293             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
6294            (RHSOp.getOpcode() == ISD::ConstantFP &&
6295             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
6296          break;
6297      }
6298
6299      Ops.push_back(DAG.getNode(N->getOpcode(), LHS.getDebugLoc(),
6300                                EltType, LHSOp, RHSOp));
6301      AddToWorkList(Ops.back().getNode());
6302      assert((Ops.back().getOpcode() == ISD::UNDEF ||
6303              Ops.back().getOpcode() == ISD::Constant ||
6304              Ops.back().getOpcode() == ISD::ConstantFP) &&
6305             "Scalar binop didn't fold!");
6306    }
6307
6308    if (Ops.size() == LHS.getNumOperands()) {
6309      EVT VT = LHS.getValueType();
6310      return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
6311                         &Ops[0], Ops.size());
6312    }
6313  }
6314
6315  return SDValue();
6316}
6317
6318SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
6319                                    SDValue N1, SDValue N2){
6320  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
6321
6322  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
6323                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());
6324
6325  // If we got a simplified select_cc node back from SimplifySelectCC, then
6326  // break it down into a new SETCC node, and a new SELECT node, and then return
6327  // the SELECT node, since we were called with a SELECT node.
6328  if (SCC.getNode()) {
6329    // Check to see if we got a select_cc back (to turn into setcc/select).
6330    // Otherwise, just return whatever node we got back, like fabs.
6331    if (SCC.getOpcode() == ISD::SELECT_CC) {
6332      SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(),
6333                                  N0.getValueType(),
6334                                  SCC.getOperand(0), SCC.getOperand(1),
6335                                  SCC.getOperand(4));
6336      AddToWorkList(SETCC.getNode());
6337      return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(),
6338                         SCC.getOperand(2), SCC.getOperand(3), SETCC);
6339    }
6340
6341    return SCC;
6342  }
6343  return SDValue();
6344}
6345
6346/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
6347/// are the two values being selected between, see if we can simplify the
6348/// select.  Callers of this should assume that TheSelect is deleted if this
6349/// returns true.  As such, they should return the appropriate thing (e.g. the
6350/// node) back to the top-level of the DAG combiner loop to avoid it being
6351/// looked at.
6352bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
6353                                    SDValue RHS) {
6354
6355  // If this is a select from two identical things, try to pull the operation
6356  // through the select.
6357  if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
6358    // If this is a load and the token chain is identical, replace the select
6359    // of two loads with a load through a select of the address to load from.
6360    // This triggers in things like "select bool X, 10.0, 123.0" after the FP
6361    // constants have been dropped into the constant pool.
6362    if (LHS.getOpcode() == ISD::LOAD &&
6363        // Do not let this transformation reduce the number of volatile loads.
6364        !cast<LoadSDNode>(LHS)->isVolatile() &&
6365        !cast<LoadSDNode>(RHS)->isVolatile() &&
6366        // Token chains must be identical.
6367        LHS.getOperand(0) == RHS.getOperand(0)) {
6368      LoadSDNode *LLD = cast<LoadSDNode>(LHS);
6369      LoadSDNode *RLD = cast<LoadSDNode>(RHS);
6370
6371      // If this is an EXTLOAD, the VT's must match.
6372      if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
6373        // FIXME: this discards src value information.  This is
6374        // over-conservative. It would be beneficial to be able to remember
6375        // both potential memory locations.  Since we are discarding
6376        // src value info, don't do the transformation if the memory
6377        // locations are not in the default address space.
6378        unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
6379        if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
6380          if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
6381            LLDAddrSpace = PT->getAddressSpace();
6382        }
6383        if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
6384          if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
6385            RLDAddrSpace = PT->getAddressSpace();
6386        }
6387        SDValue Addr;
6388        if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
6389          if (TheSelect->getOpcode() == ISD::SELECT) {
6390            // Check that the condition doesn't reach either load.  If so, folding
6391            // this will induce a cycle into the DAG.
6392            if ((!LLD->hasAnyUseOfValue(1) ||
6393                 !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
6394                (!RLD->hasAnyUseOfValue(1) ||
6395                 !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
6396              Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
6397                                 LLD->getBasePtr().getValueType(),
6398                                 TheSelect->getOperand(0), LLD->getBasePtr(),
6399                                 RLD->getBasePtr());
6400            }
6401          } else {
6402            // Check that the condition doesn't reach either load.  If so, folding
6403            // this will induce a cycle into the DAG.
6404            if ((!LLD->hasAnyUseOfValue(1) ||
6405                 (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
6406                  !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
6407                (!RLD->hasAnyUseOfValue(1) ||
6408                 (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
6409                  !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
6410              Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
6411                                 LLD->getBasePtr().getValueType(),
6412                                 TheSelect->getOperand(0),
6413                                 TheSelect->getOperand(1),
6414                                 LLD->getBasePtr(), RLD->getBasePtr(),
6415                                 TheSelect->getOperand(4));
6416            }
6417          }
6418        }
6419
6420        if (Addr.getNode()) {
6421          SDValue Load;
6422          if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
6423            Load = DAG.getLoad(TheSelect->getValueType(0),
6424                               TheSelect->getDebugLoc(),
6425                               LLD->getChain(),
6426                               Addr, 0, 0,
6427                               LLD->isVolatile(),
6428                               LLD->isNonTemporal(),
6429                               LLD->getAlignment());
6430          } else {
6431            Load = DAG.getExtLoad(LLD->getExtensionType(),
6432                                  TheSelect->getDebugLoc(),
6433                                  TheSelect->getValueType(0),
6434                                  LLD->getChain(), Addr, 0, 0,
6435                                  LLD->getMemoryVT(),
6436                                  LLD->isVolatile(),
6437                                  LLD->isNonTemporal(),
6438                                  LLD->getAlignment());
6439          }
6440
6441          // Users of the select now use the result of the load.
6442          CombineTo(TheSelect, Load);
6443
6444          // Users of the old loads now use the new load's chain.  We know the
6445          // old-load value is dead now.
6446          CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
6447          CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
6448          return true;
6449        }
6450      }
6451    }
6452  }
6453
6454  return false;
6455}
6456
6457/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
6458/// where 'cond' is the comparison specified by CC.
6459SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
6460                                      SDValue N2, SDValue N3,
6461                                      ISD::CondCode CC, bool NotExtCompare) {
6462  // (x ? y : y) -> y.
6463  if (N2 == N3) return N2;
6464
6465  EVT VT = N2.getValueType();
6466  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
6467  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
6468  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
6469
6470  // Determine if the condition we're dealing with is constant
6471  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
6472                              N0, N1, CC, DL, false);
6473  if (SCC.getNode()) AddToWorkList(SCC.getNode());
6474  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
6475
6476  // fold select_cc true, x, y -> x
6477  if (SCCC && !SCCC->isNullValue())
6478    return N2;
6479  // fold select_cc false, x, y -> y
6480  if (SCCC && SCCC->isNullValue())
6481    return N3;
6482
6483  // Check to see if we can simplify the select into an fabs node
6484  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
6485    // Allow either -0.0 or 0.0
6486    if (CFP->getValueAPF().isZero()) {
6487      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
6488      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
6489          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
6490          N2 == N3.getOperand(0))
6491        return DAG.getNode(ISD::FABS, DL, VT, N0);
6492
6493      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
6494      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
6495          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
6496          N2.getOperand(0) == N3)
6497        return DAG.getNode(ISD::FABS, DL, VT, N3);
6498    }
6499  }
6500
6501  // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
6502  // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
6503  // in it.  This is a win when the constant is not otherwise available because
6504  // it replaces two constant pool loads with one.  We only do this if the FP
6505  // type is known to be legal, because if it isn't, then we are before legalize
6506  // types an we want the other legalization to happen first (e.g. to avoid
6507  // messing with soft float) and if the ConstantFP is not legal, because if
6508  // it is legal, we may not need to store the FP constant in a constant pool.
6509  if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
6510    if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
6511      if (TLI.isTypeLegal(N2.getValueType()) &&
6512          (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
6513           TargetLowering::Legal) &&
6514          // If both constants have multiple uses, then we won't need to do an
6515          // extra load, they are likely around in registers for other users.
6516          (TV->hasOneUse() || FV->hasOneUse())) {
6517        Constant *Elts[] = {
6518          const_cast<ConstantFP*>(FV->getConstantFPValue()),
6519          const_cast<ConstantFP*>(TV->getConstantFPValue())
6520        };
6521        const Type *FPTy = Elts[0]->getType();
6522        const TargetData &TD = *TLI.getTargetData();
6523
6524        // Create a ConstantArray of the two constants.
6525        Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
6526        SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
6527                                            TD.getPrefTypeAlignment(FPTy));
6528        unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
6529
6530        // Get the offsets to the 0 and 1 element of the array so that we can
6531        // select between them.
6532        SDValue Zero = DAG.getIntPtrConstant(0);
6533        unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
6534        SDValue One = DAG.getIntPtrConstant(EltSize);
6535
6536        SDValue Cond = DAG.getSetCC(DL,
6537                                    TLI.getSetCCResultType(N0.getValueType()),
6538                                    N0, N1, CC);
6539        SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
6540                                        Cond, One, Zero);
6541        CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
6542                            CstOffset);
6543        return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
6544                           PseudoSourceValue::getConstantPool(), 0, false,
6545                           false, Alignment);
6546
6547      }
6548    }
6549
6550  // Check to see if we can perform the "gzip trick", transforming
6551  // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
6552  if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT &&
6553      N0.getValueType().isInteger() &&
6554      N2.getValueType().isInteger() &&
6555      (N1C->isNullValue() ||                         // (a < 0) ? b : 0
6556       (N1C->getAPIntValue() == 1 && N0 == N2))) {   // (a < 1) ? a : 0
6557    EVT XType = N0.getValueType();
6558    EVT AType = N2.getValueType();
6559    if (XType.bitsGE(AType)) {
6560      // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
6561      // single-bit constant.
6562      if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) {
6563        unsigned ShCtV = N2C->getAPIntValue().logBase2();
6564        ShCtV = XType.getSizeInBits()-ShCtV-1;
6565        SDValue ShCt = DAG.getConstant(ShCtV, getShiftAmountTy());
6566        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
6567                                    XType, N0, ShCt);
6568        AddToWorkList(Shift.getNode());
6569
6570        if (XType.bitsGT(AType)) {
6571          Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
6572          AddToWorkList(Shift.getNode());
6573        }
6574
6575        return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
6576      }
6577
6578      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
6579                                  XType, N0,
6580                                  DAG.getConstant(XType.getSizeInBits()-1,
6581                                                  getShiftAmountTy()));
6582      AddToWorkList(Shift.getNode());
6583
6584      if (XType.bitsGT(AType)) {
6585        Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
6586        AddToWorkList(Shift.getNode());
6587      }
6588
6589      return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
6590    }
6591  }
6592
6593  // fold select C, 16, 0 -> shl C, 4
6594  if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
6595      TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
6596
6597    // If the caller doesn't want us to simplify this into a zext of a compare,
6598    // don't do it.
6599    if (NotExtCompare && N2C->getAPIntValue() == 1)
6600      return SDValue();
6601
6602    // Get a SetCC of the condition
6603    // FIXME: Should probably make sure that setcc is legal if we ever have a
6604    // target where it isn't.
6605    SDValue Temp, SCC;
6606    // cast from setcc result type to select result type
6607    if (LegalTypes) {
6608      SCC  = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()),
6609                          N0, N1, CC);
6610      if (N2.getValueType().bitsLT(SCC.getValueType()))
6611        Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), N2.getValueType());
6612      else
6613        Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
6614                           N2.getValueType(), SCC);
6615    } else {
6616      SCC  = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC);
6617      Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(),
6618                         N2.getValueType(), SCC);
6619    }
6620
6621    AddToWorkList(SCC.getNode());
6622    AddToWorkList(Temp.getNode());
6623
6624    if (N2C->getAPIntValue() == 1)
6625      return Temp;
6626
6627    // shl setcc result by log2 n2c
6628    return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp,
6629                       DAG.getConstant(N2C->getAPIntValue().logBase2(),
6630                                       getShiftAmountTy()));
6631  }
6632
6633  // Check to see if this is the equivalent of setcc
6634  // FIXME: Turn all of these into setcc if setcc if setcc is legal
6635  // otherwise, go ahead with the folds.
6636  if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) {
6637    EVT XType = N0.getValueType();
6638    if (!LegalOperations ||
6639        TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) {
6640      SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC);
6641      if (Res.getValueType() != VT)
6642        Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res);
6643      return Res;
6644    }
6645
6646    // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X))))
6647    if (N1C && N1C->isNullValue() && CC == ISD::SETEQ &&
6648        (!LegalOperations ||
6649         TLI.isOperationLegal(ISD::CTLZ, XType))) {
6650      SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0);
6651      return DAG.getNode(ISD::SRL, DL, XType, Ctlz,
6652                         DAG.getConstant(Log2_32(XType.getSizeInBits()),
6653                                         getShiftAmountTy()));
6654    }
6655    // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1))
6656    if (N1C && N1C->isNullValue() && CC == ISD::SETGT) {
6657      SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(),
6658                                  XType, DAG.getConstant(0, XType), N0);
6659      SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType);
6660      return DAG.getNode(ISD::SRL, DL, XType,
6661                         DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0),
6662                         DAG.getConstant(XType.getSizeInBits()-1,
6663                                         getShiftAmountTy()));
6664    }
6665    // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1))
6666    if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) {
6667      SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0,
6668                                 DAG.getConstant(XType.getSizeInBits()-1,
6669                                                 getShiftAmountTy()));
6670      return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType));
6671    }
6672  }
6673
6674  // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X ->
6675  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6676  if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) &&
6677      N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) &&
6678      N2.getOperand(0) == N1 && N0.getValueType().isInteger()) {
6679    EVT XType = N0.getValueType();
6680    SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0,
6681                                DAG.getConstant(XType.getSizeInBits()-1,
6682                                                getShiftAmountTy()));
6683    SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType,
6684                              N0, Shift);
6685    AddToWorkList(Shift.getNode());
6686    AddToWorkList(Add.getNode());
6687    return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
6688  }
6689  // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X ->
6690  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
6691  if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT &&
6692      N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) {
6693    if (ConstantSDNode *SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0))) {
6694      EVT XType = N0.getValueType();
6695      if (SubC->isNullValue() && XType.isInteger()) {
6696        SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
6697                                    N0,
6698                                    DAG.getConstant(XType.getSizeInBits()-1,
6699                                                    getShiftAmountTy()));
6700        SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
6701                                  XType, N0, Shift);
6702        AddToWorkList(Shift.getNode());
6703        AddToWorkList(Add.getNode());
6704        return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
6705      }
6706    }
6707  }
6708
6709  return SDValue();
6710}
6711
6712/// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
6713SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
6714                                   SDValue N1, ISD::CondCode Cond,
6715                                   DebugLoc DL, bool foldBooleans) {
6716  TargetLowering::DAGCombinerInfo
6717    DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this);
6718  return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
6719}
6720
6721/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
6722/// return a DAG expression to select that will generate the same value by
6723/// multiplying by a magic number.  See:
6724/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
6725SDValue DAGCombiner::BuildSDIV(SDNode *N) {
6726  std::vector<SDNode*> Built;
6727  SDValue S = TLI.BuildSDIV(N, DAG, &Built);
6728
6729  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
6730       ii != ee; ++ii)
6731    AddToWorkList(*ii);
6732  return S;
6733}
6734
6735/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
6736/// return a DAG expression to select that will generate the same value by
6737/// multiplying by a magic number.  See:
6738/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
6739SDValue DAGCombiner::BuildUDIV(SDNode *N) {
6740  std::vector<SDNode*> Built;
6741  SDValue S = TLI.BuildUDIV(N, DAG, &Built);
6742
6743  for (std::vector<SDNode*>::iterator ii = Built.begin(), ee = Built.end();
6744       ii != ee; ++ii)
6745    AddToWorkList(*ii);
6746  return S;
6747}
6748
6749/// FindBaseOffset - Return true if base is a frame index, which is known not
6750// to alias with anything but itself.  Provides base object and offset as results.
6751static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
6752                           const GlobalValue *&GV, void *&CV) {
6753  // Assume it is a primitive operation.
6754  Base = Ptr; Offset = 0; GV = 0; CV = 0;
6755
6756  // If it's an adding a simple constant then integrate the offset.
6757  if (Base.getOpcode() == ISD::ADD) {
6758    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
6759      Base = Base.getOperand(0);
6760      Offset += C->getZExtValue();
6761    }
6762  }
6763
6764  // Return the underlying GlobalValue, and update the Offset.  Return false
6765  // for GlobalAddressSDNode since the same GlobalAddress may be represented
6766  // by multiple nodes with different offsets.
6767  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
6768    GV = G->getGlobal();
6769    Offset += G->getOffset();
6770    return false;
6771  }
6772
6773  // Return the underlying Constant value, and update the Offset.  Return false
6774  // for ConstantSDNodes since the same constant pool entry may be represented
6775  // by multiple nodes with different offsets.
6776  if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
6777    CV = C->isMachineConstantPoolEntry() ? (void *)C->getMachineCPVal()
6778                                         : (void *)C->getConstVal();
6779    Offset += C->getOffset();
6780    return false;
6781  }
6782  // If it's any of the following then it can't alias with anything but itself.
6783  return isa<FrameIndexSDNode>(Base);
6784}
6785
6786/// isAlias - Return true if there is any possibility that the two addresses
6787/// overlap.
6788bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
6789                          const Value *SrcValue1, int SrcValueOffset1,
6790                          unsigned SrcValueAlign1,
6791                          SDValue Ptr2, int64_t Size2,
6792                          const Value *SrcValue2, int SrcValueOffset2,
6793                          unsigned SrcValueAlign2) const {
6794  // If they are the same then they must be aliases.
6795  if (Ptr1 == Ptr2) return true;
6796
6797  // Gather base node and offset information.
6798  SDValue Base1, Base2;
6799  int64_t Offset1, Offset2;
6800  const GlobalValue *GV1, *GV2;
6801  void *CV1, *CV2;
6802  bool isFrameIndex1 = FindBaseOffset(Ptr1, Base1, Offset1, GV1, CV1);
6803  bool isFrameIndex2 = FindBaseOffset(Ptr2, Base2, Offset2, GV2, CV2);
6804
6805  // If they have a same base address then check to see if they overlap.
6806  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
6807    return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
6808
6809  // If we know what the bases are, and they aren't identical, then we know they
6810  // cannot alias.
6811  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
6812    return false;
6813
6814  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
6815  // compared to the size and offset of the access, we may be able to prove they
6816  // do not alias.  This check is conservative for now to catch cases created by
6817  // splitting vector types.
6818  if ((SrcValueAlign1 == SrcValueAlign2) &&
6819      (SrcValueOffset1 != SrcValueOffset2) &&
6820      (Size1 == Size2) && (SrcValueAlign1 > Size1)) {
6821    int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
6822    int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
6823
6824    // There is no overlap between these relatively aligned accesses of similar
6825    // size, return no alias.
6826    if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
6827      return false;
6828  }
6829
6830  if (CombinerGlobalAA) {
6831    // Use alias analysis information.
6832    int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
6833    int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
6834    int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
6835    AliasAnalysis::AliasResult AAResult =
6836                             AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
6837    if (AAResult == AliasAnalysis::NoAlias)
6838      return false;
6839  }
6840
6841  // Otherwise we have to assume they alias.
6842  return true;
6843}
6844
6845/// FindAliasInfo - Extracts the relevant alias information from the memory
6846/// node.  Returns true if the operand was a load.
6847bool DAGCombiner::FindAliasInfo(SDNode *N,
6848                        SDValue &Ptr, int64_t &Size,
6849                        const Value *&SrcValue,
6850                        int &SrcValueOffset,
6851                        unsigned &SrcValueAlign) const {
6852  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
6853    Ptr = LD->getBasePtr();
6854    Size = LD->getMemoryVT().getSizeInBits() >> 3;
6855    SrcValue = LD->getSrcValue();
6856    SrcValueOffset = LD->getSrcValueOffset();
6857    SrcValueAlign = LD->getOriginalAlignment();
6858    return true;
6859  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
6860    Ptr = ST->getBasePtr();
6861    Size = ST->getMemoryVT().getSizeInBits() >> 3;
6862    SrcValue = ST->getSrcValue();
6863    SrcValueOffset = ST->getSrcValueOffset();
6864    SrcValueAlign = ST->getOriginalAlignment();
6865  } else {
6866    llvm_unreachable("FindAliasInfo expected a memory operand");
6867  }
6868
6869  return false;
6870}
6871
6872/// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
6873/// looking for aliasing nodes and adding them to the Aliases vector.
6874void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
6875                                   SmallVector<SDValue, 8> &Aliases) {
6876  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
6877  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
6878
6879  // Get alias information for node.
6880  SDValue Ptr;
6881  int64_t Size;
6882  const Value *SrcValue;
6883  int SrcValueOffset;
6884  unsigned SrcValueAlign;
6885  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
6886                              SrcValueAlign);
6887
6888  // Starting off.
6889  Chains.push_back(OriginalChain);
6890  unsigned Depth = 0;
6891
6892  // Look at each chain and determine if it is an alias.  If so, add it to the
6893  // aliases list.  If not, then continue up the chain looking for the next
6894  // candidate.
6895  while (!Chains.empty()) {
6896    SDValue Chain = Chains.back();
6897    Chains.pop_back();
6898
6899    // For TokenFactor nodes, look at each operand and only continue up the
6900    // chain until we find two aliases.  If we've seen two aliases, assume we'll
6901    // find more and revert to original chain since the xform is unlikely to be
6902    // profitable.
6903    //
6904    // FIXME: The depth check could be made to return the last non-aliasing
6905    // chain we found before we hit a tokenfactor rather than the original
6906    // chain.
6907    if (Depth > 6 || Aliases.size() == 2) {
6908      Aliases.clear();
6909      Aliases.push_back(OriginalChain);
6910      break;
6911    }
6912
6913    // Don't bother if we've been before.
6914    if (!Visited.insert(Chain.getNode()))
6915      continue;
6916
6917    switch (Chain.getOpcode()) {
6918    case ISD::EntryToken:
6919      // Entry token is ideal chain operand, but handled in FindBetterChain.
6920      break;
6921
6922    case ISD::LOAD:
6923    case ISD::STORE: {
6924      // Get alias information for Chain.
6925      SDValue OpPtr;
6926      int64_t OpSize;
6927      const Value *OpSrcValue;
6928      int OpSrcValueOffset;
6929      unsigned OpSrcValueAlign;
6930      bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
6931                                    OpSrcValue, OpSrcValueOffset,
6932                                    OpSrcValueAlign);
6933
6934      // If chain is alias then stop here.
6935      if (!(IsLoad && IsOpLoad) &&
6936          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
6937                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
6938                  OpSrcValueAlign)) {
6939        Aliases.push_back(Chain);
6940      } else {
6941        // Look further up the chain.
6942        Chains.push_back(Chain.getOperand(0));
6943        ++Depth;
6944      }
6945      break;
6946    }
6947
6948    case ISD::TokenFactor:
6949      // We have to check each of the operands of the token factor for "small"
6950      // token factors, so we queue them up.  Adding the operands to the queue
6951      // (stack) in reverse order maintains the original order and increases the
6952      // likelihood that getNode will find a matching token factor (CSE.)
6953      if (Chain.getNumOperands() > 16) {
6954        Aliases.push_back(Chain);
6955        break;
6956      }
6957      for (unsigned n = Chain.getNumOperands(); n;)
6958        Chains.push_back(Chain.getOperand(--n));
6959      ++Depth;
6960      break;
6961
6962    default:
6963      // For all other instructions we will just have to take what we can get.
6964      Aliases.push_back(Chain);
6965      break;
6966    }
6967  }
6968}
6969
6970/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
6971/// for a better chain (aliasing node.)
6972SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
6973  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
6974
6975  // Accumulate all the aliases to this node.
6976  GatherAllAliases(N, OldChain, Aliases);
6977
6978  if (Aliases.size() == 0) {
6979    // If no operands then chain to entry token.
6980    return DAG.getEntryNode();
6981  } else if (Aliases.size() == 1) {
6982    // If a single operand then chain to it.  We don't need to revisit it.
6983    return Aliases[0];
6984  }
6985
6986  // Construct a custom tailored token factor.
6987  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
6988                     &Aliases[0], Aliases.size());
6989}
6990
6991// SelectionDAG::Combine - This is the entry point for the file.
6992//
6993void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
6994                           CodeGenOpt::Level OptLevel) {
6995  /// run - This is the main entry point to this class.
6996  ///
6997  DAGCombiner(*this, AA, OptLevel).Run(Level);
6998}
6999