DependenceAnalysis.cpp revision 5230ad61fd35d3006e7764c3152d28e2e68c288f
1//===-- DependenceAnalysis.cpp - DA Implementation --------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// DependenceAnalysis is an LLVM pass that analyses dependences between memory
11// accesses. Currently, it is an (incomplete) implementation of the approach
12// described in
13//
14//            Practical Dependence Testing
15//            Goff, Kennedy, Tseng
16//            PLDI 1991
17//
18// There's a single entry point that analyzes the dependence between a pair
19// of memory references in a function, returning either NULL, for no dependence,
20// or a more-or-less detailed description of the dependence between them.
21//
22// Currently, the implementation cannot propagate constraints between
23// coupled RDIV subscripts and lacks a multi-subscript MIV test.
24// Both of these are conservative weaknesses;
25// that is, not a source of correctness problems.
26//
27// The implementation depends on the GEP instruction to
28// differentiate subscripts. Since Clang linearizes subscripts
29// for most arrays, we give up some precision (though the existing MIV tests
30// will help). We trust that the GEP instruction will eventually be extended.
31// In the meantime, we should explore Maslov's ideas about delinearization.
32//
33// We should pay some careful attention to the possibility of integer overflow
34// in the implementation of the various tests. This could happen with Add,
35// Subtract, or Multiply, with both APInt's and SCEV's.
36//
37// Some non-linear subscript pairs can be handled by the GCD test
38// (and perhaps other tests).
39// Should explore how often these things occur.
40//
41// Finally, it seems like certain test cases expose weaknesses in the SCEV
42// simplification, especially in the handling of sign and zero extensions.
43// It could be useful to spend time exploring these.
44//
45// Please note that this is work in progress and the interface is subject to
46// change.
47//
48//===----------------------------------------------------------------------===//
49//                                                                            //
50//                   In memory of Ken Kennedy, 1945 - 2007                    //
51//                                                                            //
52//===----------------------------------------------------------------------===//
53
54#define DEBUG_TYPE "da"
55
56#include "llvm/Analysis/DependenceAnalysis.h"
57#include "llvm/ADT/Statistic.h"
58#include "llvm/Analysis/AliasAnalysis.h"
59#include "llvm/Analysis/LoopInfo.h"
60#include "llvm/Analysis/ScalarEvolution.h"
61#include "llvm/Analysis/ScalarEvolutionExpressions.h"
62#include "llvm/Analysis/ValueTracking.h"
63#include "llvm/IR/Operator.h"
64#include "llvm/Support/CommandLine.h"
65#include "llvm/Support/Debug.h"
66#include "llvm/Support/ErrorHandling.h"
67#include "llvm/Support/InstIterator.h"
68#include "llvm/Support/raw_ostream.h"
69
70using namespace llvm;
71
72//===----------------------------------------------------------------------===//
73// statistics
74
75STATISTIC(TotalArrayPairs, "Array pairs tested");
76STATISTIC(SeparableSubscriptPairs, "Separable subscript pairs");
77STATISTIC(CoupledSubscriptPairs, "Coupled subscript pairs");
78STATISTIC(NonlinearSubscriptPairs, "Nonlinear subscript pairs");
79STATISTIC(ZIVapplications, "ZIV applications");
80STATISTIC(ZIVindependence, "ZIV independence");
81STATISTIC(StrongSIVapplications, "Strong SIV applications");
82STATISTIC(StrongSIVsuccesses, "Strong SIV successes");
83STATISTIC(StrongSIVindependence, "Strong SIV independence");
84STATISTIC(WeakCrossingSIVapplications, "Weak-Crossing SIV applications");
85STATISTIC(WeakCrossingSIVsuccesses, "Weak-Crossing SIV successes");
86STATISTIC(WeakCrossingSIVindependence, "Weak-Crossing SIV independence");
87STATISTIC(ExactSIVapplications, "Exact SIV applications");
88STATISTIC(ExactSIVsuccesses, "Exact SIV successes");
89STATISTIC(ExactSIVindependence, "Exact SIV independence");
90STATISTIC(WeakZeroSIVapplications, "Weak-Zero SIV applications");
91STATISTIC(WeakZeroSIVsuccesses, "Weak-Zero SIV successes");
92STATISTIC(WeakZeroSIVindependence, "Weak-Zero SIV independence");
93STATISTIC(ExactRDIVapplications, "Exact RDIV applications");
94STATISTIC(ExactRDIVindependence, "Exact RDIV independence");
95STATISTIC(SymbolicRDIVapplications, "Symbolic RDIV applications");
96STATISTIC(SymbolicRDIVindependence, "Symbolic RDIV independence");
97STATISTIC(DeltaApplications, "Delta applications");
98STATISTIC(DeltaSuccesses, "Delta successes");
99STATISTIC(DeltaIndependence, "Delta independence");
100STATISTIC(DeltaPropagations, "Delta propagations");
101STATISTIC(GCDapplications, "GCD applications");
102STATISTIC(GCDsuccesses, "GCD successes");
103STATISTIC(GCDindependence, "GCD independence");
104STATISTIC(BanerjeeApplications, "Banerjee applications");
105STATISTIC(BanerjeeIndependence, "Banerjee independence");
106STATISTIC(BanerjeeSuccesses, "Banerjee successes");
107
108static cl::opt<bool>
109Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,
110            cl::desc("Try to delinearize array references."));
111
112//===----------------------------------------------------------------------===//
113// basics
114
115INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",
116                      "Dependence Analysis", true, true)
117INITIALIZE_PASS_DEPENDENCY(LoopInfo)
118INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
119INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
120INITIALIZE_PASS_END(DependenceAnalysis, "da",
121                    "Dependence Analysis", true, true)
122
123char DependenceAnalysis::ID = 0;
124
125
126FunctionPass *llvm::createDependenceAnalysisPass() {
127  return new DependenceAnalysis();
128}
129
130
131bool DependenceAnalysis::runOnFunction(Function &F) {
132  this->F = &F;
133  AA = &getAnalysis<AliasAnalysis>();
134  SE = &getAnalysis<ScalarEvolution>();
135  LI = &getAnalysis<LoopInfo>();
136  return false;
137}
138
139
140void DependenceAnalysis::releaseMemory() {
141}
142
143
144void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
145  AU.setPreservesAll();
146  AU.addRequiredTransitive<AliasAnalysis>();
147  AU.addRequiredTransitive<ScalarEvolution>();
148  AU.addRequiredTransitive<LoopInfo>();
149}
150
151
152// Used to test the dependence analyzer.
153// Looks through the function, noting loads and stores.
154// Calls depends() on every possible pair and prints out the result.
155// Ignores all other instructions.
156static
157void dumpExampleDependence(raw_ostream &OS, Function *F,
158                           DependenceAnalysis *DA) {
159  for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F);
160       SrcI != SrcE; ++SrcI) {
161    if (isa<StoreInst>(*SrcI) || isa<LoadInst>(*SrcI)) {
162      for (inst_iterator DstI = SrcI, DstE = inst_end(F);
163           DstI != DstE; ++DstI) {
164        if (isa<StoreInst>(*DstI) || isa<LoadInst>(*DstI)) {
165          OS << "da analyze - ";
166          if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) {
167            D->dump(OS);
168            for (unsigned Level = 1; Level <= D->getLevels(); Level++) {
169              if (D->isSplitable(Level)) {
170                OS << "da analyze - split level = " << Level;
171                OS << ", iteration = " << *DA->getSplitIteration(D, Level);
172                OS << "!\n";
173              }
174            }
175            delete D;
176          }
177          else
178            OS << "none!\n";
179        }
180      }
181    }
182  }
183}
184
185
186void DependenceAnalysis::print(raw_ostream &OS, const Module*) const {
187  dumpExampleDependence(OS, F, const_cast<DependenceAnalysis *>(this));
188}
189
190//===----------------------------------------------------------------------===//
191// Dependence methods
192
193// Returns true if this is an input dependence.
194bool Dependence::isInput() const {
195  return Src->mayReadFromMemory() && Dst->mayReadFromMemory();
196}
197
198
199// Returns true if this is an output dependence.
200bool Dependence::isOutput() const {
201  return Src->mayWriteToMemory() && Dst->mayWriteToMemory();
202}
203
204
205// Returns true if this is an flow (aka true)  dependence.
206bool Dependence::isFlow() const {
207  return Src->mayWriteToMemory() && Dst->mayReadFromMemory();
208}
209
210
211// Returns true if this is an anti dependence.
212bool Dependence::isAnti() const {
213  return Src->mayReadFromMemory() && Dst->mayWriteToMemory();
214}
215
216
217// Returns true if a particular level is scalar; that is,
218// if no subscript in the source or destination mention the induction
219// variable associated with the loop at this level.
220// Leave this out of line, so it will serve as a virtual method anchor
221bool Dependence::isScalar(unsigned level) const {
222  return false;
223}
224
225
226//===----------------------------------------------------------------------===//
227// FullDependence methods
228
229FullDependence::FullDependence(Instruction *Source,
230                               Instruction *Destination,
231                               bool PossiblyLoopIndependent,
232                               unsigned CommonLevels) :
233  Dependence(Source, Destination),
234  Levels(CommonLevels),
235  LoopIndependent(PossiblyLoopIndependent) {
236  Consistent = true;
237  DV = CommonLevels ? new DVEntry[CommonLevels] : NULL;
238}
239
240// The rest are simple getters that hide the implementation.
241
242// getDirection - Returns the direction associated with a particular level.
243unsigned FullDependence::getDirection(unsigned Level) const {
244  assert(0 < Level && Level <= Levels && "Level out of range");
245  return DV[Level - 1].Direction;
246}
247
248
249// Returns the distance (or NULL) associated with a particular level.
250const SCEV *FullDependence::getDistance(unsigned Level) const {
251  assert(0 < Level && Level <= Levels && "Level out of range");
252  return DV[Level - 1].Distance;
253}
254
255
256// Returns true if a particular level is scalar; that is,
257// if no subscript in the source or destination mention the induction
258// variable associated with the loop at this level.
259bool FullDependence::isScalar(unsigned Level) const {
260  assert(0 < Level && Level <= Levels && "Level out of range");
261  return DV[Level - 1].Scalar;
262}
263
264
265// Returns true if peeling the first iteration from this loop
266// will break this dependence.
267bool FullDependence::isPeelFirst(unsigned Level) const {
268  assert(0 < Level && Level <= Levels && "Level out of range");
269  return DV[Level - 1].PeelFirst;
270}
271
272
273// Returns true if peeling the last iteration from this loop
274// will break this dependence.
275bool FullDependence::isPeelLast(unsigned Level) const {
276  assert(0 < Level && Level <= Levels && "Level out of range");
277  return DV[Level - 1].PeelLast;
278}
279
280
281// Returns true if splitting this loop will break the dependence.
282bool FullDependence::isSplitable(unsigned Level) const {
283  assert(0 < Level && Level <= Levels && "Level out of range");
284  return DV[Level - 1].Splitable;
285}
286
287
288//===----------------------------------------------------------------------===//
289// DependenceAnalysis::Constraint methods
290
291// If constraint is a point <X, Y>, returns X.
292// Otherwise assert.
293const SCEV *DependenceAnalysis::Constraint::getX() const {
294  assert(Kind == Point && "Kind should be Point");
295  return A;
296}
297
298
299// If constraint is a point <X, Y>, returns Y.
300// Otherwise assert.
301const SCEV *DependenceAnalysis::Constraint::getY() const {
302  assert(Kind == Point && "Kind should be Point");
303  return B;
304}
305
306
307// If constraint is a line AX + BY = C, returns A.
308// Otherwise assert.
309const SCEV *DependenceAnalysis::Constraint::getA() const {
310  assert((Kind == Line || Kind == Distance) &&
311         "Kind should be Line (or Distance)");
312  return A;
313}
314
315
316// If constraint is a line AX + BY = C, returns B.
317// Otherwise assert.
318const SCEV *DependenceAnalysis::Constraint::getB() const {
319  assert((Kind == Line || Kind == Distance) &&
320         "Kind should be Line (or Distance)");
321  return B;
322}
323
324
325// If constraint is a line AX + BY = C, returns C.
326// Otherwise assert.
327const SCEV *DependenceAnalysis::Constraint::getC() const {
328  assert((Kind == Line || Kind == Distance) &&
329         "Kind should be Line (or Distance)");
330  return C;
331}
332
333
334// If constraint is a distance, returns D.
335// Otherwise assert.
336const SCEV *DependenceAnalysis::Constraint::getD() const {
337  assert(Kind == Distance && "Kind should be Distance");
338  return SE->getNegativeSCEV(C);
339}
340
341
342// Returns the loop associated with this constraint.
343const Loop *DependenceAnalysis::Constraint::getAssociatedLoop() const {
344  assert((Kind == Distance || Kind == Line || Kind == Point) &&
345         "Kind should be Distance, Line, or Point");
346  return AssociatedLoop;
347}
348
349
350void DependenceAnalysis::Constraint::setPoint(const SCEV *X,
351                                              const SCEV *Y,
352                                              const Loop *CurLoop) {
353  Kind = Point;
354  A = X;
355  B = Y;
356  AssociatedLoop = CurLoop;
357}
358
359
360void DependenceAnalysis::Constraint::setLine(const SCEV *AA,
361                                             const SCEV *BB,
362                                             const SCEV *CC,
363                                             const Loop *CurLoop) {
364  Kind = Line;
365  A = AA;
366  B = BB;
367  C = CC;
368  AssociatedLoop = CurLoop;
369}
370
371
372void DependenceAnalysis::Constraint::setDistance(const SCEV *D,
373                                                 const Loop *CurLoop) {
374  Kind = Distance;
375  A = SE->getConstant(D->getType(), 1);
376  B = SE->getNegativeSCEV(A);
377  C = SE->getNegativeSCEV(D);
378  AssociatedLoop = CurLoop;
379}
380
381
382void DependenceAnalysis::Constraint::setEmpty() {
383  Kind = Empty;
384}
385
386
387void DependenceAnalysis::Constraint::setAny(ScalarEvolution *NewSE) {
388  SE = NewSE;
389  Kind = Any;
390}
391
392
393// For debugging purposes. Dumps the constraint out to OS.
394void DependenceAnalysis::Constraint::dump(raw_ostream &OS) const {
395  if (isEmpty())
396    OS << " Empty\n";
397  else if (isAny())
398    OS << " Any\n";
399  else if (isPoint())
400    OS << " Point is <" << *getX() << ", " << *getY() << ">\n";
401  else if (isDistance())
402    OS << " Distance is " << *getD() <<
403      " (" << *getA() << "*X + " << *getB() << "*Y = " << *getC() << ")\n";
404  else if (isLine())
405    OS << " Line is " << *getA() << "*X + " <<
406      *getB() << "*Y = " << *getC() << "\n";
407  else
408    llvm_unreachable("unknown constraint type in Constraint::dump");
409}
410
411
412// Updates X with the intersection
413// of the Constraints X and Y. Returns true if X has changed.
414// Corresponds to Figure 4 from the paper
415//
416//            Practical Dependence Testing
417//            Goff, Kennedy, Tseng
418//            PLDI 1991
419bool DependenceAnalysis::intersectConstraints(Constraint *X,
420                                              const Constraint *Y) {
421  ++DeltaApplications;
422  DEBUG(dbgs() << "\tintersect constraints\n");
423  DEBUG(dbgs() << "\t    X ="; X->dump(dbgs()));
424  DEBUG(dbgs() << "\t    Y ="; Y->dump(dbgs()));
425  assert(!Y->isPoint() && "Y must not be a Point");
426  if (X->isAny()) {
427    if (Y->isAny())
428      return false;
429    *X = *Y;
430    return true;
431  }
432  if (X->isEmpty())
433    return false;
434  if (Y->isEmpty()) {
435    X->setEmpty();
436    return true;
437  }
438
439  if (X->isDistance() && Y->isDistance()) {
440    DEBUG(dbgs() << "\t    intersect 2 distances\n");
441    if (isKnownPredicate(CmpInst::ICMP_EQ, X->getD(), Y->getD()))
442      return false;
443    if (isKnownPredicate(CmpInst::ICMP_NE, X->getD(), Y->getD())) {
444      X->setEmpty();
445      ++DeltaSuccesses;
446      return true;
447    }
448    // Hmmm, interesting situation.
449    // I guess if either is constant, keep it and ignore the other.
450    if (isa<SCEVConstant>(Y->getD())) {
451      *X = *Y;
452      return true;
453    }
454    return false;
455  }
456
457  // At this point, the pseudo-code in Figure 4 of the paper
458  // checks if (X->isPoint() && Y->isPoint()).
459  // This case can't occur in our implementation,
460  // since a Point can only arise as the result of intersecting
461  // two Line constraints, and the right-hand value, Y, is never
462  // the result of an intersection.
463  assert(!(X->isPoint() && Y->isPoint()) &&
464         "We shouldn't ever see X->isPoint() && Y->isPoint()");
465
466  if (X->isLine() && Y->isLine()) {
467    DEBUG(dbgs() << "\t    intersect 2 lines\n");
468    const SCEV *Prod1 = SE->getMulExpr(X->getA(), Y->getB());
469    const SCEV *Prod2 = SE->getMulExpr(X->getB(), Y->getA());
470    if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2)) {
471      // slopes are equal, so lines are parallel
472      DEBUG(dbgs() << "\t\tsame slope\n");
473      Prod1 = SE->getMulExpr(X->getC(), Y->getB());
474      Prod2 = SE->getMulExpr(X->getB(), Y->getC());
475      if (isKnownPredicate(CmpInst::ICMP_EQ, Prod1, Prod2))
476        return false;
477      if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
478        X->setEmpty();
479        ++DeltaSuccesses;
480        return true;
481      }
482      return false;
483    }
484    if (isKnownPredicate(CmpInst::ICMP_NE, Prod1, Prod2)) {
485      // slopes differ, so lines intersect
486      DEBUG(dbgs() << "\t\tdifferent slopes\n");
487      const SCEV *C1B2 = SE->getMulExpr(X->getC(), Y->getB());
488      const SCEV *C1A2 = SE->getMulExpr(X->getC(), Y->getA());
489      const SCEV *C2B1 = SE->getMulExpr(Y->getC(), X->getB());
490      const SCEV *C2A1 = SE->getMulExpr(Y->getC(), X->getA());
491      const SCEV *A1B2 = SE->getMulExpr(X->getA(), Y->getB());
492      const SCEV *A2B1 = SE->getMulExpr(Y->getA(), X->getB());
493      const SCEVConstant *C1A2_C2A1 =
494        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1A2, C2A1));
495      const SCEVConstant *C1B2_C2B1 =
496        dyn_cast<SCEVConstant>(SE->getMinusSCEV(C1B2, C2B1));
497      const SCEVConstant *A1B2_A2B1 =
498        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A1B2, A2B1));
499      const SCEVConstant *A2B1_A1B2 =
500        dyn_cast<SCEVConstant>(SE->getMinusSCEV(A2B1, A1B2));
501      if (!C1B2_C2B1 || !C1A2_C2A1 ||
502          !A1B2_A2B1 || !A2B1_A1B2)
503        return false;
504      APInt Xtop = C1B2_C2B1->getValue()->getValue();
505      APInt Xbot = A1B2_A2B1->getValue()->getValue();
506      APInt Ytop = C1A2_C2A1->getValue()->getValue();
507      APInt Ybot = A2B1_A1B2->getValue()->getValue();
508      DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");
509      DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");
510      DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n");
511      DEBUG(dbgs() << "\t\tYbot = " << Ybot << "\n");
512      APInt Xq = Xtop; // these need to be initialized, even
513      APInt Xr = Xtop; // though they're just going to be overwritten
514      APInt::sdivrem(Xtop, Xbot, Xq, Xr);
515      APInt Yq = Ytop;
516      APInt Yr = Ytop;
517      APInt::sdivrem(Ytop, Ybot, Yq, Yr);
518      if (Xr != 0 || Yr != 0) {
519        X->setEmpty();
520        ++DeltaSuccesses;
521        return true;
522      }
523      DEBUG(dbgs() << "\t\tX = " << Xq << ", Y = " << Yq << "\n");
524      if (Xq.slt(0) || Yq.slt(0)) {
525        X->setEmpty();
526        ++DeltaSuccesses;
527        return true;
528      }
529      if (const SCEVConstant *CUB =
530          collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) {
531        APInt UpperBound = CUB->getValue()->getValue();
532        DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");
533        if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {
534          X->setEmpty();
535          ++DeltaSuccesses;
536          return true;
537        }
538      }
539      X->setPoint(SE->getConstant(Xq),
540                  SE->getConstant(Yq),
541                  X->getAssociatedLoop());
542      ++DeltaSuccesses;
543      return true;
544    }
545    return false;
546  }
547
548  // if (X->isLine() && Y->isPoint()) This case can't occur.
549  assert(!(X->isLine() && Y->isPoint()) && "This case should never occur");
550
551  if (X->isPoint() && Y->isLine()) {
552    DEBUG(dbgs() << "\t    intersect Point and Line\n");
553    const SCEV *A1X1 = SE->getMulExpr(Y->getA(), X->getX());
554    const SCEV *B1Y1 = SE->getMulExpr(Y->getB(), X->getY());
555    const SCEV *Sum = SE->getAddExpr(A1X1, B1Y1);
556    if (isKnownPredicate(CmpInst::ICMP_EQ, Sum, Y->getC()))
557      return false;
558    if (isKnownPredicate(CmpInst::ICMP_NE, Sum, Y->getC())) {
559      X->setEmpty();
560      ++DeltaSuccesses;
561      return true;
562    }
563    return false;
564  }
565
566  llvm_unreachable("shouldn't reach the end of Constraint intersection");
567  return false;
568}
569
570
571//===----------------------------------------------------------------------===//
572// DependenceAnalysis methods
573
574// For debugging purposes. Dumps a dependence to OS.
575void Dependence::dump(raw_ostream &OS) const {
576  bool Splitable = false;
577  if (isConfused())
578    OS << "confused";
579  else {
580    if (isConsistent())
581      OS << "consistent ";
582    if (isFlow())
583      OS << "flow";
584    else if (isOutput())
585      OS << "output";
586    else if (isAnti())
587      OS << "anti";
588    else if (isInput())
589      OS << "input";
590    unsigned Levels = getLevels();
591    OS << " [";
592    for (unsigned II = 1; II <= Levels; ++II) {
593      if (isSplitable(II))
594        Splitable = true;
595      if (isPeelFirst(II))
596        OS << 'p';
597      const SCEV *Distance = getDistance(II);
598      if (Distance)
599        OS << *Distance;
600      else if (isScalar(II))
601        OS << "S";
602      else {
603        unsigned Direction = getDirection(II);
604        if (Direction == DVEntry::ALL)
605          OS << "*";
606        else {
607          if (Direction & DVEntry::LT)
608            OS << "<";
609          if (Direction & DVEntry::EQ)
610            OS << "=";
611          if (Direction & DVEntry::GT)
612            OS << ">";
613        }
614      }
615      if (isPeelLast(II))
616        OS << 'p';
617      if (II < Levels)
618        OS << " ";
619    }
620    if (isLoopIndependent())
621      OS << "|<";
622    OS << "]";
623    if (Splitable)
624      OS << " splitable";
625  }
626  OS << "!\n";
627}
628
629
630
631static
632AliasAnalysis::AliasResult underlyingObjectsAlias(AliasAnalysis *AA,
633                                                  const Value *A,
634                                                  const Value *B) {
635  const Value *AObj = GetUnderlyingObject(A);
636  const Value *BObj = GetUnderlyingObject(B);
637  return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()),
638                   BObj, AA->getTypeStoreSize(BObj->getType()));
639}
640
641
642// Returns true if the load or store can be analyzed. Atomic and volatile
643// operations have properties which this analysis does not understand.
644static
645bool isLoadOrStore(const Instruction *I) {
646  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
647    return LI->isUnordered();
648  else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
649    return SI->isUnordered();
650  return false;
651}
652
653
654static
655Value *getPointerOperand(Instruction *I) {
656  if (LoadInst *LI = dyn_cast<LoadInst>(I))
657    return LI->getPointerOperand();
658  if (StoreInst *SI = dyn_cast<StoreInst>(I))
659    return SI->getPointerOperand();
660  llvm_unreachable("Value is not load or store instruction");
661  return 0;
662}
663
664
665// Examines the loop nesting of the Src and Dst
666// instructions and establishes their shared loops. Sets the variables
667// CommonLevels, SrcLevels, and MaxLevels.
668// The source and destination instructions needn't be contained in the same
669// loop. The routine establishNestingLevels finds the level of most deeply
670// nested loop that contains them both, CommonLevels. An instruction that's
671// not contained in a loop is at level = 0. MaxLevels is equal to the level
672// of the source plus the level of the destination, minus CommonLevels.
673// This lets us allocate vectors MaxLevels in length, with room for every
674// distinct loop referenced in both the source and destination subscripts.
675// The variable SrcLevels is the nesting depth of the source instruction.
676// It's used to help calculate distinct loops referenced by the destination.
677// Here's the map from loops to levels:
678//            0 - unused
679//            1 - outermost common loop
680//          ... - other common loops
681// CommonLevels - innermost common loop
682//          ... - loops containing Src but not Dst
683//    SrcLevels - innermost loop containing Src but not Dst
684//          ... - loops containing Dst but not Src
685//    MaxLevels - innermost loops containing Dst but not Src
686// Consider the follow code fragment:
687//   for (a = ...) {
688//     for (b = ...) {
689//       for (c = ...) {
690//         for (d = ...) {
691//           A[] = ...;
692//         }
693//       }
694//       for (e = ...) {
695//         for (f = ...) {
696//           for (g = ...) {
697//             ... = A[];
698//           }
699//         }
700//       }
701//     }
702//   }
703// If we're looking at the possibility of a dependence between the store
704// to A (the Src) and the load from A (the Dst), we'll note that they
705// have 2 loops in common, so CommonLevels will equal 2 and the direction
706// vector for Result will have 2 entries. SrcLevels = 4 and MaxLevels = 7.
707// A map from loop names to loop numbers would look like
708//     a - 1
709//     b - 2 = CommonLevels
710//     c - 3
711//     d - 4 = SrcLevels
712//     e - 5
713//     f - 6
714//     g - 7 = MaxLevels
715void DependenceAnalysis::establishNestingLevels(const Instruction *Src,
716                                                const Instruction *Dst) {
717  const BasicBlock *SrcBlock = Src->getParent();
718  const BasicBlock *DstBlock = Dst->getParent();
719  unsigned SrcLevel = LI->getLoopDepth(SrcBlock);
720  unsigned DstLevel = LI->getLoopDepth(DstBlock);
721  const Loop *SrcLoop = LI->getLoopFor(SrcBlock);
722  const Loop *DstLoop = LI->getLoopFor(DstBlock);
723  SrcLevels = SrcLevel;
724  MaxLevels = SrcLevel + DstLevel;
725  while (SrcLevel > DstLevel) {
726    SrcLoop = SrcLoop->getParentLoop();
727    SrcLevel--;
728  }
729  while (DstLevel > SrcLevel) {
730    DstLoop = DstLoop->getParentLoop();
731    DstLevel--;
732  }
733  while (SrcLoop != DstLoop) {
734    SrcLoop = SrcLoop->getParentLoop();
735    DstLoop = DstLoop->getParentLoop();
736    SrcLevel--;
737  }
738  CommonLevels = SrcLevel;
739  MaxLevels -= CommonLevels;
740}
741
742
743// Given one of the loops containing the source, return
744// its level index in our numbering scheme.
745unsigned DependenceAnalysis::mapSrcLoop(const Loop *SrcLoop) const {
746  return SrcLoop->getLoopDepth();
747}
748
749
750// Given one of the loops containing the destination,
751// return its level index in our numbering scheme.
752unsigned DependenceAnalysis::mapDstLoop(const Loop *DstLoop) const {
753  unsigned D = DstLoop->getLoopDepth();
754  if (D > CommonLevels)
755    return D - CommonLevels + SrcLevels;
756  else
757    return D;
758}
759
760
761// Returns true if Expression is loop invariant in LoopNest.
762bool DependenceAnalysis::isLoopInvariant(const SCEV *Expression,
763                                         const Loop *LoopNest) const {
764  if (!LoopNest)
765    return true;
766  return SE->isLoopInvariant(Expression, LoopNest) &&
767    isLoopInvariant(Expression, LoopNest->getParentLoop());
768}
769
770
771
772// Finds the set of loops from the LoopNest that
773// have a level <= CommonLevels and are referred to by the SCEV Expression.
774void DependenceAnalysis::collectCommonLoops(const SCEV *Expression,
775                                            const Loop *LoopNest,
776                                            SmallBitVector &Loops) const {
777  while (LoopNest) {
778    unsigned Level = LoopNest->getLoopDepth();
779    if (Level <= CommonLevels && !SE->isLoopInvariant(Expression, LoopNest))
780      Loops.set(Level);
781    LoopNest = LoopNest->getParentLoop();
782  }
783}
784
785
786// removeMatchingExtensions - Examines a subscript pair.
787// If the source and destination are identically sign (or zero)
788// extended, it strips off the extension in an effect to simplify
789// the actual analysis.
790void DependenceAnalysis::removeMatchingExtensions(Subscript *Pair) {
791  const SCEV *Src = Pair->Src;
792  const SCEV *Dst = Pair->Dst;
793  if ((isa<SCEVZeroExtendExpr>(Src) && isa<SCEVZeroExtendExpr>(Dst)) ||
794      (isa<SCEVSignExtendExpr>(Src) && isa<SCEVSignExtendExpr>(Dst))) {
795    const SCEVCastExpr *SrcCast = cast<SCEVCastExpr>(Src);
796    const SCEVCastExpr *DstCast = cast<SCEVCastExpr>(Dst);
797    if (SrcCast->getType() == DstCast->getType()) {
798      Pair->Src = SrcCast->getOperand();
799      Pair->Dst = DstCast->getOperand();
800    }
801  }
802}
803
804
805// Examine the scev and return true iff it's linear.
806// Collect any loops mentioned in the set of "Loops".
807bool DependenceAnalysis::checkSrcSubscript(const SCEV *Src,
808                                           const Loop *LoopNest,
809                                           SmallBitVector &Loops) {
810  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Src);
811  if (!AddRec)
812    return isLoopInvariant(Src, LoopNest);
813  const SCEV *Start = AddRec->getStart();
814  const SCEV *Step = AddRec->getStepRecurrence(*SE);
815  if (!isLoopInvariant(Step, LoopNest))
816    return false;
817  Loops.set(mapSrcLoop(AddRec->getLoop()));
818  return checkSrcSubscript(Start, LoopNest, Loops);
819}
820
821
822
823// Examine the scev and return true iff it's linear.
824// Collect any loops mentioned in the set of "Loops".
825bool DependenceAnalysis::checkDstSubscript(const SCEV *Dst,
826                                           const Loop *LoopNest,
827                                           SmallBitVector &Loops) {
828  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Dst);
829  if (!AddRec)
830    return isLoopInvariant(Dst, LoopNest);
831  const SCEV *Start = AddRec->getStart();
832  const SCEV *Step = AddRec->getStepRecurrence(*SE);
833  if (!isLoopInvariant(Step, LoopNest))
834    return false;
835  Loops.set(mapDstLoop(AddRec->getLoop()));
836  return checkDstSubscript(Start, LoopNest, Loops);
837}
838
839
840// Examines the subscript pair (the Src and Dst SCEVs)
841// and classifies it as either ZIV, SIV, RDIV, MIV, or Nonlinear.
842// Collects the associated loops in a set.
843DependenceAnalysis::Subscript::ClassificationKind
844DependenceAnalysis::classifyPair(const SCEV *Src, const Loop *SrcLoopNest,
845                                 const SCEV *Dst, const Loop *DstLoopNest,
846                                 SmallBitVector &Loops) {
847  SmallBitVector SrcLoops(MaxLevels + 1);
848  SmallBitVector DstLoops(MaxLevels + 1);
849  if (!checkSrcSubscript(Src, SrcLoopNest, SrcLoops))
850    return Subscript::NonLinear;
851  if (!checkDstSubscript(Dst, DstLoopNest, DstLoops))
852    return Subscript::NonLinear;
853  Loops = SrcLoops;
854  Loops |= DstLoops;
855  unsigned N = Loops.count();
856  if (N == 0)
857    return Subscript::ZIV;
858  if (N == 1)
859    return Subscript::SIV;
860  if (N == 2 && (SrcLoops.count() == 0 ||
861                 DstLoops.count() == 0 ||
862                 (SrcLoops.count() == 1 && DstLoops.count() == 1)))
863    return Subscript::RDIV;
864  return Subscript::MIV;
865}
866
867
868// A wrapper around SCEV::isKnownPredicate.
869// Looks for cases where we're interested in comparing for equality.
870// If both X and Y have been identically sign or zero extended,
871// it strips off the (confusing) extensions before invoking
872// SCEV::isKnownPredicate. Perhaps, someday, the ScalarEvolution package
873// will be similarly updated.
874//
875// If SCEV::isKnownPredicate can't prove the predicate,
876// we try simple subtraction, which seems to help in some cases
877// involving symbolics.
878bool DependenceAnalysis::isKnownPredicate(ICmpInst::Predicate Pred,
879                                          const SCEV *X,
880                                          const SCEV *Y) const {
881  if (Pred == CmpInst::ICMP_EQ ||
882      Pred == CmpInst::ICMP_NE) {
883    if ((isa<SCEVSignExtendExpr>(X) &&
884         isa<SCEVSignExtendExpr>(Y)) ||
885        (isa<SCEVZeroExtendExpr>(X) &&
886         isa<SCEVZeroExtendExpr>(Y))) {
887      const SCEVCastExpr *CX = cast<SCEVCastExpr>(X);
888      const SCEVCastExpr *CY = cast<SCEVCastExpr>(Y);
889      const SCEV *Xop = CX->getOperand();
890      const SCEV *Yop = CY->getOperand();
891      if (Xop->getType() == Yop->getType()) {
892        X = Xop;
893        Y = Yop;
894      }
895    }
896  }
897  if (SE->isKnownPredicate(Pred, X, Y))
898    return true;
899  // If SE->isKnownPredicate can't prove the condition,
900  // we try the brute-force approach of subtracting
901  // and testing the difference.
902  // By testing with SE->isKnownPredicate first, we avoid
903  // the possibility of overflow when the arguments are constants.
904  const SCEV *Delta = SE->getMinusSCEV(X, Y);
905  switch (Pred) {
906  case CmpInst::ICMP_EQ:
907    return Delta->isZero();
908  case CmpInst::ICMP_NE:
909    return SE->isKnownNonZero(Delta);
910  case CmpInst::ICMP_SGE:
911    return SE->isKnownNonNegative(Delta);
912  case CmpInst::ICMP_SLE:
913    return SE->isKnownNonPositive(Delta);
914  case CmpInst::ICMP_SGT:
915    return SE->isKnownPositive(Delta);
916  case CmpInst::ICMP_SLT:
917    return SE->isKnownNegative(Delta);
918  default:
919    llvm_unreachable("unexpected predicate in isKnownPredicate");
920  }
921}
922
923
924// All subscripts are all the same type.
925// Loop bound may be smaller (e.g., a char).
926// Should zero extend loop bound, since it's always >= 0.
927// This routine collects upper bound and extends if needed.
928// Return null if no bound available.
929const SCEV *DependenceAnalysis::collectUpperBound(const Loop *L,
930                                                  Type *T) const {
931  if (SE->hasLoopInvariantBackedgeTakenCount(L)) {
932    const SCEV *UB = SE->getBackedgeTakenCount(L);
933    return SE->getNoopOrZeroExtend(UB, T);
934  }
935  return NULL;
936}
937
938
939// Calls collectUpperBound(), then attempts to cast it to SCEVConstant.
940// If the cast fails, returns NULL.
941const SCEVConstant *DependenceAnalysis::collectConstantUpperBound(const Loop *L,
942                                                                  Type *T
943                                                                  ) const {
944  if (const SCEV *UB = collectUpperBound(L, T))
945    return dyn_cast<SCEVConstant>(UB);
946  return NULL;
947}
948
949
950// testZIV -
951// When we have a pair of subscripts of the form [c1] and [c2],
952// where c1 and c2 are both loop invariant, we attack it using
953// the ZIV test. Basically, we test by comparing the two values,
954// but there are actually three possible results:
955// 1) the values are equal, so there's a dependence
956// 2) the values are different, so there's no dependence
957// 3) the values might be equal, so we have to assume a dependence.
958//
959// Return true if dependence disproved.
960bool DependenceAnalysis::testZIV(const SCEV *Src,
961                                 const SCEV *Dst,
962                                 FullDependence &Result) const {
963  DEBUG(dbgs() << "    src = " << *Src << "\n");
964  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
965  ++ZIVapplications;
966  if (isKnownPredicate(CmpInst::ICMP_EQ, Src, Dst)) {
967    DEBUG(dbgs() << "    provably dependent\n");
968    return false; // provably dependent
969  }
970  if (isKnownPredicate(CmpInst::ICMP_NE, Src, Dst)) {
971    DEBUG(dbgs() << "    provably independent\n");
972    ++ZIVindependence;
973    return true; // provably independent
974  }
975  DEBUG(dbgs() << "    possibly dependent\n");
976  Result.Consistent = false;
977  return false; // possibly dependent
978}
979
980
981// strongSIVtest -
982// From the paper, Practical Dependence Testing, Section 4.2.1
983//
984// When we have a pair of subscripts of the form [c1 + a*i] and [c2 + a*i],
985// where i is an induction variable, c1 and c2 are loop invariant,
986//  and a is a constant, we can solve it exactly using the Strong SIV test.
987//
988// Can prove independence. Failing that, can compute distance (and direction).
989// In the presence of symbolic terms, we can sometimes make progress.
990//
991// If there's a dependence,
992//
993//    c1 + a*i = c2 + a*i'
994//
995// The dependence distance is
996//
997//    d = i' - i = (c1 - c2)/a
998//
999// A dependence only exists if d is an integer and abs(d) <= U, where U is the
1000// loop's upper bound. If a dependence exists, the dependence direction is
1001// defined as
1002//
1003//                { < if d > 0
1004//    direction = { = if d = 0
1005//                { > if d < 0
1006//
1007// Return true if dependence disproved.
1008bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,
1009                                       const SCEV *SrcConst,
1010                                       const SCEV *DstConst,
1011                                       const Loop *CurLoop,
1012                                       unsigned Level,
1013                                       FullDependence &Result,
1014                                       Constraint &NewConstraint) const {
1015  DEBUG(dbgs() << "\tStrong SIV test\n");
1016  DEBUG(dbgs() << "\t    Coeff = " << *Coeff);
1017  DEBUG(dbgs() << ", " << *Coeff->getType() << "\n");
1018  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst);
1019  DEBUG(dbgs() << ", " << *SrcConst->getType() << "\n");
1020  DEBUG(dbgs() << "\t    DstConst = " << *DstConst);
1021  DEBUG(dbgs() << ", " << *DstConst->getType() << "\n");
1022  ++StrongSIVapplications;
1023  assert(0 < Level && Level <= CommonLevels && "level out of range");
1024  Level--;
1025
1026  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1027  DEBUG(dbgs() << "\t    Delta = " << *Delta);
1028  DEBUG(dbgs() << ", " << *Delta->getType() << "\n");
1029
1030  // check that |Delta| < iteration count
1031  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1032    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound);
1033    DEBUG(dbgs() << ", " << *UpperBound->getType() << "\n");
1034    const SCEV *AbsDelta =
1035      SE->isKnownNonNegative(Delta) ? Delta : SE->getNegativeSCEV(Delta);
1036    const SCEV *AbsCoeff =
1037      SE->isKnownNonNegative(Coeff) ? Coeff : SE->getNegativeSCEV(Coeff);
1038    const SCEV *Product = SE->getMulExpr(UpperBound, AbsCoeff);
1039    if (isKnownPredicate(CmpInst::ICMP_SGT, AbsDelta, Product)) {
1040      // Distance greater than trip count - no dependence
1041      ++StrongSIVindependence;
1042      ++StrongSIVsuccesses;
1043      return true;
1044    }
1045  }
1046
1047  // Can we compute distance?
1048  if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) {
1049    APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue();
1050    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue();
1051    APInt Distance  = ConstDelta; // these need to be initialized
1052    APInt Remainder = ConstDelta;
1053    APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder);
1054    DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1055    DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1056    // Make sure Coeff divides Delta exactly
1057    if (Remainder != 0) {
1058      // Coeff doesn't divide Distance, no dependence
1059      ++StrongSIVindependence;
1060      ++StrongSIVsuccesses;
1061      return true;
1062    }
1063    Result.DV[Level].Distance = SE->getConstant(Distance);
1064    NewConstraint.setDistance(SE->getConstant(Distance), CurLoop);
1065    if (Distance.sgt(0))
1066      Result.DV[Level].Direction &= Dependence::DVEntry::LT;
1067    else if (Distance.slt(0))
1068      Result.DV[Level].Direction &= Dependence::DVEntry::GT;
1069    else
1070      Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1071    ++StrongSIVsuccesses;
1072  }
1073  else if (Delta->isZero()) {
1074    // since 0/X == 0
1075    Result.DV[Level].Distance = Delta;
1076    NewConstraint.setDistance(Delta, CurLoop);
1077    Result.DV[Level].Direction &= Dependence::DVEntry::EQ;
1078    ++StrongSIVsuccesses;
1079  }
1080  else {
1081    if (Coeff->isOne()) {
1082      DEBUG(dbgs() << "\t    Distance = " << *Delta << "\n");
1083      Result.DV[Level].Distance = Delta; // since X/1 == X
1084      NewConstraint.setDistance(Delta, CurLoop);
1085    }
1086    else {
1087      Result.Consistent = false;
1088      NewConstraint.setLine(Coeff,
1089                            SE->getNegativeSCEV(Coeff),
1090                            SE->getNegativeSCEV(Delta), CurLoop);
1091    }
1092
1093    // maybe we can get a useful direction
1094    bool DeltaMaybeZero     = !SE->isKnownNonZero(Delta);
1095    bool DeltaMaybePositive = !SE->isKnownNonPositive(Delta);
1096    bool DeltaMaybeNegative = !SE->isKnownNonNegative(Delta);
1097    bool CoeffMaybePositive = !SE->isKnownNonPositive(Coeff);
1098    bool CoeffMaybeNegative = !SE->isKnownNonNegative(Coeff);
1099    // The double negatives above are confusing.
1100    // It helps to read !SE->isKnownNonZero(Delta)
1101    // as "Delta might be Zero"
1102    unsigned NewDirection = Dependence::DVEntry::NONE;
1103    if ((DeltaMaybePositive && CoeffMaybePositive) ||
1104        (DeltaMaybeNegative && CoeffMaybeNegative))
1105      NewDirection = Dependence::DVEntry::LT;
1106    if (DeltaMaybeZero)
1107      NewDirection |= Dependence::DVEntry::EQ;
1108    if ((DeltaMaybeNegative && CoeffMaybePositive) ||
1109        (DeltaMaybePositive && CoeffMaybeNegative))
1110      NewDirection |= Dependence::DVEntry::GT;
1111    if (NewDirection < Result.DV[Level].Direction)
1112      ++StrongSIVsuccesses;
1113    Result.DV[Level].Direction &= NewDirection;
1114  }
1115  return false;
1116}
1117
1118
1119// weakCrossingSIVtest -
1120// From the paper, Practical Dependence Testing, Section 4.2.2
1121//
1122// When we have a pair of subscripts of the form [c1 + a*i] and [c2 - a*i],
1123// where i is an induction variable, c1 and c2 are loop invariant,
1124// and a is a constant, we can solve it exactly using the
1125// Weak-Crossing SIV test.
1126//
1127// Given c1 + a*i = c2 - a*i', we can look for the intersection of
1128// the two lines, where i = i', yielding
1129//
1130//    c1 + a*i = c2 - a*i
1131//    2a*i = c2 - c1
1132//    i = (c2 - c1)/2a
1133//
1134// If i < 0, there is no dependence.
1135// If i > upperbound, there is no dependence.
1136// If i = 0 (i.e., if c1 = c2), there's a dependence with distance = 0.
1137// If i = upperbound, there's a dependence with distance = 0.
1138// If i is integral, there's a dependence (all directions).
1139// If the non-integer part = 1/2, there's a dependence (<> directions).
1140// Otherwise, there's no dependence.
1141//
1142// Can prove independence. Failing that,
1143// can sometimes refine the directions.
1144// Can determine iteration for splitting.
1145//
1146// Return true if dependence disproved.
1147bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,
1148                                             const SCEV *SrcConst,
1149                                             const SCEV *DstConst,
1150                                             const Loop *CurLoop,
1151                                             unsigned Level,
1152                                             FullDependence &Result,
1153                                             Constraint &NewConstraint,
1154                                             const SCEV *&SplitIter) const {
1155  DEBUG(dbgs() << "\tWeak-Crossing SIV test\n");
1156  DEBUG(dbgs() << "\t    Coeff = " << *Coeff << "\n");
1157  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1158  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1159  ++WeakCrossingSIVapplications;
1160  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1161  Level--;
1162  Result.Consistent = false;
1163  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1164  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1165  NewConstraint.setLine(Coeff, Coeff, Delta, CurLoop);
1166  if (Delta->isZero()) {
1167    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1168    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1169    ++WeakCrossingSIVsuccesses;
1170    if (!Result.DV[Level].Direction) {
1171      ++WeakCrossingSIVindependence;
1172      return true;
1173    }
1174    Result.DV[Level].Distance = Delta; // = 0
1175    return false;
1176  }
1177  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(Coeff);
1178  if (!ConstCoeff)
1179    return false;
1180
1181  Result.DV[Level].Splitable = true;
1182  if (SE->isKnownNegative(ConstCoeff)) {
1183    ConstCoeff = dyn_cast<SCEVConstant>(SE->getNegativeSCEV(ConstCoeff));
1184    assert(ConstCoeff &&
1185           "dynamic cast of negative of ConstCoeff should yield constant");
1186    Delta = SE->getNegativeSCEV(Delta);
1187  }
1188  assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");
1189
1190  // compute SplitIter for use by DependenceAnalysis::getSplitIteration()
1191  SplitIter =
1192    SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0),
1193                                    Delta),
1194                    SE->getMulExpr(SE->getConstant(Delta->getType(), 2),
1195                                   ConstCoeff));
1196  DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");
1197
1198  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1199  if (!ConstDelta)
1200    return false;
1201
1202  // We're certain that ConstCoeff > 0; therefore,
1203  // if Delta < 0, then no dependence.
1204  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1205  DEBUG(dbgs() << "\t    ConstCoeff = " << *ConstCoeff << "\n");
1206  if (SE->isKnownNegative(Delta)) {
1207    // No dependence, Delta < 0
1208    ++WeakCrossingSIVindependence;
1209    ++WeakCrossingSIVsuccesses;
1210    return true;
1211  }
1212
1213  // We're certain that Delta > 0 and ConstCoeff > 0.
1214  // Check Delta/(2*ConstCoeff) against upper loop bound
1215  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1216    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1217    const SCEV *ConstantTwo = SE->getConstant(UpperBound->getType(), 2);
1218    const SCEV *ML = SE->getMulExpr(SE->getMulExpr(ConstCoeff, UpperBound),
1219                                    ConstantTwo);
1220    DEBUG(dbgs() << "\t    ML = " << *ML << "\n");
1221    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, ML)) {
1222      // Delta too big, no dependence
1223      ++WeakCrossingSIVindependence;
1224      ++WeakCrossingSIVsuccesses;
1225      return true;
1226    }
1227    if (isKnownPredicate(CmpInst::ICMP_EQ, Delta, ML)) {
1228      // i = i' = UB
1229      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::LT);
1230      Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::GT);
1231      ++WeakCrossingSIVsuccesses;
1232      if (!Result.DV[Level].Direction) {
1233        ++WeakCrossingSIVindependence;
1234        return true;
1235      }
1236      Result.DV[Level].Splitable = false;
1237      Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0);
1238      return false;
1239    }
1240  }
1241
1242  // check that Coeff divides Delta
1243  APInt APDelta = ConstDelta->getValue()->getValue();
1244  APInt APCoeff = ConstCoeff->getValue()->getValue();
1245  APInt Distance = APDelta; // these need to be initialzed
1246  APInt Remainder = APDelta;
1247  APInt::sdivrem(APDelta, APCoeff, Distance, Remainder);
1248  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1249  if (Remainder != 0) {
1250    // Coeff doesn't divide Delta, no dependence
1251    ++WeakCrossingSIVindependence;
1252    ++WeakCrossingSIVsuccesses;
1253    return true;
1254  }
1255  DEBUG(dbgs() << "\t    Distance = " << Distance << "\n");
1256
1257  // if 2*Coeff doesn't divide Delta, then the equal direction isn't possible
1258  APInt Two = APInt(Distance.getBitWidth(), 2, true);
1259  Remainder = Distance.srem(Two);
1260  DEBUG(dbgs() << "\t    Remainder = " << Remainder << "\n");
1261  if (Remainder != 0) {
1262    // Equal direction isn't possible
1263    Result.DV[Level].Direction &= unsigned(~Dependence::DVEntry::EQ);
1264    ++WeakCrossingSIVsuccesses;
1265  }
1266  return false;
1267}
1268
1269
1270// Kirch's algorithm, from
1271//
1272//        Optimizing Supercompilers for Supercomputers
1273//        Michael Wolfe
1274//        MIT Press, 1989
1275//
1276// Program 2.1, page 29.
1277// Computes the GCD of AM and BM.
1278// Also finds a solution to the equation ax - by = gdc(a, b).
1279// Returns true iff the gcd divides Delta.
1280static
1281bool findGCD(unsigned Bits, APInt AM, APInt BM, APInt Delta,
1282             APInt &G, APInt &X, APInt &Y) {
1283  APInt A0(Bits, 1, true), A1(Bits, 0, true);
1284  APInt B0(Bits, 0, true), B1(Bits, 1, true);
1285  APInt G0 = AM.abs();
1286  APInt G1 = BM.abs();
1287  APInt Q = G0; // these need to be initialized
1288  APInt R = G0;
1289  APInt::sdivrem(G0, G1, Q, R);
1290  while (R != 0) {
1291    APInt A2 = A0 - Q*A1; A0 = A1; A1 = A2;
1292    APInt B2 = B0 - Q*B1; B0 = B1; B1 = B2;
1293    G0 = G1; G1 = R;
1294    APInt::sdivrem(G0, G1, Q, R);
1295  }
1296  G = G1;
1297  DEBUG(dbgs() << "\t    GCD = " << G << "\n");
1298  X = AM.slt(0) ? -A1 : A1;
1299  Y = BM.slt(0) ? B1 : -B1;
1300
1301  // make sure gcd divides Delta
1302  R = Delta.srem(G);
1303  if (R != 0)
1304    return true; // gcd doesn't divide Delta, no dependence
1305  Q = Delta.sdiv(G);
1306  X *= Q;
1307  Y *= Q;
1308  return false;
1309}
1310
1311
1312static
1313APInt floorOfQuotient(APInt A, APInt B) {
1314  APInt Q = A; // these need to be initialized
1315  APInt R = A;
1316  APInt::sdivrem(A, B, Q, R);
1317  if (R == 0)
1318    return Q;
1319  if ((A.sgt(0) && B.sgt(0)) ||
1320      (A.slt(0) && B.slt(0)))
1321    return Q;
1322  else
1323    return Q - 1;
1324}
1325
1326
1327static
1328APInt ceilingOfQuotient(APInt A, APInt B) {
1329  APInt Q = A; // these need to be initialized
1330  APInt R = A;
1331  APInt::sdivrem(A, B, Q, R);
1332  if (R == 0)
1333    return Q;
1334  if ((A.sgt(0) && B.sgt(0)) ||
1335      (A.slt(0) && B.slt(0)))
1336    return Q + 1;
1337  else
1338    return Q;
1339}
1340
1341
1342static
1343APInt maxAPInt(APInt A, APInt B) {
1344  return A.sgt(B) ? A : B;
1345}
1346
1347
1348static
1349APInt minAPInt(APInt A, APInt B) {
1350  return A.slt(B) ? A : B;
1351}
1352
1353
1354// exactSIVtest -
1355// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*i],
1356// where i is an induction variable, c1 and c2 are loop invariant, and a1
1357// and a2 are constant, we can solve it exactly using an algorithm developed
1358// by Banerjee and Wolfe. See Section 2.5.3 in
1359//
1360//        Optimizing Supercompilers for Supercomputers
1361//        Michael Wolfe
1362//        MIT Press, 1989
1363//
1364// It's slower than the specialized tests (strong SIV, weak-zero SIV, etc),
1365// so use them if possible. They're also a bit better with symbolics and,
1366// in the case of the strong SIV test, can compute Distances.
1367//
1368// Return true if dependence disproved.
1369bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,
1370                                      const SCEV *DstCoeff,
1371                                      const SCEV *SrcConst,
1372                                      const SCEV *DstConst,
1373                                      const Loop *CurLoop,
1374                                      unsigned Level,
1375                                      FullDependence &Result,
1376                                      Constraint &NewConstraint) const {
1377  DEBUG(dbgs() << "\tExact SIV test\n");
1378  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1379  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1380  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1381  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1382  ++ExactSIVapplications;
1383  assert(0 < Level && Level <= CommonLevels && "Level out of range");
1384  Level--;
1385  Result.Consistent = false;
1386  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1387  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1388  NewConstraint.setLine(SrcCoeff, SE->getNegativeSCEV(DstCoeff),
1389                        Delta, CurLoop);
1390  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1391  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1392  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1393  if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1394    return false;
1395
1396  // find gcd
1397  APInt G, X, Y;
1398  APInt AM = ConstSrcCoeff->getValue()->getValue();
1399  APInt BM = ConstDstCoeff->getValue()->getValue();
1400  unsigned Bits = AM.getBitWidth();
1401  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
1402    // gcd doesn't divide Delta, no dependence
1403    ++ExactSIVindependence;
1404    ++ExactSIVsuccesses;
1405    return true;
1406  }
1407
1408  DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1409
1410  // since SCEV construction normalizes, LM = 0
1411  APInt UM(Bits, 1, true);
1412  bool UMvalid = false;
1413  // UM is perhaps unavailable, let's check
1414  if (const SCEVConstant *CUB =
1415      collectConstantUpperBound(CurLoop, Delta->getType())) {
1416    UM = CUB->getValue()->getValue();
1417    DEBUG(dbgs() << "\t    UM = " << UM << "\n");
1418    UMvalid = true;
1419  }
1420
1421  APInt TU(APInt::getSignedMaxValue(Bits));
1422  APInt TL(APInt::getSignedMinValue(Bits));
1423
1424  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1425  APInt TMUL = BM.sdiv(G);
1426  if (TMUL.sgt(0)) {
1427    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1428    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1429    if (UMvalid) {
1430      TU = minAPInt(TU, floorOfQuotient(UM - X, TMUL));
1431      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1432    }
1433  }
1434  else {
1435    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1436    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1437    if (UMvalid) {
1438      TL = maxAPInt(TL, ceilingOfQuotient(UM - X, TMUL));
1439      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1440    }
1441  }
1442
1443  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1444  TMUL = AM.sdiv(G);
1445  if (TMUL.sgt(0)) {
1446    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1447    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1448    if (UMvalid) {
1449      TU = minAPInt(TU, floorOfQuotient(UM - Y, TMUL));
1450      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1451    }
1452  }
1453  else {
1454    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1455    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1456    if (UMvalid) {
1457      TL = maxAPInt(TL, ceilingOfQuotient(UM - Y, TMUL));
1458      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1459    }
1460  }
1461  if (TL.sgt(TU)) {
1462    ++ExactSIVindependence;
1463    ++ExactSIVsuccesses;
1464    return true;
1465  }
1466
1467  // explore directions
1468  unsigned NewDirection = Dependence::DVEntry::NONE;
1469
1470  // less than
1471  APInt SaveTU(TU); // save these
1472  APInt SaveTL(TL);
1473  DEBUG(dbgs() << "\t    exploring LT direction\n");
1474  TMUL = AM - BM;
1475  if (TMUL.sgt(0)) {
1476    TL = maxAPInt(TL, ceilingOfQuotient(X - Y + 1, TMUL));
1477    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1478  }
1479  else {
1480    TU = minAPInt(TU, floorOfQuotient(X - Y + 1, TMUL));
1481    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1482  }
1483  if (TL.sle(TU)) {
1484    NewDirection |= Dependence::DVEntry::LT;
1485    ++ExactSIVsuccesses;
1486  }
1487
1488  // equal
1489  TU = SaveTU; // restore
1490  TL = SaveTL;
1491  DEBUG(dbgs() << "\t    exploring EQ direction\n");
1492  if (TMUL.sgt(0)) {
1493    TL = maxAPInt(TL, ceilingOfQuotient(X - Y, TMUL));
1494    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1495  }
1496  else {
1497    TU = minAPInt(TU, floorOfQuotient(X - Y, TMUL));
1498    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1499  }
1500  TMUL = BM - AM;
1501  if (TMUL.sgt(0)) {
1502    TL = maxAPInt(TL, ceilingOfQuotient(Y - X, TMUL));
1503    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1504  }
1505  else {
1506    TU = minAPInt(TU, floorOfQuotient(Y - X, TMUL));
1507    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1508  }
1509  if (TL.sle(TU)) {
1510    NewDirection |= Dependence::DVEntry::EQ;
1511    ++ExactSIVsuccesses;
1512  }
1513
1514  // greater than
1515  TU = SaveTU; // restore
1516  TL = SaveTL;
1517  DEBUG(dbgs() << "\t    exploring GT direction\n");
1518  if (TMUL.sgt(0)) {
1519    TL = maxAPInt(TL, ceilingOfQuotient(Y - X + 1, TMUL));
1520    DEBUG(dbgs() << "\t\t    TL = " << TL << "\n");
1521  }
1522  else {
1523    TU = minAPInt(TU, floorOfQuotient(Y - X + 1, TMUL));
1524    DEBUG(dbgs() << "\t\t    TU = " << TU << "\n");
1525  }
1526  if (TL.sle(TU)) {
1527    NewDirection |= Dependence::DVEntry::GT;
1528    ++ExactSIVsuccesses;
1529  }
1530
1531  // finished
1532  Result.DV[Level].Direction &= NewDirection;
1533  if (Result.DV[Level].Direction == Dependence::DVEntry::NONE)
1534    ++ExactSIVindependence;
1535  return Result.DV[Level].Direction == Dependence::DVEntry::NONE;
1536}
1537
1538
1539
1540// Return true if the divisor evenly divides the dividend.
1541static
1542bool isRemainderZero(const SCEVConstant *Dividend,
1543                     const SCEVConstant *Divisor) {
1544  APInt ConstDividend = Dividend->getValue()->getValue();
1545  APInt ConstDivisor = Divisor->getValue()->getValue();
1546  return ConstDividend.srem(ConstDivisor) == 0;
1547}
1548
1549
1550// weakZeroSrcSIVtest -
1551// From the paper, Practical Dependence Testing, Section 4.2.2
1552//
1553// When we have a pair of subscripts of the form [c1] and [c2 + a*i],
1554// where i is an induction variable, c1 and c2 are loop invariant,
1555// and a is a constant, we can solve it exactly using the
1556// Weak-Zero SIV test.
1557//
1558// Given
1559//
1560//    c1 = c2 + a*i
1561//
1562// we get
1563//
1564//    (c1 - c2)/a = i
1565//
1566// If i is not an integer, there's no dependence.
1567// If i < 0 or > UB, there's no dependence.
1568// If i = 0, the direction is <= and peeling the
1569// 1st iteration will break the dependence.
1570// If i = UB, the direction is >= and peeling the
1571// last iteration will break the dependence.
1572// Otherwise, the direction is *.
1573//
1574// Can prove independence. Failing that, we can sometimes refine
1575// the directions. Can sometimes show that first or last
1576// iteration carries all the dependences (so worth peeling).
1577//
1578// (see also weakZeroDstSIVtest)
1579//
1580// Return true if dependence disproved.
1581bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,
1582                                            const SCEV *SrcConst,
1583                                            const SCEV *DstConst,
1584                                            const Loop *CurLoop,
1585                                            unsigned Level,
1586                                            FullDependence &Result,
1587                                            Constraint &NewConstraint) const {
1588  // For the WeakSIV test, it's possible the loop isn't common to
1589  // the Src and Dst loops. If it isn't, then there's no need to
1590  // record a direction.
1591  DEBUG(dbgs() << "\tWeak-Zero (src) SIV test\n");
1592  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << "\n");
1593  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1594  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1595  ++WeakZeroSIVapplications;
1596  assert(0 < Level && Level <= MaxLevels && "Level out of range");
1597  Level--;
1598  Result.Consistent = false;
1599  const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst);
1600  NewConstraint.setLine(SE->getConstant(Delta->getType(), 0),
1601                        DstCoeff, Delta, CurLoop);
1602  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1603  if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {
1604    if (Level < CommonLevels) {
1605      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1606      Result.DV[Level].PeelFirst = true;
1607      ++WeakZeroSIVsuccesses;
1608    }
1609    return false; // dependences caused by first iteration
1610  }
1611  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1612  if (!ConstCoeff)
1613    return false;
1614  const SCEV *AbsCoeff =
1615    SE->isKnownNegative(ConstCoeff) ?
1616    SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1617  const SCEV *NewDelta =
1618    SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1619
1620  // check that Delta/SrcCoeff < iteration count
1621  // really check NewDelta < count*AbsCoeff
1622  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1623    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1624    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1625    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1626      ++WeakZeroSIVindependence;
1627      ++WeakZeroSIVsuccesses;
1628      return true;
1629    }
1630    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1631      // dependences caused by last iteration
1632      if (Level < CommonLevels) {
1633        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1634        Result.DV[Level].PeelLast = true;
1635        ++WeakZeroSIVsuccesses;
1636      }
1637      return false;
1638    }
1639  }
1640
1641  // check that Delta/SrcCoeff >= 0
1642  // really check that NewDelta >= 0
1643  if (SE->isKnownNegative(NewDelta)) {
1644    // No dependence, newDelta < 0
1645    ++WeakZeroSIVindependence;
1646    ++WeakZeroSIVsuccesses;
1647    return true;
1648  }
1649
1650  // if SrcCoeff doesn't divide Delta, then no dependence
1651  if (isa<SCEVConstant>(Delta) &&
1652      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1653    ++WeakZeroSIVindependence;
1654    ++WeakZeroSIVsuccesses;
1655    return true;
1656  }
1657  return false;
1658}
1659
1660
1661// weakZeroDstSIVtest -
1662// From the paper, Practical Dependence Testing, Section 4.2.2
1663//
1664// When we have a pair of subscripts of the form [c1 + a*i] and [c2],
1665// where i is an induction variable, c1 and c2 are loop invariant,
1666// and a is a constant, we can solve it exactly using the
1667// Weak-Zero SIV test.
1668//
1669// Given
1670//
1671//    c1 + a*i = c2
1672//
1673// we get
1674//
1675//    i = (c2 - c1)/a
1676//
1677// If i is not an integer, there's no dependence.
1678// If i < 0 or > UB, there's no dependence.
1679// If i = 0, the direction is <= and peeling the
1680// 1st iteration will break the dependence.
1681// If i = UB, the direction is >= and peeling the
1682// last iteration will break the dependence.
1683// Otherwise, the direction is *.
1684//
1685// Can prove independence. Failing that, we can sometimes refine
1686// the directions. Can sometimes show that first or last
1687// iteration carries all the dependences (so worth peeling).
1688//
1689// (see also weakZeroSrcSIVtest)
1690//
1691// Return true if dependence disproved.
1692bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,
1693                                            const SCEV *SrcConst,
1694                                            const SCEV *DstConst,
1695                                            const Loop *CurLoop,
1696                                            unsigned Level,
1697                                            FullDependence &Result,
1698                                            Constraint &NewConstraint) const {
1699  // For the WeakSIV test, it's possible the loop isn't common to the
1700  // Src and Dst loops. If it isn't, then there's no need to record a direction.
1701  DEBUG(dbgs() << "\tWeak-Zero (dst) SIV test\n");
1702  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << "\n");
1703  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1704  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1705  ++WeakZeroSIVapplications;
1706  assert(0 < Level && Level <= SrcLevels && "Level out of range");
1707  Level--;
1708  Result.Consistent = false;
1709  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1710  NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0),
1711                        Delta, CurLoop);
1712  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1713  if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {
1714    if (Level < CommonLevels) {
1715      Result.DV[Level].Direction &= Dependence::DVEntry::LE;
1716      Result.DV[Level].PeelFirst = true;
1717      ++WeakZeroSIVsuccesses;
1718    }
1719    return false; // dependences caused by first iteration
1720  }
1721  const SCEVConstant *ConstCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1722  if (!ConstCoeff)
1723    return false;
1724  const SCEV *AbsCoeff =
1725    SE->isKnownNegative(ConstCoeff) ?
1726    SE->getNegativeSCEV(ConstCoeff) : ConstCoeff;
1727  const SCEV *NewDelta =
1728    SE->isKnownNegative(ConstCoeff) ? SE->getNegativeSCEV(Delta) : Delta;
1729
1730  // check that Delta/SrcCoeff < iteration count
1731  // really check NewDelta < count*AbsCoeff
1732  if (const SCEV *UpperBound = collectUpperBound(CurLoop, Delta->getType())) {
1733    DEBUG(dbgs() << "\t    UpperBound = " << *UpperBound << "\n");
1734    const SCEV *Product = SE->getMulExpr(AbsCoeff, UpperBound);
1735    if (isKnownPredicate(CmpInst::ICMP_SGT, NewDelta, Product)) {
1736      ++WeakZeroSIVindependence;
1737      ++WeakZeroSIVsuccesses;
1738      return true;
1739    }
1740    if (isKnownPredicate(CmpInst::ICMP_EQ, NewDelta, Product)) {
1741      // dependences caused by last iteration
1742      if (Level < CommonLevels) {
1743        Result.DV[Level].Direction &= Dependence::DVEntry::GE;
1744        Result.DV[Level].PeelLast = true;
1745        ++WeakZeroSIVsuccesses;
1746      }
1747      return false;
1748    }
1749  }
1750
1751  // check that Delta/SrcCoeff >= 0
1752  // really check that NewDelta >= 0
1753  if (SE->isKnownNegative(NewDelta)) {
1754    // No dependence, newDelta < 0
1755    ++WeakZeroSIVindependence;
1756    ++WeakZeroSIVsuccesses;
1757    return true;
1758  }
1759
1760  // if SrcCoeff doesn't divide Delta, then no dependence
1761  if (isa<SCEVConstant>(Delta) &&
1762      !isRemainderZero(cast<SCEVConstant>(Delta), ConstCoeff)) {
1763    ++WeakZeroSIVindependence;
1764    ++WeakZeroSIVsuccesses;
1765    return true;
1766  }
1767  return false;
1768}
1769
1770
1771// exactRDIVtest - Tests the RDIV subscript pair for dependence.
1772// Things of the form [c1 + a*i] and [c2 + b*j],
1773// where i and j are induction variable, c1 and c2 are loop invariant,
1774// and a and b are constants.
1775// Returns true if any possible dependence is disproved.
1776// Marks the result as inconsistent.
1777// Works in some cases that symbolicRDIVtest doesn't, and vice versa.
1778bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,
1779                                       const SCEV *DstCoeff,
1780                                       const SCEV *SrcConst,
1781                                       const SCEV *DstConst,
1782                                       const Loop *SrcLoop,
1783                                       const Loop *DstLoop,
1784                                       FullDependence &Result) const {
1785  DEBUG(dbgs() << "\tExact RDIV test\n");
1786  DEBUG(dbgs() << "\t    SrcCoeff = " << *SrcCoeff << " = AM\n");
1787  DEBUG(dbgs() << "\t    DstCoeff = " << *DstCoeff << " = BM\n");
1788  DEBUG(dbgs() << "\t    SrcConst = " << *SrcConst << "\n");
1789  DEBUG(dbgs() << "\t    DstConst = " << *DstConst << "\n");
1790  ++ExactRDIVapplications;
1791  Result.Consistent = false;
1792  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
1793  DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");
1794  const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta);
1795  const SCEVConstant *ConstSrcCoeff = dyn_cast<SCEVConstant>(SrcCoeff);
1796  const SCEVConstant *ConstDstCoeff = dyn_cast<SCEVConstant>(DstCoeff);
1797  if (!ConstDelta || !ConstSrcCoeff || !ConstDstCoeff)
1798    return false;
1799
1800  // find gcd
1801  APInt G, X, Y;
1802  APInt AM = ConstSrcCoeff->getValue()->getValue();
1803  APInt BM = ConstDstCoeff->getValue()->getValue();
1804  unsigned Bits = AM.getBitWidth();
1805  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) {
1806    // gcd doesn't divide Delta, no dependence
1807    ++ExactRDIVindependence;
1808    return true;
1809  }
1810
1811  DEBUG(dbgs() << "\t    X = " << X << ", Y = " << Y << "\n");
1812
1813  // since SCEV construction seems to normalize, LM = 0
1814  APInt SrcUM(Bits, 1, true);
1815  bool SrcUMvalid = false;
1816  // SrcUM is perhaps unavailable, let's check
1817  if (const SCEVConstant *UpperBound =
1818      collectConstantUpperBound(SrcLoop, Delta->getType())) {
1819    SrcUM = UpperBound->getValue()->getValue();
1820    DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");
1821    SrcUMvalid = true;
1822  }
1823
1824  APInt DstUM(Bits, 1, true);
1825  bool DstUMvalid = false;
1826  // UM is perhaps unavailable, let's check
1827  if (const SCEVConstant *UpperBound =
1828      collectConstantUpperBound(DstLoop, Delta->getType())) {
1829    DstUM = UpperBound->getValue()->getValue();
1830    DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");
1831    DstUMvalid = true;
1832  }
1833
1834  APInt TU(APInt::getSignedMaxValue(Bits));
1835  APInt TL(APInt::getSignedMinValue(Bits));
1836
1837  // test(BM/G, LM-X) and test(-BM/G, X-UM)
1838  APInt TMUL = BM.sdiv(G);
1839  if (TMUL.sgt(0)) {
1840    TL = maxAPInt(TL, ceilingOfQuotient(-X, TMUL));
1841    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1842    if (SrcUMvalid) {
1843      TU = minAPInt(TU, floorOfQuotient(SrcUM - X, TMUL));
1844      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1845    }
1846  }
1847  else {
1848    TU = minAPInt(TU, floorOfQuotient(-X, TMUL));
1849    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1850    if (SrcUMvalid) {
1851      TL = maxAPInt(TL, ceilingOfQuotient(SrcUM - X, TMUL));
1852      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1853    }
1854  }
1855
1856  // test(AM/G, LM-Y) and test(-AM/G, Y-UM)
1857  TMUL = AM.sdiv(G);
1858  if (TMUL.sgt(0)) {
1859    TL = maxAPInt(TL, ceilingOfQuotient(-Y, TMUL));
1860    DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1861    if (DstUMvalid) {
1862      TU = minAPInt(TU, floorOfQuotient(DstUM - Y, TMUL));
1863      DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1864    }
1865  }
1866  else {
1867    TU = minAPInt(TU, floorOfQuotient(-Y, TMUL));
1868    DEBUG(dbgs() << "\t    TU = " << TU << "\n");
1869    if (DstUMvalid) {
1870      TL = maxAPInt(TL, ceilingOfQuotient(DstUM - Y, TMUL));
1871      DEBUG(dbgs() << "\t    TL = " << TL << "\n");
1872    }
1873  }
1874  if (TL.sgt(TU))
1875    ++ExactRDIVindependence;
1876  return TL.sgt(TU);
1877}
1878
1879
1880// symbolicRDIVtest -
1881// In Section 4.5 of the Practical Dependence Testing paper,the authors
1882// introduce a special case of Banerjee's Inequalities (also called the
1883// Extreme-Value Test) that can handle some of the SIV and RDIV cases,
1884// particularly cases with symbolics. Since it's only able to disprove
1885// dependence (not compute distances or directions), we'll use it as a
1886// fall back for the other tests.
1887//
1888// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
1889// where i and j are induction variables and c1 and c2 are loop invariants,
1890// we can use the symbolic tests to disprove some dependences, serving as a
1891// backup for the RDIV test. Note that i and j can be the same variable,
1892// letting this test serve as a backup for the various SIV tests.
1893//
1894// For a dependence to exist, c1 + a1*i must equal c2 + a2*j for some
1895//  0 <= i <= N1 and some 0 <= j <= N2, where N1 and N2 are the (normalized)
1896// loop bounds for the i and j loops, respectively. So, ...
1897//
1898// c1 + a1*i = c2 + a2*j
1899// a1*i - a2*j = c2 - c1
1900//
1901// To test for a dependence, we compute c2 - c1 and make sure it's in the
1902// range of the maximum and minimum possible values of a1*i - a2*j.
1903// Considering the signs of a1 and a2, we have 4 possible cases:
1904//
1905// 1) If a1 >= 0 and a2 >= 0, then
1906//        a1*0 - a2*N2 <= c2 - c1 <= a1*N1 - a2*0
1907//              -a2*N2 <= c2 - c1 <= a1*N1
1908//
1909// 2) If a1 >= 0 and a2 <= 0, then
1910//        a1*0 - a2*0 <= c2 - c1 <= a1*N1 - a2*N2
1911//                  0 <= c2 - c1 <= a1*N1 - a2*N2
1912//
1913// 3) If a1 <= 0 and a2 >= 0, then
1914//        a1*N1 - a2*N2 <= c2 - c1 <= a1*0 - a2*0
1915//        a1*N1 - a2*N2 <= c2 - c1 <= 0
1916//
1917// 4) If a1 <= 0 and a2 <= 0, then
1918//        a1*N1 - a2*0  <= c2 - c1 <= a1*0 - a2*N2
1919//        a1*N1         <= c2 - c1 <=       -a2*N2
1920//
1921// return true if dependence disproved
1922bool DependenceAnalysis::symbolicRDIVtest(const SCEV *A1,
1923                                          const SCEV *A2,
1924                                          const SCEV *C1,
1925                                          const SCEV *C2,
1926                                          const Loop *Loop1,
1927                                          const Loop *Loop2) const {
1928  ++SymbolicRDIVapplications;
1929  DEBUG(dbgs() << "\ttry symbolic RDIV test\n");
1930  DEBUG(dbgs() << "\t    A1 = " << *A1);
1931  DEBUG(dbgs() << ", type = " << *A1->getType() << "\n");
1932  DEBUG(dbgs() << "\t    A2 = " << *A2 << "\n");
1933  DEBUG(dbgs() << "\t    C1 = " << *C1 << "\n");
1934  DEBUG(dbgs() << "\t    C2 = " << *C2 << "\n");
1935  const SCEV *N1 = collectUpperBound(Loop1, A1->getType());
1936  const SCEV *N2 = collectUpperBound(Loop2, A1->getType());
1937  DEBUG(if (N1) dbgs() << "\t    N1 = " << *N1 << "\n");
1938  DEBUG(if (N2) dbgs() << "\t    N2 = " << *N2 << "\n");
1939  const SCEV *C2_C1 = SE->getMinusSCEV(C2, C1);
1940  const SCEV *C1_C2 = SE->getMinusSCEV(C1, C2);
1941  DEBUG(dbgs() << "\t    C2 - C1 = " << *C2_C1 << "\n");
1942  DEBUG(dbgs() << "\t    C1 - C2 = " << *C1_C2 << "\n");
1943  if (SE->isKnownNonNegative(A1)) {
1944    if (SE->isKnownNonNegative(A2)) {
1945      // A1 >= 0 && A2 >= 0
1946      if (N1) {
1947        // make sure that c2 - c1 <= a1*N1
1948        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
1949        DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
1950        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1)) {
1951          ++SymbolicRDIVindependence;
1952          return true;
1953        }
1954      }
1955      if (N2) {
1956        // make sure that -a2*N2 <= c2 - c1, or a2*N2 >= c1 - c2
1957        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
1958        DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
1959        if (isKnownPredicate(CmpInst::ICMP_SLT, A2N2, C1_C2)) {
1960          ++SymbolicRDIVindependence;
1961          return true;
1962        }
1963      }
1964    }
1965    else if (SE->isKnownNonPositive(A2)) {
1966      // a1 >= 0 && a2 <= 0
1967      if (N1 && N2) {
1968        // make sure that c2 - c1 <= a1*N1 - a2*N2
1969        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
1970        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
1971        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
1972        DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
1973        if (isKnownPredicate(CmpInst::ICMP_SGT, C2_C1, A1N1_A2N2)) {
1974          ++SymbolicRDIVindependence;
1975          return true;
1976        }
1977      }
1978      // make sure that 0 <= c2 - c1
1979      if (SE->isKnownNegative(C2_C1)) {
1980        ++SymbolicRDIVindependence;
1981        return true;
1982      }
1983    }
1984  }
1985  else if (SE->isKnownNonPositive(A1)) {
1986    if (SE->isKnownNonNegative(A2)) {
1987      // a1 <= 0 && a2 >= 0
1988      if (N1 && N2) {
1989        // make sure that a1*N1 - a2*N2 <= c2 - c1
1990        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
1991        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
1992        const SCEV *A1N1_A2N2 = SE->getMinusSCEV(A1N1, A2N2);
1993        DEBUG(dbgs() << "\t    A1*N1 - A2*N2 = " << *A1N1_A2N2 << "\n");
1994        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1_A2N2, C2_C1)) {
1995          ++SymbolicRDIVindependence;
1996          return true;
1997        }
1998      }
1999      // make sure that c2 - c1 <= 0
2000      if (SE->isKnownPositive(C2_C1)) {
2001        ++SymbolicRDIVindependence;
2002        return true;
2003      }
2004    }
2005    else if (SE->isKnownNonPositive(A2)) {
2006      // a1 <= 0 && a2 <= 0
2007      if (N1) {
2008        // make sure that a1*N1 <= c2 - c1
2009        const SCEV *A1N1 = SE->getMulExpr(A1, N1);
2010        DEBUG(dbgs() << "\t    A1*N1 = " << *A1N1 << "\n");
2011        if (isKnownPredicate(CmpInst::ICMP_SGT, A1N1, C2_C1)) {
2012          ++SymbolicRDIVindependence;
2013          return true;
2014        }
2015      }
2016      if (N2) {
2017        // make sure that c2 - c1 <= -a2*N2, or c1 - c2 >= a2*N2
2018        const SCEV *A2N2 = SE->getMulExpr(A2, N2);
2019        DEBUG(dbgs() << "\t    A2*N2 = " << *A2N2 << "\n");
2020        if (isKnownPredicate(CmpInst::ICMP_SLT, C1_C2, A2N2)) {
2021          ++SymbolicRDIVindependence;
2022          return true;
2023        }
2024      }
2025    }
2026  }
2027  return false;
2028}
2029
2030
2031// testSIV -
2032// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 - a2*i]
2033// where i is an induction variable, c1 and c2 are loop invariant, and a1 and
2034// a2 are constant, we attack it with an SIV test. While they can all be
2035// solved with the Exact SIV test, it's worthwhile to use simpler tests when
2036// they apply; they're cheaper and sometimes more precise.
2037//
2038// Return true if dependence disproved.
2039bool DependenceAnalysis::testSIV(const SCEV *Src,
2040                                 const SCEV *Dst,
2041                                 unsigned &Level,
2042                                 FullDependence &Result,
2043                                 Constraint &NewConstraint,
2044                                 const SCEV *&SplitIter) const {
2045  DEBUG(dbgs() << "    src = " << *Src << "\n");
2046  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2047  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2048  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2049  if (SrcAddRec && DstAddRec) {
2050    const SCEV *SrcConst = SrcAddRec->getStart();
2051    const SCEV *DstConst = DstAddRec->getStart();
2052    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2053    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2054    const Loop *CurLoop = SrcAddRec->getLoop();
2055    assert(CurLoop == DstAddRec->getLoop() &&
2056           "both loops in SIV should be same");
2057    Level = mapSrcLoop(CurLoop);
2058    bool disproven;
2059    if (SrcCoeff == DstCoeff)
2060      disproven = strongSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2061                                Level, Result, NewConstraint);
2062    else if (SrcCoeff == SE->getNegativeSCEV(DstCoeff))
2063      disproven = weakCrossingSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2064                                      Level, Result, NewConstraint, SplitIter);
2065    else
2066      disproven = exactSIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop,
2067                               Level, Result, NewConstraint);
2068    return disproven ||
2069      gcdMIVtest(Src, Dst, Result) ||
2070      symbolicRDIVtest(SrcCoeff, DstCoeff, SrcConst, DstConst, CurLoop, CurLoop);
2071  }
2072  if (SrcAddRec) {
2073    const SCEV *SrcConst = SrcAddRec->getStart();
2074    const SCEV *SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2075    const SCEV *DstConst = Dst;
2076    const Loop *CurLoop = SrcAddRec->getLoop();
2077    Level = mapSrcLoop(CurLoop);
2078    return weakZeroDstSIVtest(SrcCoeff, SrcConst, DstConst, CurLoop,
2079                              Level, Result, NewConstraint) ||
2080      gcdMIVtest(Src, Dst, Result);
2081  }
2082  if (DstAddRec) {
2083    const SCEV *DstConst = DstAddRec->getStart();
2084    const SCEV *DstCoeff = DstAddRec->getStepRecurrence(*SE);
2085    const SCEV *SrcConst = Src;
2086    const Loop *CurLoop = DstAddRec->getLoop();
2087    Level = mapDstLoop(CurLoop);
2088    return weakZeroSrcSIVtest(DstCoeff, SrcConst, DstConst,
2089                              CurLoop, Level, Result, NewConstraint) ||
2090      gcdMIVtest(Src, Dst, Result);
2091  }
2092  llvm_unreachable("SIV test expected at least one AddRec");
2093  return false;
2094}
2095
2096
2097// testRDIV -
2098// When we have a pair of subscripts of the form [c1 + a1*i] and [c2 + a2*j]
2099// where i and j are induction variables, c1 and c2 are loop invariant,
2100// and a1 and a2 are constant, we can solve it exactly with an easy adaptation
2101// of the Exact SIV test, the Restricted Double Index Variable (RDIV) test.
2102// It doesn't make sense to talk about distance or direction in this case,
2103// so there's no point in making special versions of the Strong SIV test or
2104// the Weak-crossing SIV test.
2105//
2106// With minor algebra, this test can also be used for things like
2107// [c1 + a1*i + a2*j][c2].
2108//
2109// Return true if dependence disproved.
2110bool DependenceAnalysis::testRDIV(const SCEV *Src,
2111                                  const SCEV *Dst,
2112                                  FullDependence &Result) const {
2113  // we have 3 possible situations here:
2114  //   1) [a*i + b] and [c*j + d]
2115  //   2) [a*i + c*j + b] and [d]
2116  //   3) [b] and [a*i + c*j + d]
2117  // We need to find what we've got and get organized
2118
2119  const SCEV *SrcConst, *DstConst;
2120  const SCEV *SrcCoeff, *DstCoeff;
2121  const Loop *SrcLoop, *DstLoop;
2122
2123  DEBUG(dbgs() << "    src = " << *Src << "\n");
2124  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2125  const SCEVAddRecExpr *SrcAddRec = dyn_cast<SCEVAddRecExpr>(Src);
2126  const SCEVAddRecExpr *DstAddRec = dyn_cast<SCEVAddRecExpr>(Dst);
2127  if (SrcAddRec && DstAddRec) {
2128    SrcConst = SrcAddRec->getStart();
2129    SrcCoeff = SrcAddRec->getStepRecurrence(*SE);
2130    SrcLoop = SrcAddRec->getLoop();
2131    DstConst = DstAddRec->getStart();
2132    DstCoeff = DstAddRec->getStepRecurrence(*SE);
2133    DstLoop = DstAddRec->getLoop();
2134  }
2135  else if (SrcAddRec) {
2136    if (const SCEVAddRecExpr *tmpAddRec =
2137        dyn_cast<SCEVAddRecExpr>(SrcAddRec->getStart())) {
2138      SrcConst = tmpAddRec->getStart();
2139      SrcCoeff = tmpAddRec->getStepRecurrence(*SE);
2140      SrcLoop = tmpAddRec->getLoop();
2141      DstConst = Dst;
2142      DstCoeff = SE->getNegativeSCEV(SrcAddRec->getStepRecurrence(*SE));
2143      DstLoop = SrcAddRec->getLoop();
2144    }
2145    else
2146      llvm_unreachable("RDIV reached by surprising SCEVs");
2147  }
2148  else if (DstAddRec) {
2149    if (const SCEVAddRecExpr *tmpAddRec =
2150        dyn_cast<SCEVAddRecExpr>(DstAddRec->getStart())) {
2151      DstConst = tmpAddRec->getStart();
2152      DstCoeff = tmpAddRec->getStepRecurrence(*SE);
2153      DstLoop = tmpAddRec->getLoop();
2154      SrcConst = Src;
2155      SrcCoeff = SE->getNegativeSCEV(DstAddRec->getStepRecurrence(*SE));
2156      SrcLoop = DstAddRec->getLoop();
2157    }
2158    else
2159      llvm_unreachable("RDIV reached by surprising SCEVs");
2160  }
2161  else
2162    llvm_unreachable("RDIV expected at least one AddRec");
2163  return exactRDIVtest(SrcCoeff, DstCoeff,
2164                       SrcConst, DstConst,
2165                       SrcLoop, DstLoop,
2166                       Result) ||
2167    gcdMIVtest(Src, Dst, Result) ||
2168    symbolicRDIVtest(SrcCoeff, DstCoeff,
2169                     SrcConst, DstConst,
2170                     SrcLoop, DstLoop);
2171}
2172
2173
2174// Tests the single-subscript MIV pair (Src and Dst) for dependence.
2175// Return true if dependence disproved.
2176// Can sometimes refine direction vectors.
2177bool DependenceAnalysis::testMIV(const SCEV *Src,
2178                                 const SCEV *Dst,
2179                                 const SmallBitVector &Loops,
2180                                 FullDependence &Result) const {
2181  DEBUG(dbgs() << "    src = " << *Src << "\n");
2182  DEBUG(dbgs() << "    dst = " << *Dst << "\n");
2183  Result.Consistent = false;
2184  return gcdMIVtest(Src, Dst, Result) ||
2185    banerjeeMIVtest(Src, Dst, Loops, Result);
2186}
2187
2188
2189// Given a product, e.g., 10*X*Y, returns the first constant operand,
2190// in this case 10. If there is no constant part, returns NULL.
2191static
2192const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) {
2193  for (unsigned Op = 0, Ops = Product->getNumOperands(); Op < Ops; Op++) {
2194    if (const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Product->getOperand(Op)))
2195      return Constant;
2196  }
2197  return NULL;
2198}
2199
2200
2201//===----------------------------------------------------------------------===//
2202// gcdMIVtest -
2203// Tests an MIV subscript pair for dependence.
2204// Returns true if any possible dependence is disproved.
2205// Marks the result as inconsistent.
2206// Can sometimes disprove the equal direction for 1 or more loops,
2207// as discussed in Michael Wolfe's book,
2208// High Performance Compilers for Parallel Computing, page 235.
2209//
2210// We spend some effort (code!) to handle cases like
2211// [10*i + 5*N*j + 15*M + 6], where i and j are induction variables,
2212// but M and N are just loop-invariant variables.
2213// This should help us handle linearized subscripts;
2214// also makes this test a useful backup to the various SIV tests.
2215//
2216// It occurs to me that the presence of loop-invariant variables
2217// changes the nature of the test from "greatest common divisor"
2218// to "a common divisor".
2219bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,
2220                                    const SCEV *Dst,
2221                                    FullDependence &Result) const {
2222  DEBUG(dbgs() << "starting gcd\n");
2223  ++GCDapplications;
2224  unsigned BitWidth = SE->getTypeSizeInBits(Src->getType());
2225  APInt RunningGCD = APInt::getNullValue(BitWidth);
2226
2227  // Examine Src coefficients.
2228  // Compute running GCD and record source constant.
2229  // Because we're looking for the constant at the end of the chain,
2230  // we can't quit the loop just because the GCD == 1.
2231  const SCEV *Coefficients = Src;
2232  while (const SCEVAddRecExpr *AddRec =
2233         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2234    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2235    const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
2236    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
2237      // If the coefficient is the product of a constant and other stuff,
2238      // we can use the constant in the GCD computation.
2239      Constant = getConstantPart(Product);
2240    if (!Constant)
2241      return false;
2242    APInt ConstCoeff = Constant->getValue()->getValue();
2243    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2244    Coefficients = AddRec->getStart();
2245  }
2246  const SCEV *SrcConst = Coefficients;
2247
2248  // Examine Dst coefficients.
2249  // Compute running GCD and record destination constant.
2250  // Because we're looking for the constant at the end of the chain,
2251  // we can't quit the loop just because the GCD == 1.
2252  Coefficients = Dst;
2253  while (const SCEVAddRecExpr *AddRec =
2254         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2255    const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2256    const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Coeff);
2257    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
2258      // If the coefficient is the product of a constant and other stuff,
2259      // we can use the constant in the GCD computation.
2260      Constant = getConstantPart(Product);
2261    if (!Constant)
2262      return false;
2263    APInt ConstCoeff = Constant->getValue()->getValue();
2264    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2265    Coefficients = AddRec->getStart();
2266  }
2267  const SCEV *DstConst = Coefficients;
2268
2269  APInt ExtraGCD = APInt::getNullValue(BitWidth);
2270  const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst);
2271  DEBUG(dbgs() << "    Delta = " << *Delta << "\n");
2272  const SCEVConstant *Constant = dyn_cast<SCEVConstant>(Delta);
2273  if (const SCEVAddExpr *Sum = dyn_cast<SCEVAddExpr>(Delta)) {
2274    // If Delta is a sum of products, we may be able to make further progress.
2275    for (unsigned Op = 0, Ops = Sum->getNumOperands(); Op < Ops; Op++) {
2276      const SCEV *Operand = Sum->getOperand(Op);
2277      if (isa<SCEVConstant>(Operand)) {
2278        assert(!Constant && "Surprised to find multiple constants");
2279        Constant = cast<SCEVConstant>(Operand);
2280      }
2281      else if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Operand)) {
2282        // Search for constant operand to participate in GCD;
2283        // If none found; return false.
2284        const SCEVConstant *ConstOp = getConstantPart(Product);
2285        if (!ConstOp)
2286          return false;
2287        APInt ConstOpValue = ConstOp->getValue()->getValue();
2288        ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,
2289                                                   ConstOpValue.abs());
2290      }
2291      else
2292        return false;
2293    }
2294  }
2295  if (!Constant)
2296    return false;
2297  APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue();
2298  DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");
2299  if (ConstDelta == 0)
2300    return false;
2301  RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ExtraGCD);
2302  DEBUG(dbgs() << "    RunningGCD = " << RunningGCD << "\n");
2303  APInt Remainder = ConstDelta.srem(RunningGCD);
2304  if (Remainder != 0) {
2305    ++GCDindependence;
2306    return true;
2307  }
2308
2309  // Try to disprove equal directions.
2310  // For example, given a subscript pair [3*i + 2*j] and [i' + 2*j' - 1],
2311  // the code above can't disprove the dependence because the GCD = 1.
2312  // So we consider what happen if i = i' and what happens if j = j'.
2313  // If i = i', we can simplify the subscript to [2*i + 2*j] and [2*j' - 1],
2314  // which is infeasible, so we can disallow the = direction for the i level.
2315  // Setting j = j' doesn't help matters, so we end up with a direction vector
2316  // of [<>, *]
2317  //
2318  // Given A[5*i + 10*j*M + 9*M*N] and A[15*i + 20*j*M - 21*N*M + 5],
2319  // we need to remember that the constant part is 5 and the RunningGCD should
2320  // be initialized to ExtraGCD = 30.
2321  DEBUG(dbgs() << "    ExtraGCD = " << ExtraGCD << '\n');
2322
2323  bool Improved = false;
2324  Coefficients = Src;
2325  while (const SCEVAddRecExpr *AddRec =
2326         dyn_cast<SCEVAddRecExpr>(Coefficients)) {
2327    Coefficients = AddRec->getStart();
2328    const Loop *CurLoop = AddRec->getLoop();
2329    RunningGCD = ExtraGCD;
2330    const SCEV *SrcCoeff = AddRec->getStepRecurrence(*SE);
2331    const SCEV *DstCoeff = SE->getMinusSCEV(SrcCoeff, SrcCoeff);
2332    const SCEV *Inner = Src;
2333    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2334      AddRec = cast<SCEVAddRecExpr>(Inner);
2335      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2336      if (CurLoop == AddRec->getLoop())
2337        ; // SrcCoeff == Coeff
2338      else {
2339        if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
2340          // If the coefficient is the product of a constant and other stuff,
2341          // we can use the constant in the GCD computation.
2342          Constant = getConstantPart(Product);
2343        else
2344          Constant = cast<SCEVConstant>(Coeff);
2345        APInt ConstCoeff = Constant->getValue()->getValue();
2346        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2347      }
2348      Inner = AddRec->getStart();
2349    }
2350    Inner = Dst;
2351    while (RunningGCD != 1 && isa<SCEVAddRecExpr>(Inner)) {
2352      AddRec = cast<SCEVAddRecExpr>(Inner);
2353      const SCEV *Coeff = AddRec->getStepRecurrence(*SE);
2354      if (CurLoop == AddRec->getLoop())
2355        DstCoeff = Coeff;
2356      else {
2357        if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Coeff))
2358          // If the coefficient is the product of a constant and other stuff,
2359          // we can use the constant in the GCD computation.
2360          Constant = getConstantPart(Product);
2361        else
2362          Constant = cast<SCEVConstant>(Coeff);
2363        APInt ConstCoeff = Constant->getValue()->getValue();
2364        RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2365      }
2366      Inner = AddRec->getStart();
2367    }
2368    Delta = SE->getMinusSCEV(SrcCoeff, DstCoeff);
2369    if (const SCEVMulExpr *Product = dyn_cast<SCEVMulExpr>(Delta))
2370      // If the coefficient is the product of a constant and other stuff,
2371      // we can use the constant in the GCD computation.
2372      Constant = getConstantPart(Product);
2373    else if (isa<SCEVConstant>(Delta))
2374      Constant = cast<SCEVConstant>(Delta);
2375    else {
2376      // The difference of the two coefficients might not be a product
2377      // or constant, in which case we give up on this direction.
2378      continue;
2379    }
2380    APInt ConstCoeff = Constant->getValue()->getValue();
2381    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
2382    DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");
2383    if (RunningGCD != 0) {
2384      Remainder = ConstDelta.srem(RunningGCD);
2385      DEBUG(dbgs() << "\tRemainder = " << Remainder << "\n");
2386      if (Remainder != 0) {
2387        unsigned Level = mapSrcLoop(CurLoop);
2388        Result.DV[Level - 1].Direction &= unsigned(~Dependence::DVEntry::EQ);
2389        Improved = true;
2390      }
2391    }
2392  }
2393  if (Improved)
2394    ++GCDsuccesses;
2395  DEBUG(dbgs() << "all done\n");
2396  return false;
2397}
2398
2399
2400//===----------------------------------------------------------------------===//
2401// banerjeeMIVtest -
2402// Use Banerjee's Inequalities to test an MIV subscript pair.
2403// (Wolfe, in the race-car book, calls this the Extreme Value Test.)
2404// Generally follows the discussion in Section 2.5.2 of
2405//
2406//    Optimizing Supercompilers for Supercomputers
2407//    Michael Wolfe
2408//
2409// The inequalities given on page 25 are simplified in that loops are
2410// normalized so that the lower bound is always 0 and the stride is always 1.
2411// For example, Wolfe gives
2412//
2413//     LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2414//
2415// where A_k is the coefficient of the kth index in the source subscript,
2416// B_k is the coefficient of the kth index in the destination subscript,
2417// U_k is the upper bound of the kth index, L_k is the lower bound of the Kth
2418// index, and N_k is the stride of the kth index. Since all loops are normalized
2419// by the SCEV package, N_k = 1 and L_k = 0, allowing us to simplify the
2420// equation to
2421//
2422//     LB^<_k = (A^-_k - B_k)^- (U_k - 0 - 1) + (A_k - B_k)0 - B_k 1
2423//            = (A^-_k - B_k)^- (U_k - 1)  - B_k
2424//
2425// Similar simplifications are possible for the other equations.
2426//
2427// When we can't determine the number of iterations for a loop,
2428// we use NULL as an indicator for the worst case, infinity.
2429// When computing the upper bound, NULL denotes +inf;
2430// for the lower bound, NULL denotes -inf.
2431//
2432// Return true if dependence disproved.
2433bool DependenceAnalysis::banerjeeMIVtest(const SCEV *Src,
2434                                         const SCEV *Dst,
2435                                         const SmallBitVector &Loops,
2436                                         FullDependence &Result) const {
2437  DEBUG(dbgs() << "starting Banerjee\n");
2438  ++BanerjeeApplications;
2439  DEBUG(dbgs() << "    Src = " << *Src << '\n');
2440  const SCEV *A0;
2441  CoefficientInfo *A = collectCoeffInfo(Src, true, A0);
2442  DEBUG(dbgs() << "    Dst = " << *Dst << '\n');
2443  const SCEV *B0;
2444  CoefficientInfo *B = collectCoeffInfo(Dst, false, B0);
2445  BoundInfo *Bound = new BoundInfo[MaxLevels + 1];
2446  const SCEV *Delta = SE->getMinusSCEV(B0, A0);
2447  DEBUG(dbgs() << "\tDelta = " << *Delta << '\n');
2448
2449  // Compute bounds for all the * directions.
2450  DEBUG(dbgs() << "\tBounds[*]\n");
2451  for (unsigned K = 1; K <= MaxLevels; ++K) {
2452    Bound[K].Iterations = A[K].Iterations ? A[K].Iterations : B[K].Iterations;
2453    Bound[K].Direction = Dependence::DVEntry::ALL;
2454    Bound[K].DirSet = Dependence::DVEntry::NONE;
2455    findBoundsALL(A, B, Bound, K);
2456#ifndef NDEBUG
2457    DEBUG(dbgs() << "\t    " << K << '\t');
2458    if (Bound[K].Lower[Dependence::DVEntry::ALL])
2459      DEBUG(dbgs() << *Bound[K].Lower[Dependence::DVEntry::ALL] << '\t');
2460    else
2461      DEBUG(dbgs() << "-inf\t");
2462    if (Bound[K].Upper[Dependence::DVEntry::ALL])
2463      DEBUG(dbgs() << *Bound[K].Upper[Dependence::DVEntry::ALL] << '\n');
2464    else
2465      DEBUG(dbgs() << "+inf\n");
2466#endif
2467  }
2468
2469  // Test the *, *, *, ... case.
2470  bool Disproved = false;
2471  if (testBounds(Dependence::DVEntry::ALL, 0, Bound, Delta)) {
2472    // Explore the direction vector hierarchy.
2473    unsigned DepthExpanded = 0;
2474    unsigned NewDeps = exploreDirections(1, A, B, Bound,
2475                                         Loops, DepthExpanded, Delta);
2476    if (NewDeps > 0) {
2477      bool Improved = false;
2478      for (unsigned K = 1; K <= CommonLevels; ++K) {
2479        if (Loops[K]) {
2480          unsigned Old = Result.DV[K - 1].Direction;
2481          Result.DV[K - 1].Direction = Old & Bound[K].DirSet;
2482          Improved |= Old != Result.DV[K - 1].Direction;
2483          if (!Result.DV[K - 1].Direction) {
2484            Improved = false;
2485            Disproved = true;
2486            break;
2487          }
2488        }
2489      }
2490      if (Improved)
2491        ++BanerjeeSuccesses;
2492    }
2493    else {
2494      ++BanerjeeIndependence;
2495      Disproved = true;
2496    }
2497  }
2498  else {
2499    ++BanerjeeIndependence;
2500    Disproved = true;
2501  }
2502  delete [] Bound;
2503  delete [] A;
2504  delete [] B;
2505  return Disproved;
2506}
2507
2508
2509// Hierarchically expands the direction vector
2510// search space, combining the directions of discovered dependences
2511// in the DirSet field of Bound. Returns the number of distinct
2512// dependences discovered. If the dependence is disproved,
2513// it will return 0.
2514unsigned DependenceAnalysis::exploreDirections(unsigned Level,
2515                                               CoefficientInfo *A,
2516                                               CoefficientInfo *B,
2517                                               BoundInfo *Bound,
2518                                               const SmallBitVector &Loops,
2519                                               unsigned &DepthExpanded,
2520                                               const SCEV *Delta) const {
2521  if (Level > CommonLevels) {
2522    // record result
2523    DEBUG(dbgs() << "\t[");
2524    for (unsigned K = 1; K <= CommonLevels; ++K) {
2525      if (Loops[K]) {
2526        Bound[K].DirSet |= Bound[K].Direction;
2527#ifndef NDEBUG
2528        switch (Bound[K].Direction) {
2529        case Dependence::DVEntry::LT:
2530          DEBUG(dbgs() << " <");
2531          break;
2532        case Dependence::DVEntry::EQ:
2533          DEBUG(dbgs() << " =");
2534          break;
2535        case Dependence::DVEntry::GT:
2536          DEBUG(dbgs() << " >");
2537          break;
2538        case Dependence::DVEntry::ALL:
2539          DEBUG(dbgs() << " *");
2540          break;
2541        default:
2542          llvm_unreachable("unexpected Bound[K].Direction");
2543        }
2544#endif
2545      }
2546    }
2547    DEBUG(dbgs() << " ]\n");
2548    return 1;
2549  }
2550  if (Loops[Level]) {
2551    if (Level > DepthExpanded) {
2552      DepthExpanded = Level;
2553      // compute bounds for <, =, > at current level
2554      findBoundsLT(A, B, Bound, Level);
2555      findBoundsGT(A, B, Bound, Level);
2556      findBoundsEQ(A, B, Bound, Level);
2557#ifndef NDEBUG
2558      DEBUG(dbgs() << "\tBound for level = " << Level << '\n');
2559      DEBUG(dbgs() << "\t    <\t");
2560      if (Bound[Level].Lower[Dependence::DVEntry::LT])
2561        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::LT] << '\t');
2562      else
2563        DEBUG(dbgs() << "-inf\t");
2564      if (Bound[Level].Upper[Dependence::DVEntry::LT])
2565        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::LT] << '\n');
2566      else
2567        DEBUG(dbgs() << "+inf\n");
2568      DEBUG(dbgs() << "\t    =\t");
2569      if (Bound[Level].Lower[Dependence::DVEntry::EQ])
2570        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::EQ] << '\t');
2571      else
2572        DEBUG(dbgs() << "-inf\t");
2573      if (Bound[Level].Upper[Dependence::DVEntry::EQ])
2574        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::EQ] << '\n');
2575      else
2576        DEBUG(dbgs() << "+inf\n");
2577      DEBUG(dbgs() << "\t    >\t");
2578      if (Bound[Level].Lower[Dependence::DVEntry::GT])
2579        DEBUG(dbgs() << *Bound[Level].Lower[Dependence::DVEntry::GT] << '\t');
2580      else
2581        DEBUG(dbgs() << "-inf\t");
2582      if (Bound[Level].Upper[Dependence::DVEntry::GT])
2583        DEBUG(dbgs() << *Bound[Level].Upper[Dependence::DVEntry::GT] << '\n');
2584      else
2585        DEBUG(dbgs() << "+inf\n");
2586#endif
2587    }
2588
2589    unsigned NewDeps = 0;
2590
2591    // test bounds for <, *, *, ...
2592    if (testBounds(Dependence::DVEntry::LT, Level, Bound, Delta))
2593      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2594                                   Loops, DepthExpanded, Delta);
2595
2596    // Test bounds for =, *, *, ...
2597    if (testBounds(Dependence::DVEntry::EQ, Level, Bound, Delta))
2598      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2599                                   Loops, DepthExpanded, Delta);
2600
2601    // test bounds for >, *, *, ...
2602    if (testBounds(Dependence::DVEntry::GT, Level, Bound, Delta))
2603      NewDeps += exploreDirections(Level + 1, A, B, Bound,
2604                                   Loops, DepthExpanded, Delta);
2605
2606    Bound[Level].Direction = Dependence::DVEntry::ALL;
2607    return NewDeps;
2608  }
2609  else
2610    return exploreDirections(Level + 1, A, B, Bound, Loops, DepthExpanded, Delta);
2611}
2612
2613
2614// Returns true iff the current bounds are plausible.
2615bool DependenceAnalysis::testBounds(unsigned char DirKind,
2616                                    unsigned Level,
2617                                    BoundInfo *Bound,
2618                                    const SCEV *Delta) const {
2619  Bound[Level].Direction = DirKind;
2620  if (const SCEV *LowerBound = getLowerBound(Bound))
2621    if (isKnownPredicate(CmpInst::ICMP_SGT, LowerBound, Delta))
2622      return false;
2623  if (const SCEV *UpperBound = getUpperBound(Bound))
2624    if (isKnownPredicate(CmpInst::ICMP_SGT, Delta, UpperBound))
2625      return false;
2626  return true;
2627}
2628
2629
2630// Computes the upper and lower bounds for level K
2631// using the * direction. Records them in Bound.
2632// Wolfe gives the equations
2633//
2634//    LB^*_k = (A^-_k - B^+_k)(U_k - L_k) + (A_k - B_k)L_k
2635//    UB^*_k = (A^+_k - B^-_k)(U_k - L_k) + (A_k - B_k)L_k
2636//
2637// Since we normalize loops, we can simplify these equations to
2638//
2639//    LB^*_k = (A^-_k - B^+_k)U_k
2640//    UB^*_k = (A^+_k - B^-_k)U_k
2641//
2642// We must be careful to handle the case where the upper bound is unknown.
2643// Note that the lower bound is always <= 0
2644// and the upper bound is always >= 0.
2645void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,
2646                                       CoefficientInfo *B,
2647                                       BoundInfo *Bound,
2648                                       unsigned K) const {
2649  Bound[K].Lower[Dependence::DVEntry::ALL] = NULL; // Default value = -infinity.
2650  Bound[K].Upper[Dependence::DVEntry::ALL] = NULL; // Default value = +infinity.
2651  if (Bound[K].Iterations) {
2652    Bound[K].Lower[Dependence::DVEntry::ALL] =
2653      SE->getMulExpr(SE->getMinusSCEV(A[K].NegPart, B[K].PosPart),
2654                     Bound[K].Iterations);
2655    Bound[K].Upper[Dependence::DVEntry::ALL] =
2656      SE->getMulExpr(SE->getMinusSCEV(A[K].PosPart, B[K].NegPart),
2657                     Bound[K].Iterations);
2658  }
2659  else {
2660    // If the difference is 0, we won't need to know the number of iterations.
2661    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))
2662      Bound[K].Lower[Dependence::DVEntry::ALL] =
2663        SE->getConstant(A[K].Coeff->getType(), 0);
2664    if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))
2665      Bound[K].Upper[Dependence::DVEntry::ALL] =
2666        SE->getConstant(A[K].Coeff->getType(), 0);
2667  }
2668}
2669
2670
2671// Computes the upper and lower bounds for level K
2672// using the = direction. Records them in Bound.
2673// Wolfe gives the equations
2674//
2675//    LB^=_k = (A_k - B_k)^- (U_k - L_k) + (A_k - B_k)L_k
2676//    UB^=_k = (A_k - B_k)^+ (U_k - L_k) + (A_k - B_k)L_k
2677//
2678// Since we normalize loops, we can simplify these equations to
2679//
2680//    LB^=_k = (A_k - B_k)^- U_k
2681//    UB^=_k = (A_k - B_k)^+ U_k
2682//
2683// We must be careful to handle the case where the upper bound is unknown.
2684// Note that the lower bound is always <= 0
2685// and the upper bound is always >= 0.
2686void DependenceAnalysis::findBoundsEQ(CoefficientInfo *A,
2687                                      CoefficientInfo *B,
2688                                      BoundInfo *Bound,
2689                                      unsigned K) const {
2690  Bound[K].Lower[Dependence::DVEntry::EQ] = NULL; // Default value = -infinity.
2691  Bound[K].Upper[Dependence::DVEntry::EQ] = NULL; // Default value = +infinity.
2692  if (Bound[K].Iterations) {
2693    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2694    const SCEV *NegativePart = getNegativePart(Delta);
2695    Bound[K].Lower[Dependence::DVEntry::EQ] =
2696      SE->getMulExpr(NegativePart, Bound[K].Iterations);
2697    const SCEV *PositivePart = getPositivePart(Delta);
2698    Bound[K].Upper[Dependence::DVEntry::EQ] =
2699      SE->getMulExpr(PositivePart, Bound[K].Iterations);
2700  }
2701  else {
2702    // If the positive/negative part of the difference is 0,
2703    // we won't need to know the number of iterations.
2704    const SCEV *Delta = SE->getMinusSCEV(A[K].Coeff, B[K].Coeff);
2705    const SCEV *NegativePart = getNegativePart(Delta);
2706    if (NegativePart->isZero())
2707      Bound[K].Lower[Dependence::DVEntry::EQ] = NegativePart; // Zero
2708    const SCEV *PositivePart = getPositivePart(Delta);
2709    if (PositivePart->isZero())
2710      Bound[K].Upper[Dependence::DVEntry::EQ] = PositivePart; // Zero
2711  }
2712}
2713
2714
2715// Computes the upper and lower bounds for level K
2716// using the < direction. Records them in Bound.
2717// Wolfe gives the equations
2718//
2719//    LB^<_k = (A^-_k - B_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2720//    UB^<_k = (A^+_k - B_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k - B_k N_k
2721//
2722// Since we normalize loops, we can simplify these equations to
2723//
2724//    LB^<_k = (A^-_k - B_k)^- (U_k - 1) - B_k
2725//    UB^<_k = (A^+_k - B_k)^+ (U_k - 1) - B_k
2726//
2727// We must be careful to handle the case where the upper bound is unknown.
2728void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,
2729                                      CoefficientInfo *B,
2730                                      BoundInfo *Bound,
2731                                      unsigned K) const {
2732  Bound[K].Lower[Dependence::DVEntry::LT] = NULL; // Default value = -infinity.
2733  Bound[K].Upper[Dependence::DVEntry::LT] = NULL; // Default value = +infinity.
2734  if (Bound[K].Iterations) {
2735    const SCEV *Iter_1 =
2736      SE->getMinusSCEV(Bound[K].Iterations,
2737                       SE->getConstant(Bound[K].Iterations->getType(), 1));
2738    const SCEV *NegPart =
2739      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2740    Bound[K].Lower[Dependence::DVEntry::LT] =
2741      SE->getMinusSCEV(SE->getMulExpr(NegPart, Iter_1), B[K].Coeff);
2742    const SCEV *PosPart =
2743      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2744    Bound[K].Upper[Dependence::DVEntry::LT] =
2745      SE->getMinusSCEV(SE->getMulExpr(PosPart, Iter_1), B[K].Coeff);
2746  }
2747  else {
2748    // If the positive/negative part of the difference is 0,
2749    // we won't need to know the number of iterations.
2750    const SCEV *NegPart =
2751      getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));
2752    if (NegPart->isZero())
2753      Bound[K].Lower[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2754    const SCEV *PosPart =
2755      getPositivePart(SE->getMinusSCEV(A[K].PosPart, B[K].Coeff));
2756    if (PosPart->isZero())
2757      Bound[K].Upper[Dependence::DVEntry::LT] = SE->getNegativeSCEV(B[K].Coeff);
2758  }
2759}
2760
2761
2762// Computes the upper and lower bounds for level K
2763// using the > direction. Records them in Bound.
2764// Wolfe gives the equations
2765//
2766//    LB^>_k = (A_k - B^+_k)^- (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2767//    UB^>_k = (A_k - B^-_k)^+ (U_k - L_k - N_k) + (A_k - B_k)L_k + A_k N_k
2768//
2769// Since we normalize loops, we can simplify these equations to
2770//
2771//    LB^>_k = (A_k - B^+_k)^- (U_k - 1) + A_k
2772//    UB^>_k = (A_k - B^-_k)^+ (U_k - 1) + A_k
2773//
2774// We must be careful to handle the case where the upper bound is unknown.
2775void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,
2776                                      CoefficientInfo *B,
2777                                      BoundInfo *Bound,
2778                                      unsigned K) const {
2779  Bound[K].Lower[Dependence::DVEntry::GT] = NULL; // Default value = -infinity.
2780  Bound[K].Upper[Dependence::DVEntry::GT] = NULL; // Default value = +infinity.
2781  if (Bound[K].Iterations) {
2782    const SCEV *Iter_1 =
2783      SE->getMinusSCEV(Bound[K].Iterations,
2784                       SE->getConstant(Bound[K].Iterations->getType(), 1));
2785    const SCEV *NegPart =
2786      getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2787    Bound[K].Lower[Dependence::DVEntry::GT] =
2788      SE->getAddExpr(SE->getMulExpr(NegPart, Iter_1), A[K].Coeff);
2789    const SCEV *PosPart =
2790      getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2791    Bound[K].Upper[Dependence::DVEntry::GT] =
2792      SE->getAddExpr(SE->getMulExpr(PosPart, Iter_1), A[K].Coeff);
2793  }
2794  else {
2795    // If the positive/negative part of the difference is 0,
2796    // we won't need to know the number of iterations.
2797    const SCEV *NegPart = getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));
2798    if (NegPart->isZero())
2799      Bound[K].Lower[Dependence::DVEntry::GT] = A[K].Coeff;
2800    const SCEV *PosPart = getPositivePart(SE->getMinusSCEV(A[K].Coeff, B[K].NegPart));
2801    if (PosPart->isZero())
2802      Bound[K].Upper[Dependence::DVEntry::GT] = A[K].Coeff;
2803  }
2804}
2805
2806
2807// X^+ = max(X, 0)
2808const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const {
2809  return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0));
2810}
2811
2812
2813// X^- = min(X, 0)
2814const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const {
2815  return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0));
2816}
2817
2818
2819// Walks through the subscript,
2820// collecting each coefficient, the associated loop bounds,
2821// and recording its positive and negative parts for later use.
2822DependenceAnalysis::CoefficientInfo *
2823DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,
2824                                     bool SrcFlag,
2825                                     const SCEV *&Constant) const {
2826  const SCEV *Zero = SE->getConstant(Subscript->getType(), 0);
2827  CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];
2828  for (unsigned K = 1; K <= MaxLevels; ++K) {
2829    CI[K].Coeff = Zero;
2830    CI[K].PosPart = Zero;
2831    CI[K].NegPart = Zero;
2832    CI[K].Iterations = NULL;
2833  }
2834  while (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Subscript)) {
2835    const Loop *L = AddRec->getLoop();
2836    unsigned K = SrcFlag ? mapSrcLoop(L) : mapDstLoop(L);
2837    CI[K].Coeff = AddRec->getStepRecurrence(*SE);
2838    CI[K].PosPart = getPositivePart(CI[K].Coeff);
2839    CI[K].NegPart = getNegativePart(CI[K].Coeff);
2840    CI[K].Iterations = collectUpperBound(L, Subscript->getType());
2841    Subscript = AddRec->getStart();
2842  }
2843  Constant = Subscript;
2844#ifndef NDEBUG
2845  DEBUG(dbgs() << "\tCoefficient Info\n");
2846  for (unsigned K = 1; K <= MaxLevels; ++K) {
2847    DEBUG(dbgs() << "\t    " << K << "\t" << *CI[K].Coeff);
2848    DEBUG(dbgs() << "\tPos Part = ");
2849    DEBUG(dbgs() << *CI[K].PosPart);
2850    DEBUG(dbgs() << "\tNeg Part = ");
2851    DEBUG(dbgs() << *CI[K].NegPart);
2852    DEBUG(dbgs() << "\tUpper Bound = ");
2853    if (CI[K].Iterations)
2854      DEBUG(dbgs() << *CI[K].Iterations);
2855    else
2856      DEBUG(dbgs() << "+inf");
2857    DEBUG(dbgs() << '\n');
2858  }
2859  DEBUG(dbgs() << "\t    Constant = " << *Subscript << '\n');
2860#endif
2861  return CI;
2862}
2863
2864
2865// Looks through all the bounds info and
2866// computes the lower bound given the current direction settings
2867// at each level. If the lower bound for any level is -inf,
2868// the result is -inf.
2869const SCEV *DependenceAnalysis::getLowerBound(BoundInfo *Bound) const {
2870  const SCEV *Sum = Bound[1].Lower[Bound[1].Direction];
2871  for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2872    if (Bound[K].Lower[Bound[K].Direction])
2873      Sum = SE->getAddExpr(Sum, Bound[K].Lower[Bound[K].Direction]);
2874    else
2875      Sum = NULL;
2876  }
2877  return Sum;
2878}
2879
2880
2881// Looks through all the bounds info and
2882// computes the upper bound given the current direction settings
2883// at each level. If the upper bound at any level is +inf,
2884// the result is +inf.
2885const SCEV *DependenceAnalysis::getUpperBound(BoundInfo *Bound) const {
2886  const SCEV *Sum = Bound[1].Upper[Bound[1].Direction];
2887  for (unsigned K = 2; Sum && K <= MaxLevels; ++K) {
2888    if (Bound[K].Upper[Bound[K].Direction])
2889      Sum = SE->getAddExpr(Sum, Bound[K].Upper[Bound[K].Direction]);
2890    else
2891      Sum = NULL;
2892  }
2893  return Sum;
2894}
2895
2896
2897//===----------------------------------------------------------------------===//
2898// Constraint manipulation for Delta test.
2899
2900// Given a linear SCEV,
2901// return the coefficient (the step)
2902// corresponding to the specified loop.
2903// If there isn't one, return 0.
2904// For example, given a*i + b*j + c*k, zeroing the coefficient
2905// corresponding to the j loop would yield b.
2906const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,
2907                                                const Loop *TargetLoop)  const {
2908  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2909  if (!AddRec)
2910    return SE->getConstant(Expr->getType(), 0);
2911  if (AddRec->getLoop() == TargetLoop)
2912    return AddRec->getStepRecurrence(*SE);
2913  return findCoefficient(AddRec->getStart(), TargetLoop);
2914}
2915
2916
2917// Given a linear SCEV,
2918// return the SCEV given by zeroing out the coefficient
2919// corresponding to the specified loop.
2920// For example, given a*i + b*j + c*k, zeroing the coefficient
2921// corresponding to the j loop would yield a*i + c*k.
2922const SCEV *DependenceAnalysis::zeroCoefficient(const SCEV *Expr,
2923                                                const Loop *TargetLoop)  const {
2924  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2925  if (!AddRec)
2926    return Expr; // ignore
2927  if (AddRec->getLoop() == TargetLoop)
2928    return AddRec->getStart();
2929  return SE->getAddRecExpr(zeroCoefficient(AddRec->getStart(), TargetLoop),
2930                           AddRec->getStepRecurrence(*SE),
2931                           AddRec->getLoop(),
2932                           AddRec->getNoWrapFlags());
2933}
2934
2935
2936// Given a linear SCEV Expr,
2937// return the SCEV given by adding some Value to the
2938// coefficient corresponding to the specified TargetLoop.
2939// For example, given a*i + b*j + c*k, adding 1 to the coefficient
2940// corresponding to the j loop would yield a*i + (b+1)*j + c*k.
2941const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr,
2942                                                 const Loop *TargetLoop,
2943                                                 const SCEV *Value)  const {
2944  const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);
2945  if (!AddRec) // create a new addRec
2946    return SE->getAddRecExpr(Expr,
2947                             Value,
2948                             TargetLoop,
2949                             SCEV::FlagAnyWrap); // Worst case, with no info.
2950  if (AddRec->getLoop() == TargetLoop) {
2951    const SCEV *Sum = SE->getAddExpr(AddRec->getStepRecurrence(*SE), Value);
2952    if (Sum->isZero())
2953      return AddRec->getStart();
2954    return SE->getAddRecExpr(AddRec->getStart(),
2955                             Sum,
2956                             AddRec->getLoop(),
2957                             AddRec->getNoWrapFlags());
2958  }
2959  if (SE->isLoopInvariant(AddRec, TargetLoop))
2960    return SE->getAddRecExpr(AddRec,
2961			     Value,
2962			     TargetLoop,
2963			     SCEV::FlagAnyWrap);
2964  return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(),
2965                                            TargetLoop, Value),
2966                           AddRec->getStepRecurrence(*SE),
2967                           AddRec->getLoop(),
2968                           AddRec->getNoWrapFlags());
2969}
2970
2971
2972// Review the constraints, looking for opportunities
2973// to simplify a subscript pair (Src and Dst).
2974// Return true if some simplification occurs.
2975// If the simplification isn't exact (that is, if it is conservative
2976// in terms of dependence), set consistent to false.
2977// Corresponds to Figure 5 from the paper
2978//
2979//            Practical Dependence Testing
2980//            Goff, Kennedy, Tseng
2981//            PLDI 1991
2982bool DependenceAnalysis::propagate(const SCEV *&Src,
2983                                   const SCEV *&Dst,
2984                                   SmallBitVector &Loops,
2985                                   SmallVectorImpl<Constraint> &Constraints,
2986                                   bool &Consistent) {
2987  bool Result = false;
2988  for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) {
2989    DEBUG(dbgs() << "\t    Constraint[" << LI << "] is");
2990    DEBUG(Constraints[LI].dump(dbgs()));
2991    if (Constraints[LI].isDistance())
2992      Result |= propagateDistance(Src, Dst, Constraints[LI], Consistent);
2993    else if (Constraints[LI].isLine())
2994      Result |= propagateLine(Src, Dst, Constraints[LI], Consistent);
2995    else if (Constraints[LI].isPoint())
2996      Result |= propagatePoint(Src, Dst, Constraints[LI]);
2997  }
2998  return Result;
2999}
3000
3001
3002// Attempt to propagate a distance
3003// constraint into a subscript pair (Src and Dst).
3004// Return true if some simplification occurs.
3005// If the simplification isn't exact (that is, if it is conservative
3006// in terms of dependence), set consistent to false.
3007bool DependenceAnalysis::propagateDistance(const SCEV *&Src,
3008                                           const SCEV *&Dst,
3009                                           Constraint &CurConstraint,
3010                                           bool &Consistent) {
3011  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3012  DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3013  const SCEV *A_K = findCoefficient(Src, CurLoop);
3014  if (A_K->isZero())
3015    return false;
3016  const SCEV *DA_K = SE->getMulExpr(A_K, CurConstraint.getD());
3017  Src = SE->getMinusSCEV(Src, DA_K);
3018  Src = zeroCoefficient(Src, CurLoop);
3019  DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3020  DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3021  Dst = addToCoefficient(Dst, CurLoop, SE->getNegativeSCEV(A_K));
3022  DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3023  if (!findCoefficient(Dst, CurLoop)->isZero())
3024    Consistent = false;
3025  return true;
3026}
3027
3028
3029// Attempt to propagate a line
3030// constraint into a subscript pair (Src and Dst).
3031// Return true if some simplification occurs.
3032// If the simplification isn't exact (that is, if it is conservative
3033// in terms of dependence), set consistent to false.
3034bool DependenceAnalysis::propagateLine(const SCEV *&Src,
3035                                       const SCEV *&Dst,
3036                                       Constraint &CurConstraint,
3037                                       bool &Consistent) {
3038  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3039  const SCEV *A = CurConstraint.getA();
3040  const SCEV *B = CurConstraint.getB();
3041  const SCEV *C = CurConstraint.getC();
3042  DEBUG(dbgs() << "\t\tA = " << *A << ", B = " << *B << ", C = " << *C << "\n");
3043  DEBUG(dbgs() << "\t\tSrc = " << *Src << "\n");
3044  DEBUG(dbgs() << "\t\tDst = " << *Dst << "\n");
3045  if (A->isZero()) {
3046    const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);
3047    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3048    if (!Bconst || !Cconst) return false;
3049    APInt Beta = Bconst->getValue()->getValue();
3050    APInt Charlie = Cconst->getValue()->getValue();
3051    APInt CdivB = Charlie.sdiv(Beta);
3052    assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");
3053    const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3054    //    Src = SE->getAddExpr(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3055    Src = SE->getMinusSCEV(Src, SE->getMulExpr(AP_K, SE->getConstant(CdivB)));
3056    Dst = zeroCoefficient(Dst, CurLoop);
3057    if (!findCoefficient(Src, CurLoop)->isZero())
3058      Consistent = false;
3059  }
3060  else if (B->isZero()) {
3061    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3062    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3063    if (!Aconst || !Cconst) return false;
3064    APInt Alpha = Aconst->getValue()->getValue();
3065    APInt Charlie = Cconst->getValue()->getValue();
3066    APInt CdivA = Charlie.sdiv(Alpha);
3067    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3068    const SCEV *A_K = findCoefficient(Src, CurLoop);
3069    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3070    Src = zeroCoefficient(Src, CurLoop);
3071    if (!findCoefficient(Dst, CurLoop)->isZero())
3072      Consistent = false;
3073  }
3074  else if (isKnownPredicate(CmpInst::ICMP_EQ, A, B)) {
3075    const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);
3076    const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);
3077    if (!Aconst || !Cconst) return false;
3078    APInt Alpha = Aconst->getValue()->getValue();
3079    APInt Charlie = Cconst->getValue()->getValue();
3080    APInt CdivA = Charlie.sdiv(Alpha);
3081    assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");
3082    const SCEV *A_K = findCoefficient(Src, CurLoop);
3083    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, SE->getConstant(CdivA)));
3084    Src = zeroCoefficient(Src, CurLoop);
3085    Dst = addToCoefficient(Dst, CurLoop, A_K);
3086    if (!findCoefficient(Dst, CurLoop)->isZero())
3087      Consistent = false;
3088  }
3089  else {
3090    // paper is incorrect here, or perhaps just misleading
3091    const SCEV *A_K = findCoefficient(Src, CurLoop);
3092    Src = SE->getMulExpr(Src, A);
3093    Dst = SE->getMulExpr(Dst, A);
3094    Src = SE->getAddExpr(Src, SE->getMulExpr(A_K, C));
3095    Src = zeroCoefficient(Src, CurLoop);
3096    Dst = addToCoefficient(Dst, CurLoop, SE->getMulExpr(A_K, B));
3097    if (!findCoefficient(Dst, CurLoop)->isZero())
3098      Consistent = false;
3099  }
3100  DEBUG(dbgs() << "\t\tnew Src = " << *Src << "\n");
3101  DEBUG(dbgs() << "\t\tnew Dst = " << *Dst << "\n");
3102  return true;
3103}
3104
3105
3106// Attempt to propagate a point
3107// constraint into a subscript pair (Src and Dst).
3108// Return true if some simplification occurs.
3109bool DependenceAnalysis::propagatePoint(const SCEV *&Src,
3110                                        const SCEV *&Dst,
3111                                        Constraint &CurConstraint) {
3112  const Loop *CurLoop = CurConstraint.getAssociatedLoop();
3113  const SCEV *A_K = findCoefficient(Src, CurLoop);
3114  const SCEV *AP_K = findCoefficient(Dst, CurLoop);
3115  const SCEV *XA_K = SE->getMulExpr(A_K, CurConstraint.getX());
3116  const SCEV *YAP_K = SE->getMulExpr(AP_K, CurConstraint.getY());
3117  DEBUG(dbgs() << "\t\tSrc is " << *Src << "\n");
3118  Src = SE->getAddExpr(Src, SE->getMinusSCEV(XA_K, YAP_K));
3119  Src = zeroCoefficient(Src, CurLoop);
3120  DEBUG(dbgs() << "\t\tnew Src is " << *Src << "\n");
3121  DEBUG(dbgs() << "\t\tDst is " << *Dst << "\n");
3122  Dst = zeroCoefficient(Dst, CurLoop);
3123  DEBUG(dbgs() << "\t\tnew Dst is " << *Dst << "\n");
3124  return true;
3125}
3126
3127
3128// Update direction vector entry based on the current constraint.
3129void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,
3130                                         const Constraint &CurConstraint
3131                                         ) const {
3132  DEBUG(dbgs() << "\tUpdate direction, constraint =");
3133  DEBUG(CurConstraint.dump(dbgs()));
3134  if (CurConstraint.isAny())
3135    ; // use defaults
3136  else if (CurConstraint.isDistance()) {
3137    // this one is consistent, the others aren't
3138    Level.Scalar = false;
3139    Level.Distance = CurConstraint.getD();
3140    unsigned NewDirection = Dependence::DVEntry::NONE;
3141    if (!SE->isKnownNonZero(Level.Distance)) // if may be zero
3142      NewDirection = Dependence::DVEntry::EQ;
3143    if (!SE->isKnownNonPositive(Level.Distance)) // if may be positive
3144      NewDirection |= Dependence::DVEntry::LT;
3145    if (!SE->isKnownNonNegative(Level.Distance)) // if may be negative
3146      NewDirection |= Dependence::DVEntry::GT;
3147    Level.Direction &= NewDirection;
3148  }
3149  else if (CurConstraint.isLine()) {
3150    Level.Scalar = false;
3151    Level.Distance = NULL;
3152    // direction should be accurate
3153  }
3154  else if (CurConstraint.isPoint()) {
3155    Level.Scalar = false;
3156    Level.Distance = NULL;
3157    unsigned NewDirection = Dependence::DVEntry::NONE;
3158    if (!isKnownPredicate(CmpInst::ICMP_NE,
3159                          CurConstraint.getY(),
3160                          CurConstraint.getX()))
3161      // if X may be = Y
3162      NewDirection |= Dependence::DVEntry::EQ;
3163    if (!isKnownPredicate(CmpInst::ICMP_SLE,
3164                          CurConstraint.getY(),
3165                          CurConstraint.getX()))
3166      // if Y may be > X
3167      NewDirection |= Dependence::DVEntry::LT;
3168    if (!isKnownPredicate(CmpInst::ICMP_SGE,
3169                          CurConstraint.getY(),
3170                          CurConstraint.getX()))
3171      // if Y may be < X
3172      NewDirection |= Dependence::DVEntry::GT;
3173    Level.Direction &= NewDirection;
3174  }
3175  else
3176    llvm_unreachable("constraint has unexpected kind");
3177}
3178
3179/// Check if we can delinearize the subscripts. If the SCEVs representing the
3180/// source and destination array references are recurrences on a nested loop,
3181/// this function flattens the nested recurrences into seperate recurrences
3182/// for each loop level.
3183bool
3184DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV,
3185                                   SmallVectorImpl<Subscript> &Pair) const {
3186  const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);
3187  const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV);
3188  if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine())
3189    return false;
3190
3191  SmallVector<const SCEV *, 4> SrcSubscripts, DstSubscripts, SrcSizes, DstSizes;
3192  SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes);
3193  DstAR->delinearize(*SE, DstSubscripts, DstSizes);
3194
3195  int size = SrcSubscripts.size();
3196  int dstSize = DstSubscripts.size();
3197  if (size != dstSize || size < 2)
3198    return false;
3199
3200#ifndef NDEBUG
3201  DEBUG(errs() << "\nSrcSubscripts: ");
3202  for (int i = 0; i < size; i++)
3203    DEBUG(errs() << *SrcSubscripts[i]);
3204  DEBUG(errs() << "\nDstSubscripts: ");
3205  for (int i = 0; i < size; i++)
3206    DEBUG(errs() << *DstSubscripts[i]);
3207#endif
3208
3209  Pair.resize(size);
3210  for (int i = 0; i < size; ++i) {
3211    Pair[i].Src = SrcSubscripts[i];
3212    Pair[i].Dst = DstSubscripts[i];
3213  }
3214
3215  return true;
3216}
3217
3218//===----------------------------------------------------------------------===//
3219
3220#ifndef NDEBUG
3221// For debugging purposes, dump a small bit vector to dbgs().
3222static void dumpSmallBitVector(SmallBitVector &BV) {
3223  dbgs() << "{";
3224  for (int VI = BV.find_first(); VI >= 0; VI = BV.find_next(VI)) {
3225    dbgs() << VI;
3226    if (BV.find_next(VI) >= 0)
3227      dbgs() << ' ';
3228  }
3229  dbgs() << "}\n";
3230}
3231#endif
3232
3233
3234// depends -
3235// Returns NULL if there is no dependence.
3236// Otherwise, return a Dependence with as many details as possible.
3237// Corresponds to Section 3.1 in the paper
3238//
3239//            Practical Dependence Testing
3240//            Goff, Kennedy, Tseng
3241//            PLDI 1991
3242//
3243// Care is required to keep the routine below, getSplitIteration(),
3244// up to date with respect to this routine.
3245Dependence *DependenceAnalysis::depends(Instruction *Src,
3246                                        Instruction *Dst,
3247                                        bool PossiblyLoopIndependent) {
3248  if (Src == Dst)
3249    PossiblyLoopIndependent = false;
3250
3251  if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) ||
3252      (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory()))
3253    // if both instructions don't reference memory, there's no dependence
3254    return NULL;
3255
3256  if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) {
3257    // can only analyze simple loads and stores, i.e., no calls, invokes, etc.
3258    DEBUG(dbgs() << "can only handle simple loads and stores\n");
3259    return new Dependence(Src, Dst);
3260  }
3261
3262  Value *SrcPtr = getPointerOperand(Src);
3263  Value *DstPtr = getPointerOperand(Dst);
3264
3265  switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) {
3266  case AliasAnalysis::MayAlias:
3267  case AliasAnalysis::PartialAlias:
3268    // cannot analyse objects if we don't understand their aliasing.
3269    DEBUG(dbgs() << "can't analyze may or partial alias\n");
3270    return new Dependence(Src, Dst);
3271  case AliasAnalysis::NoAlias:
3272    // If the objects noalias, they are distinct, accesses are independent.
3273    DEBUG(dbgs() << "no alias\n");
3274    return NULL;
3275  case AliasAnalysis::MustAlias:
3276    break; // The underlying objects alias; test accesses for dependence.
3277  }
3278
3279  // establish loop nesting levels
3280  establishNestingLevels(Src, Dst);
3281  DEBUG(dbgs() << "    common nesting levels = " << CommonLevels << "\n");
3282  DEBUG(dbgs() << "    maximum nesting levels = " << MaxLevels << "\n");
3283
3284  FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels);
3285  ++TotalArrayPairs;
3286
3287  // See if there are GEPs we can use.
3288  bool UsefulGEP = false;
3289  GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
3290  GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
3291  if (SrcGEP && DstGEP &&
3292      SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
3293    const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
3294    const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
3295    DEBUG(dbgs() << "    SrcPtrSCEV = " << *SrcPtrSCEV << "\n");
3296    DEBUG(dbgs() << "    DstPtrSCEV = " << *DstPtrSCEV << "\n");
3297
3298    UsefulGEP =
3299      isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
3300      isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
3301  }
3302  unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
3303  SmallVector<Subscript, 4> Pair(Pairs);
3304  if (UsefulGEP) {
3305    DEBUG(dbgs() << "    using GEPs\n");
3306    unsigned P = 0;
3307    for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
3308           SrcEnd = SrcGEP->idx_end(),
3309           DstIdx = DstGEP->idx_begin();
3310         SrcIdx != SrcEnd;
3311         ++SrcIdx, ++DstIdx, ++P) {
3312      Pair[P].Src = SE->getSCEV(*SrcIdx);
3313      Pair[P].Dst = SE->getSCEV(*DstIdx);
3314    }
3315  }
3316  else {
3317    DEBUG(dbgs() << "    ignoring GEPs\n");
3318    const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3319    const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3320    DEBUG(dbgs() << "    SrcSCEV = " << *SrcSCEV << "\n");
3321    DEBUG(dbgs() << "    DstSCEV = " << *DstSCEV << "\n");
3322    Pair[0].Src = SrcSCEV;
3323    Pair[0].Dst = DstSCEV;
3324  }
3325
3326  if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
3327      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
3328    DEBUG(dbgs() << "    delinerized GEP\n");
3329    Pairs = Pair.size();
3330  }
3331
3332  for (unsigned P = 0; P < Pairs; ++P) {
3333    Pair[P].Loops.resize(MaxLevels + 1);
3334    Pair[P].GroupLoops.resize(MaxLevels + 1);
3335    Pair[P].Group.resize(Pairs);
3336    removeMatchingExtensions(&Pair[P]);
3337    Pair[P].Classification =
3338      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3339                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3340                   Pair[P].Loops);
3341    Pair[P].GroupLoops = Pair[P].Loops;
3342    Pair[P].Group.set(P);
3343    DEBUG(dbgs() << "    subscript " << P << "\n");
3344    DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n");
3345    DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n");
3346    DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n");
3347    DEBUG(dbgs() << "\tloops = ");
3348    DEBUG(dumpSmallBitVector(Pair[P].Loops));
3349  }
3350
3351  SmallBitVector Separable(Pairs);
3352  SmallBitVector Coupled(Pairs);
3353
3354  // Partition subscripts into separable and minimally-coupled groups
3355  // Algorithm in paper is algorithmically better;
3356  // this may be faster in practice. Check someday.
3357  //
3358  // Here's an example of how it works. Consider this code:
3359  //
3360  //   for (i = ...) {
3361  //     for (j = ...) {
3362  //       for (k = ...) {
3363  //         for (l = ...) {
3364  //           for (m = ...) {
3365  //             A[i][j][k][m] = ...;
3366  //             ... = A[0][j][l][i + j];
3367  //           }
3368  //         }
3369  //       }
3370  //     }
3371  //   }
3372  //
3373  // There are 4 subscripts here:
3374  //    0 [i] and [0]
3375  //    1 [j] and [j]
3376  //    2 [k] and [l]
3377  //    3 [m] and [i + j]
3378  //
3379  // We've already classified each subscript pair as ZIV, SIV, etc.,
3380  // and collected all the loops mentioned by pair P in Pair[P].Loops.
3381  // In addition, we've initialized Pair[P].GroupLoops to Pair[P].Loops
3382  // and set Pair[P].Group = {P}.
3383  //
3384  //      Src Dst    Classification Loops  GroupLoops Group
3385  //    0 [i] [0]         SIV       {1}      {1}        {0}
3386  //    1 [j] [j]         SIV       {2}      {2}        {1}
3387  //    2 [k] [l]         RDIV      {3,4}    {3,4}      {2}
3388  //    3 [m] [i + j]     MIV       {1,2,5}  {1,2,5}    {3}
3389  //
3390  // For each subscript SI 0 .. 3, we consider each remaining subscript, SJ.
3391  // So, 0 is compared against 1, 2, and 3; 1 is compared against 2 and 3, etc.
3392  //
3393  // We begin by comparing 0 and 1. The intersection of the GroupLoops is empty.
3394  // Next, 0 and 2. Again, the intersection of their GroupLoops is empty.
3395  // Next 0 and 3. The intersection of their GroupLoop = {1}, not empty,
3396  // so Pair[3].Group = {0,3} and Done = false (that is, 0 will not be added
3397  // to either Separable or Coupled).
3398  //
3399  // Next, we consider 1 and 2. The intersection of the GroupLoops is empty.
3400  // Next, 1 and 3. The intersectionof their GroupLoops = {2}, not empty,
3401  // so Pair[3].Group = {0, 1, 3} and Done = false.
3402  //
3403  // Next, we compare 2 against 3. The intersection of the GroupLoops is empty.
3404  // Since Done remains true, we add 2 to the set of Separable pairs.
3405  //
3406  // Finally, we consider 3. There's nothing to compare it with,
3407  // so Done remains true and we add it to the Coupled set.
3408  // Pair[3].Group = {0, 1, 3} and GroupLoops = {1, 2, 5}.
3409  //
3410  // In the end, we've got 1 separable subscript and 1 coupled group.
3411  for (unsigned SI = 0; SI < Pairs; ++SI) {
3412    if (Pair[SI].Classification == Subscript::NonLinear) {
3413      // ignore these, but collect loops for later
3414      ++NonlinearSubscriptPairs;
3415      collectCommonLoops(Pair[SI].Src,
3416                         LI->getLoopFor(Src->getParent()),
3417                         Pair[SI].Loops);
3418      collectCommonLoops(Pair[SI].Dst,
3419                         LI->getLoopFor(Dst->getParent()),
3420                         Pair[SI].Loops);
3421      Result.Consistent = false;
3422    }
3423    else if (Pair[SI].Classification == Subscript::ZIV) {
3424      // always separable
3425      Separable.set(SI);
3426    }
3427    else {
3428      // SIV, RDIV, or MIV, so check for coupled group
3429      bool Done = true;
3430      for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
3431        SmallBitVector Intersection = Pair[SI].GroupLoops;
3432        Intersection &= Pair[SJ].GroupLoops;
3433        if (Intersection.any()) {
3434          // accumulate set of all the loops in group
3435          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3436          // accumulate set of all subscripts in group
3437          Pair[SJ].Group |= Pair[SI].Group;
3438          Done = false;
3439        }
3440      }
3441      if (Done) {
3442        if (Pair[SI].Group.count() == 1) {
3443          Separable.set(SI);
3444          ++SeparableSubscriptPairs;
3445        }
3446        else {
3447          Coupled.set(SI);
3448          ++CoupledSubscriptPairs;
3449        }
3450      }
3451    }
3452  }
3453
3454  DEBUG(dbgs() << "    Separable = ");
3455  DEBUG(dumpSmallBitVector(Separable));
3456  DEBUG(dbgs() << "    Coupled = ");
3457  DEBUG(dumpSmallBitVector(Coupled));
3458
3459  Constraint NewConstraint;
3460  NewConstraint.setAny(SE);
3461
3462  // test separable subscripts
3463  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
3464    DEBUG(dbgs() << "testing subscript " << SI);
3465    switch (Pair[SI].Classification) {
3466    case Subscript::ZIV:
3467      DEBUG(dbgs() << ", ZIV\n");
3468      if (testZIV(Pair[SI].Src, Pair[SI].Dst, Result))
3469        return NULL;
3470      break;
3471    case Subscript::SIV: {
3472      DEBUG(dbgs() << ", SIV\n");
3473      unsigned Level;
3474      const SCEV *SplitIter = NULL;
3475      if (testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
3476                  Result, NewConstraint, SplitIter))
3477        return NULL;
3478      break;
3479    }
3480    case Subscript::RDIV:
3481      DEBUG(dbgs() << ", RDIV\n");
3482      if (testRDIV(Pair[SI].Src, Pair[SI].Dst, Result))
3483        return NULL;
3484      break;
3485    case Subscript::MIV:
3486      DEBUG(dbgs() << ", MIV\n");
3487      if (testMIV(Pair[SI].Src, Pair[SI].Dst, Pair[SI].Loops, Result))
3488        return NULL;
3489      break;
3490    default:
3491      llvm_unreachable("subscript has unexpected classification");
3492    }
3493  }
3494
3495  if (Coupled.count()) {
3496    // test coupled subscript groups
3497    DEBUG(dbgs() << "starting on coupled subscripts\n");
3498    DEBUG(dbgs() << "MaxLevels + 1 = " << MaxLevels + 1 << "\n");
3499    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3500    for (unsigned II = 0; II <= MaxLevels; ++II)
3501      Constraints[II].setAny(SE);
3502    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
3503      DEBUG(dbgs() << "testing subscript group " << SI << " { ");
3504      SmallBitVector Group(Pair[SI].Group);
3505      SmallBitVector Sivs(Pairs);
3506      SmallBitVector Mivs(Pairs);
3507      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3508      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
3509        DEBUG(dbgs() << SJ << " ");
3510        if (Pair[SJ].Classification == Subscript::SIV)
3511          Sivs.set(SJ);
3512        else
3513          Mivs.set(SJ);
3514      }
3515      DEBUG(dbgs() << "}\n");
3516      while (Sivs.any()) {
3517        bool Changed = false;
3518        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
3519          DEBUG(dbgs() << "testing subscript " << SJ << ", SIV\n");
3520          // SJ is an SIV subscript that's part of the current coupled group
3521          unsigned Level;
3522          const SCEV *SplitIter = NULL;
3523          DEBUG(dbgs() << "SIV\n");
3524          if (testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
3525                      Result, NewConstraint, SplitIter))
3526            return NULL;
3527          ConstrainedLevels.set(Level);
3528          if (intersectConstraints(&Constraints[Level], &NewConstraint)) {
3529            if (Constraints[Level].isEmpty()) {
3530              ++DeltaIndependence;
3531              return NULL;
3532            }
3533            Changed = true;
3534          }
3535          Sivs.reset(SJ);
3536        }
3537        if (Changed) {
3538          // propagate, possibly creating new SIVs and ZIVs
3539          DEBUG(dbgs() << "    propagating\n");
3540          DEBUG(dbgs() << "\tMivs = ");
3541          DEBUG(dumpSmallBitVector(Mivs));
3542          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
3543            // SJ is an MIV subscript that's part of the current coupled group
3544            DEBUG(dbgs() << "\tSJ = " << SJ << "\n");
3545            if (propagate(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops,
3546                          Constraints, Result.Consistent)) {
3547              DEBUG(dbgs() << "\t    Changed\n");
3548              ++DeltaPropagations;
3549              Pair[SJ].Classification =
3550                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3551                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3552                             Pair[SJ].Loops);
3553              switch (Pair[SJ].Classification) {
3554              case Subscript::ZIV:
3555                DEBUG(dbgs() << "ZIV\n");
3556                if (testZIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3557                  return NULL;
3558                Mivs.reset(SJ);
3559                break;
3560              case Subscript::SIV:
3561                Sivs.set(SJ);
3562                Mivs.reset(SJ);
3563                break;
3564              case Subscript::RDIV:
3565              case Subscript::MIV:
3566                break;
3567              default:
3568                llvm_unreachable("bad subscript classification");
3569              }
3570            }
3571          }
3572        }
3573      }
3574
3575      // test & propagate remaining RDIVs
3576      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
3577        if (Pair[SJ].Classification == Subscript::RDIV) {
3578          DEBUG(dbgs() << "RDIV test\n");
3579          if (testRDIV(Pair[SJ].Src, Pair[SJ].Dst, Result))
3580            return NULL;
3581          // I don't yet understand how to propagate RDIV results
3582          Mivs.reset(SJ);
3583        }
3584      }
3585
3586      // test remaining MIVs
3587      // This code is temporary.
3588      // Better to somehow test all remaining subscripts simultaneously.
3589      for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
3590        if (Pair[SJ].Classification == Subscript::MIV) {
3591          DEBUG(dbgs() << "MIV test\n");
3592          if (testMIV(Pair[SJ].Src, Pair[SJ].Dst, Pair[SJ].Loops, Result))
3593            return NULL;
3594        }
3595        else
3596          llvm_unreachable("expected only MIV subscripts at this point");
3597      }
3598
3599      // update Result.DV from constraint vector
3600      DEBUG(dbgs() << "    updating\n");
3601      for (int SJ = ConstrainedLevels.find_first();
3602           SJ >= 0; SJ = ConstrainedLevels.find_next(SJ)) {
3603        updateDirection(Result.DV[SJ - 1], Constraints[SJ]);
3604        if (Result.DV[SJ - 1].Direction == Dependence::DVEntry::NONE)
3605          return NULL;
3606      }
3607    }
3608  }
3609
3610  // Make sure the Scalar flags are set correctly.
3611  SmallBitVector CompleteLoops(MaxLevels + 1);
3612  for (unsigned SI = 0; SI < Pairs; ++SI)
3613    CompleteLoops |= Pair[SI].Loops;
3614  for (unsigned II = 1; II <= CommonLevels; ++II)
3615    if (CompleteLoops[II])
3616      Result.DV[II - 1].Scalar = false;
3617
3618  if (PossiblyLoopIndependent) {
3619    // Make sure the LoopIndependent flag is set correctly.
3620    // All directions must include equal, otherwise no
3621    // loop-independent dependence is possible.
3622    for (unsigned II = 1; II <= CommonLevels; ++II) {
3623      if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) {
3624        Result.LoopIndependent = false;
3625        break;
3626      }
3627    }
3628  }
3629  else {
3630    // On the other hand, if all directions are equal and there's no
3631    // loop-independent dependence possible, then no dependence exists.
3632    bool AllEqual = true;
3633    for (unsigned II = 1; II <= CommonLevels; ++II) {
3634      if (Result.getDirection(II) != Dependence::DVEntry::EQ) {
3635        AllEqual = false;
3636        break;
3637      }
3638    }
3639    if (AllEqual)
3640      return NULL;
3641  }
3642
3643  FullDependence *Final = new FullDependence(Result);
3644  Result.DV = NULL;
3645  return Final;
3646}
3647
3648
3649
3650//===----------------------------------------------------------------------===//
3651// getSplitIteration -
3652// Rather than spend rarely-used space recording the splitting iteration
3653// during the Weak-Crossing SIV test, we re-compute it on demand.
3654// The re-computation is basically a repeat of the entire dependence test,
3655// though simplified since we know that the dependence exists.
3656// It's tedious, since we must go through all propagations, etc.
3657//
3658// Care is required to keep this code up to date with respect to the routine
3659// above, depends().
3660//
3661// Generally, the dependence analyzer will be used to build
3662// a dependence graph for a function (basically a map from instructions
3663// to dependences). Looking for cycles in the graph shows us loops
3664// that cannot be trivially vectorized/parallelized.
3665//
3666// We can try to improve the situation by examining all the dependences
3667// that make up the cycle, looking for ones we can break.
3668// Sometimes, peeling the first or last iteration of a loop will break
3669// dependences, and we've got flags for those possibilities.
3670// Sometimes, splitting a loop at some other iteration will do the trick,
3671// and we've got a flag for that case. Rather than waste the space to
3672// record the exact iteration (since we rarely know), we provide
3673// a method that calculates the iteration. It's a drag that it must work
3674// from scratch, but wonderful in that it's possible.
3675//
3676// Here's an example:
3677//
3678//    for (i = 0; i < 10; i++)
3679//        A[i] = ...
3680//        ... = A[11 - i]
3681//
3682// There's a loop-carried flow dependence from the store to the load,
3683// found by the weak-crossing SIV test. The dependence will have a flag,
3684// indicating that the dependence can be broken by splitting the loop.
3685// Calling getSplitIteration will return 5.
3686// Splitting the loop breaks the dependence, like so:
3687//
3688//    for (i = 0; i <= 5; i++)
3689//        A[i] = ...
3690//        ... = A[11 - i]
3691//    for (i = 6; i < 10; i++)
3692//        A[i] = ...
3693//        ... = A[11 - i]
3694//
3695// breaks the dependence and allows us to vectorize/parallelize
3696// both loops.
3697const  SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep,
3698                                                   unsigned SplitLevel) {
3699  assert(Dep && "expected a pointer to a Dependence");
3700  assert(Dep->isSplitable(SplitLevel) &&
3701         "Dep should be splitable at SplitLevel");
3702  Instruction *Src = Dep->getSrc();
3703  Instruction *Dst = Dep->getDst();
3704  assert(Src->mayReadFromMemory() || Src->mayWriteToMemory());
3705  assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory());
3706  assert(isLoadOrStore(Src));
3707  assert(isLoadOrStore(Dst));
3708  Value *SrcPtr = getPointerOperand(Src);
3709  Value *DstPtr = getPointerOperand(Dst);
3710  assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) ==
3711         AliasAnalysis::MustAlias);
3712
3713  // establish loop nesting levels
3714  establishNestingLevels(Src, Dst);
3715
3716  FullDependence Result(Src, Dst, false, CommonLevels);
3717
3718  // See if there are GEPs we can use.
3719  bool UsefulGEP = false;
3720  GEPOperator *SrcGEP = dyn_cast<GEPOperator>(SrcPtr);
3721  GEPOperator *DstGEP = dyn_cast<GEPOperator>(DstPtr);
3722  if (SrcGEP && DstGEP &&
3723      SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) {
3724    const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand());
3725    const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand());
3726    UsefulGEP =
3727      isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) &&
3728      isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent()));
3729  }
3730  unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1;
3731  SmallVector<Subscript, 4> Pair(Pairs);
3732  if (UsefulGEP) {
3733    unsigned P = 0;
3734    for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(),
3735           SrcEnd = SrcGEP->idx_end(),
3736           DstIdx = DstGEP->idx_begin();
3737         SrcIdx != SrcEnd;
3738         ++SrcIdx, ++DstIdx, ++P) {
3739      Pair[P].Src = SE->getSCEV(*SrcIdx);
3740      Pair[P].Dst = SE->getSCEV(*DstIdx);
3741    }
3742  }
3743  else {
3744    const SCEV *SrcSCEV = SE->getSCEV(SrcPtr);
3745    const SCEV *DstSCEV = SE->getSCEV(DstPtr);
3746    Pair[0].Src = SrcSCEV;
3747    Pair[0].Dst = DstSCEV;
3748  }
3749
3750  if (Delinearize && Pairs == 1 && CommonLevels > 1 &&
3751      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) {
3752    DEBUG(dbgs() << "    delinerized GEP\n");
3753    Pairs = Pair.size();
3754  }
3755
3756  for (unsigned P = 0; P < Pairs; ++P) {
3757    Pair[P].Loops.resize(MaxLevels + 1);
3758    Pair[P].GroupLoops.resize(MaxLevels + 1);
3759    Pair[P].Group.resize(Pairs);
3760    removeMatchingExtensions(&Pair[P]);
3761    Pair[P].Classification =
3762      classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()),
3763                   Pair[P].Dst, LI->getLoopFor(Dst->getParent()),
3764                   Pair[P].Loops);
3765    Pair[P].GroupLoops = Pair[P].Loops;
3766    Pair[P].Group.set(P);
3767  }
3768
3769  SmallBitVector Separable(Pairs);
3770  SmallBitVector Coupled(Pairs);
3771
3772  // partition subscripts into separable and minimally-coupled groups
3773  for (unsigned SI = 0; SI < Pairs; ++SI) {
3774    if (Pair[SI].Classification == Subscript::NonLinear) {
3775      // ignore these, but collect loops for later
3776      collectCommonLoops(Pair[SI].Src,
3777                         LI->getLoopFor(Src->getParent()),
3778                         Pair[SI].Loops);
3779      collectCommonLoops(Pair[SI].Dst,
3780                         LI->getLoopFor(Dst->getParent()),
3781                         Pair[SI].Loops);
3782      Result.Consistent = false;
3783    }
3784    else if (Pair[SI].Classification == Subscript::ZIV)
3785      Separable.set(SI);
3786    else {
3787      // SIV, RDIV, or MIV, so check for coupled group
3788      bool Done = true;
3789      for (unsigned SJ = SI + 1; SJ < Pairs; ++SJ) {
3790        SmallBitVector Intersection = Pair[SI].GroupLoops;
3791        Intersection &= Pair[SJ].GroupLoops;
3792        if (Intersection.any()) {
3793          // accumulate set of all the loops in group
3794          Pair[SJ].GroupLoops |= Pair[SI].GroupLoops;
3795          // accumulate set of all subscripts in group
3796          Pair[SJ].Group |= Pair[SI].Group;
3797          Done = false;
3798        }
3799      }
3800      if (Done) {
3801        if (Pair[SI].Group.count() == 1)
3802          Separable.set(SI);
3803        else
3804          Coupled.set(SI);
3805      }
3806    }
3807  }
3808
3809  Constraint NewConstraint;
3810  NewConstraint.setAny(SE);
3811
3812  // test separable subscripts
3813  for (int SI = Separable.find_first(); SI >= 0; SI = Separable.find_next(SI)) {
3814    switch (Pair[SI].Classification) {
3815    case Subscript::SIV: {
3816      unsigned Level;
3817      const SCEV *SplitIter = NULL;
3818      (void) testSIV(Pair[SI].Src, Pair[SI].Dst, Level,
3819                     Result, NewConstraint, SplitIter);
3820      if (Level == SplitLevel) {
3821        assert(SplitIter != NULL);
3822        return SplitIter;
3823      }
3824      break;
3825    }
3826    case Subscript::ZIV:
3827    case Subscript::RDIV:
3828    case Subscript::MIV:
3829      break;
3830    default:
3831      llvm_unreachable("subscript has unexpected classification");
3832    }
3833  }
3834
3835  if (Coupled.count()) {
3836    // test coupled subscript groups
3837    SmallVector<Constraint, 4> Constraints(MaxLevels + 1);
3838    for (unsigned II = 0; II <= MaxLevels; ++II)
3839      Constraints[II].setAny(SE);
3840    for (int SI = Coupled.find_first(); SI >= 0; SI = Coupled.find_next(SI)) {
3841      SmallBitVector Group(Pair[SI].Group);
3842      SmallBitVector Sivs(Pairs);
3843      SmallBitVector Mivs(Pairs);
3844      SmallBitVector ConstrainedLevels(MaxLevels + 1);
3845      for (int SJ = Group.find_first(); SJ >= 0; SJ = Group.find_next(SJ)) {
3846        if (Pair[SJ].Classification == Subscript::SIV)
3847          Sivs.set(SJ);
3848        else
3849          Mivs.set(SJ);
3850      }
3851      while (Sivs.any()) {
3852        bool Changed = false;
3853        for (int SJ = Sivs.find_first(); SJ >= 0; SJ = Sivs.find_next(SJ)) {
3854          // SJ is an SIV subscript that's part of the current coupled group
3855          unsigned Level;
3856          const SCEV *SplitIter = NULL;
3857          (void) testSIV(Pair[SJ].Src, Pair[SJ].Dst, Level,
3858                         Result, NewConstraint, SplitIter);
3859          if (Level == SplitLevel && SplitIter)
3860            return SplitIter;
3861          ConstrainedLevels.set(Level);
3862          if (intersectConstraints(&Constraints[Level], &NewConstraint))
3863            Changed = true;
3864          Sivs.reset(SJ);
3865        }
3866        if (Changed) {
3867          // propagate, possibly creating new SIVs and ZIVs
3868          for (int SJ = Mivs.find_first(); SJ >= 0; SJ = Mivs.find_next(SJ)) {
3869            // SJ is an MIV subscript that's part of the current coupled group
3870            if (propagate(Pair[SJ].Src, Pair[SJ].Dst,
3871                          Pair[SJ].Loops, Constraints, Result.Consistent)) {
3872              Pair[SJ].Classification =
3873                classifyPair(Pair[SJ].Src, LI->getLoopFor(Src->getParent()),
3874                             Pair[SJ].Dst, LI->getLoopFor(Dst->getParent()),
3875                             Pair[SJ].Loops);
3876              switch (Pair[SJ].Classification) {
3877              case Subscript::ZIV:
3878                Mivs.reset(SJ);
3879                break;
3880              case Subscript::SIV:
3881                Sivs.set(SJ);
3882                Mivs.reset(SJ);
3883                break;
3884              case Subscript::RDIV:
3885              case Subscript::MIV:
3886                break;
3887              default:
3888                llvm_unreachable("bad subscript classification");
3889              }
3890            }
3891          }
3892        }
3893      }
3894    }
3895  }
3896  llvm_unreachable("somehow reached end of routine");
3897  return NULL;
3898}
3899