1dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//===-- SeparateConstOffsetFromGEP.cpp - ------------------------*- C++ -*-===//
2dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
3dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//                     The LLVM Compiler Infrastructure
4dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
5dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// This file is distributed under the University of Illinois Open Source
6dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// License. See LICENSE.TXT for details.
7dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
8dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//===----------------------------------------------------------------------===//
9dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
10dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// Loop unrolling may create many similar GEPs for array accesses.
11dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// e.g., a 2-level loop
12dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
13dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// float a[32][32]; // global variable
14dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
15dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// for (int i = 0; i < 2; ++i) {
16dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//   for (int j = 0; j < 2; ++j) {
17dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//     ...
18dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//     ... = a[x + i][y + j];
19dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//     ...
20dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//   }
21dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// }
22dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
23dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// will probably be unrolled to:
24dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
25dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// gep %a, 0, %x, %y; load
26dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// gep %a, 0, %x, %y + 1; load
27dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// gep %a, 0, %x + 1, %y; load
28dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// gep %a, 0, %x + 1, %y + 1; load
29dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
30dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// LLVM's GVN does not use partial redundancy elimination yet, and is thus
31dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// unable to reuse (gep %a, 0, %x, %y). As a result, this misoptimization incurs
32dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// significant slowdown in targets with limited addressing modes. For instance,
33dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// because the PTX target does not support the reg+reg addressing mode, the
34dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// NVPTX backend emits PTX code that literally computes the pointer address of
35dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// each GEP, wasting tons of registers. It emits the following PTX for the
36dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// first load and similar PTX for other loads.
37dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
38dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u32         %r1, %x;
39dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u32         %r2, %y;
40dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mul.wide.u32    %rl2, %r1, 128;
41dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u64         %rl3, a;
42dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// add.s64         %rl4, %rl3, %rl2;
43dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mul.wide.u32    %rl5, %r2, 4;
44dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// add.s64         %rl6, %rl4, %rl5;
45dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32   %f1, [%rl6];
46dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
47dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// To reduce the register pressure, the optimization implemented in this file
48dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// merges the common part of a group of GEPs, so we can compute each pointer
49dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// address by adding a simple offset to the common part, saving many registers.
50dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
51dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// It works by splitting each GEP into a variadic base and a constant offset.
52dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// The variadic base can be computed once and reused by multiple GEPs, and the
53dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// constant offsets can be nicely folded into the reg+immediate addressing mode
54dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// (supported by most targets) without using any extra register.
55dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
56dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// For instance, we transform the four GEPs and four loads in the above example
57dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// into:
58dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
59dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// base = gep a, 0, x, y
60dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// load base
61dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// laod base + 1  * sizeof(float)
62dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// load base + 32 * sizeof(float)
63dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// load base + 33 * sizeof(float)
64dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
65dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// Given the transformed IR, a backend that supports the reg+immediate
66dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// addressing mode can easily fold the pointer arithmetics into the loads. For
67dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// example, the NVPTX backend can easily fold the pointer arithmetics into the
68dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32 instructions, and the resultant PTX uses much fewer registers.
69dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
70dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u32         %r1, %tid.x;
71dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u32         %r2, %tid.y;
72dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mul.wide.u32    %rl2, %r1, 128;
73dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mov.u64         %rl3, a;
74dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// add.s64         %rl4, %rl3, %rl2;
75dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// mul.wide.u32    %rl5, %r2, 4;
76dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// add.s64         %rl6, %rl4, %rl5;
77dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32   %f1, [%rl6]; // so far the same as unoptimized PTX
78dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32   %f2, [%rl6+4]; // much better
79dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32   %f3, [%rl6+128]; // much better
80dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines// ld.global.f32   %f4, [%rl6+132]; // much better
81dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//
82dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines//===----------------------------------------------------------------------===//
83dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
84dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Analysis/TargetTransformInfo.h"
85dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Analysis/ValueTracking.h"
86dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/Constants.h"
87dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/DataLayout.h"
88dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/Instructions.h"
89dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/LLVMContext.h"
90dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/Module.h"
91dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/IR/Operator.h"
92dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Support/CommandLine.h"
93dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Support/raw_ostream.h"
94dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines#include "llvm/Transforms/Scalar.h"
95dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
96dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesusing namespace llvm;
97dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
98dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesstatic cl::opt<bool> DisableSeparateConstOffsetFromGEP(
99dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    "disable-separate-const-offset-from-gep", cl::init(false),
100dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    cl::desc("Do not separate the constant offset from a GEP instruction"),
101dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    cl::Hidden);
102dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
103dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesnamespace {
104dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
105dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// \brief A helper class for separating a constant offset from a GEP index.
106dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines///
107dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// In real programs, a GEP index may be more complicated than a simple addition
108dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// of something and a constant integer which can be trivially splitted. For
109dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// example, to split ((a << 3) | 5) + b, we need to search deeper for the
110dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// constant offset, so that we can separate the index to (a << 3) + b and 5.
111dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines///
112dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// Therefore, this class looks into the expression that computes a given GEP
113dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// index, and tries to find a constant integer that can be hoisted to the
114dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// outermost level of the expression as an addition. Not every constant in an
115dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// expression can jump out. e.g., we cannot transform (b * (a + 5)) to (b * a +
116dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// 5); nor can we transform (3 * (a + 5)) to (3 * a + 5), however in this case,
117dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// -instcombine probably already optimized (3 * (a + 5)) to (3 * a + 15).
118dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesclass ConstantOffsetExtractor {
119dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines public:
120dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Extracts a constant offset from the given GEP index. It outputs the
121dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// numeric value of the extracted constant offset (0 if failed), and a
122dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// new index representing the remainder (equal to the original index minus
123dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// the constant offset).
124cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p Idx    The given GEP index
125cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p NewIdx The new index to replace (output)
126cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p DL     The datalayout of the module
127cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p GEP    The given GEP
128dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL,
129cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                         GetElementPtrInst *GEP);
130dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Looks for a constant offset without extracting it. The meaning of the
131dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// arguments and the return value are the same as Extract.
132cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP);
133dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
134dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines private:
135dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt)
136dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      : DL(Layout), IP(InsertionPt) {}
137cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Searches the expression that computes V for a non-zero constant C s.t.
138cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// V can be reassociated into the form V' + C. If the searching is
139cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// successful, returns C and update UserChain as a def-use chain from C to V;
140cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// otherwise, UserChain is empty.
141dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ///
142cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p V            The given expression
143cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p SignExtended Whether V will be sign-extended in the computation of the
144cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                 GEP index
145cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p ZeroExtended Whether V will be zero-extended in the computation of the
146cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                 GEP index
147cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p NonNegative  Whether V is guaranteed to be non-negative. For example,
148cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                 an index of an inbounds GEP is guaranteed to be
149cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                 non-negative. Levaraging this, we can better split
150cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                 inbounds GEPs.
151cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative);
152cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// A helper function to look into both operands of a binary operator.
153cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended,
154cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                            bool ZeroExtended);
155cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// After finding the constant offset C from the GEP index I, we build a new
156cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// index I' s.t. I' + C = I. This function builds and returns the new
157cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// index I' according to UserChain produced by function "find".
158cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
159cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// The building conceptually takes two steps:
160cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// 1) iteratively distribute s/zext towards the leaves of the expression tree
161cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// that computes I
162cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// 2) reassociate the expression tree to the form I' + C.
163cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
164cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute
165cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// sext to a, b and 5 so that we have
166cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   sext(a) + (sext(b) + 5).
167cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Then, we reassociate it to
168cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   (sext(a) + sext(b)) + 5.
169cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Given this form, we know I' is sext(a) + sext(b).
170cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *rebuildWithoutConstOffset();
171cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// After the first step of rebuilding the GEP index without the constant
172cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// offset, distribute s/zext to the operands of all operators in UserChain.
173cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// e.g., zext(sext(a + (b + 5)) (assuming no overflow) =>
174cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))).
175cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
176cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// The function also updates UserChain to point to new subexpressions after
177cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// distributing s/zext. e.g., the old UserChain of the above example is
178cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)),
179cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// and the new UserChain is
180cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) ->
181cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   zext(sext(a)) + (zext(sext(b)) + zext(sext(5))
182cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
183cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p ChainIndex The index to UserChain. ChainIndex is initially
184cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///               UserChain.size() - 1, and is decremented during
185cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///               the recursion.
186cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *distributeExtsAndCloneChain(unsigned ChainIndex);
187cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Reassociates the GEP index to the form I' + C and returns I'.
188cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *removeConstOffset(unsigned ChainIndex);
189cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// A helper function to apply ExtInsts, a list of s/zext, to value V.
190cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function
191cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// returns "sext i32 (zext i16 V to i32) to i64".
192cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *applyExts(Value *V);
193dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
194dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0.
195dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool NoCommonBits(Value *LHS, Value *RHS) const;
196dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Computes which bits are known to be one or zero.
197dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// \p KnownOne Mask of all bits that are known to be one.
198dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// \p KnownZero Mask of all bits that are known to be zero.
199dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const;
200cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// A helper function that returns whether we can trace into the operands
201cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// of binary operator BO for a constant offset.
202cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
203cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p SignExtended Whether BO is surrounded by sext
204cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p ZeroExtended Whether BO is surrounded by zext
205cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound
206cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///                array index.
207cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO,
208cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                    bool NonNegative);
209dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
210dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// The path from the constant offset to the old GEP index. e.g., if the GEP
211dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// index is "a * b + (c + 5)". After running function find, UserChain[0] will
212dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and
213dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// UserChain[2] will be the entire expression "a * b + (c + 5)".
214dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ///
215cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// This path helps to rebuild the new GEP index.
216dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SmallVector<User *, 8> UserChain;
217cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// A data structure used in rebuildWithoutConstOffset. Contains all
218cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// sext/zext instructions along UserChain.
219cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  SmallVector<CastInst *, 16> ExtInsts;
220dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// The data layout of the module. Used in ComputeKnownBits.
221dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  const DataLayout *DL;
222dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  Instruction *IP;  /// Insertion position of cloned instructions.
223dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines};
224dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
225dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// \brief A pass that tries to split every GEP in the function into a variadic
226dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// base and a constant offset. It is a FunctionPass because searching for the
227dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines/// constant offset may inspect other basic blocks.
228dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesclass SeparateConstOffsetFromGEP : public FunctionPass {
229dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines public:
230dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  static char ID;
231dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  SeparateConstOffsetFromGEP() : FunctionPass(ID) {
232dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    initializeSeparateConstOffsetFromGEPPass(*PassRegistry::getPassRegistry());
233dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
234dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
235dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  void getAnalysisUsage(AnalysisUsage &AU) const override {
236dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    AU.addRequired<DataLayoutPass>();
237dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    AU.addRequired<TargetTransformInfo>();
238dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
239cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
240cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool doInitialization(Module &M) override {
241cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
242cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (DLP == nullptr)
243cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      report_fatal_error("data layout missing");
244cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    DL = &DLP->getDataLayout();
245cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return false;
246cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
247cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
248dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool runOnFunction(Function &F) override;
249dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
250dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines private:
251dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Tries to split the given GEP into a variadic base and a constant offset,
252dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// and returns true if the splitting succeeds.
253dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool splitGEP(GetElementPtrInst *GEP);
254dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// Finds the constant offset within each index, and accumulates them. This
255dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// function only inspects the GEP without changing it. The output
256dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// NeedsExtraction indicates whether we can extract a non-zero constant
257dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  /// offset from any index.
258cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction);
259cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Canonicalize array indices to pointer-size integers. This helps to
260cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// simplify the logic of splitting a GEP. For example, if a + b is a
261cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// pointer-size integer, we have
262cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   gep base, a + b = gep (gep base, a), b
263cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// However, this equality may not hold if the size of a + b is smaller than
264cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// the pointer size, because LLVM conceptually sign-extends GEP indices to
265cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// pointer size before computing the address
266cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// (http://llvm.org/docs/LangRef.html#id181).
267cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
268cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// This canonicalization is very likely already done in clang and
269cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// instcombine. Therefore, the program will probably remain the same.
270cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
271cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Returns true if the module changes.
272cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
273cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Verified in @i32_add in split-gep.ll
274cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP);
275cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// For each array index that is in the form of zext(a), convert it to sext(a)
276cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// if we can prove zext(a) <= max signed value of typeof(a). We prefer
277cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// sext(a) to zext(a), because in the special case where x + y >= 0 and
278cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// (x >= 0 or y >= 0), function CanTraceInto can split sext(x + y),
279cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// while no such case exists for zext(x + y).
280cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
281cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Note that
282cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   zext(x + y) = zext(x) + zext(y)
283cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// is wrong, e.g.,
284cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   zext i32(UINT_MAX + 1) to i64 !=
285cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///   (zext i32 UINT_MAX to i64) + (zext i32 1 to i64)
286cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
287cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Returns true if the module changes.
288cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ///
289cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// Verified in @inbounds_zext_add in split-gep.ll and @sum_of_array3 in
290cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  /// split-gep-and-gvn.ll
291cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool convertInBoundsZExtToSExt(GetElementPtrInst *GEP);
292cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
293cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  const DataLayout *DL;
294dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines};
295dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}  // anonymous namespace
296dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
297dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hineschar SeparateConstOffsetFromGEP::ID = 0;
298dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_BEGIN(
299dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
300dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    "Split GEPs to a variadic base and a constant offset for better CSE", false,
301dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    false)
302dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
303dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_DEPENDENCY(DataLayoutPass)
304dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesINITIALIZE_PASS_END(
305dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    SeparateConstOffsetFromGEP, "separate-const-offset-from-gep",
306dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    "Split GEPs to a variadic base and a constant offset for better CSE", false,
307dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    false)
308dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
309dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen HinesFunctionPass *llvm::createSeparateConstOffsetFromGEPPass() {
310dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return new SeparateConstOffsetFromGEP();
311dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
312dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
313cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesbool ConstantOffsetExtractor::CanTraceInto(bool SignExtended,
314cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                            bool ZeroExtended,
315cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                            BinaryOperator *BO,
316cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                            bool NonNegative) {
317cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // We only consider ADD, SUB and OR, because a non-zero constant found in
318cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // expressions composed of these operations can be easily hoisted as a
319cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // constant offset by reassociation.
320cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (BO->getOpcode() != Instruction::Add &&
321cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      BO->getOpcode() != Instruction::Sub &&
322cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      BO->getOpcode() != Instruction::Or) {
323cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return false;
324cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
325cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
326cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1);
327cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS
328cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS).
329cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS))
330cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return false;
331cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
332cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // In addition, tracing into BO requires that its surrounding s/zext (if
333cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // any) is distributable to both operands.
334cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
335cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Suppose BO = A op B.
336cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //  SignExtended | ZeroExtended | Distributable?
337cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // --------------+--------------+----------------------------------
338cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //       0       |      0       | true because no s/zext exists
339cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //       0       |      1       | zext(BO) == zext(A) op zext(B)
340cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //       1       |      0       | sext(BO) == sext(A) op sext(B)
341cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //       1       |      1       | zext(sext(BO)) ==
342cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //               |              |     zext(sext(A)) op zext(sext(B))
343cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) {
344cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // If a + b >= 0 and (a >= 0 or b >= 0), then
345cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //   sext(a + b) = sext(a) + sext(b)
346cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // even if the addition is not marked nsw.
347cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
348cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Leveraging this invarient, we can trace into an sext'ed inbound GEP
349cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // index if the constant offset is non-negative.
350cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
351cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Verified in @sext_add in split-gep.ll.
352cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) {
353cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      if (!ConstLHS->isNegative())
354cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        return true;
355cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
356cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) {
357cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      if (!ConstRHS->isNegative())
358cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        return true;
359cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
360cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
361dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
362dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B)
363dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B)
364dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (BO->getOpcode() == Instruction::Add ||
365dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      BO->getOpcode() == Instruction::Sub) {
366cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (SignExtended && !BO->hasNoSignedWrap())
367cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return false;
368cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (ZeroExtended && !BO->hasNoUnsignedWrap())
369cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return false;
370dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
371dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
372cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return true;
373dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
374dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
375cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesAPInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO,
376cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                                   bool SignExtended,
377cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                                   bool ZeroExtended) {
378cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // BO being non-negative does not shed light on whether its operands are
379cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // non-negative. Clear the NonNegative flag here.
380cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended,
381cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                              /* NonNegative */ false);
382dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // If we found a constant offset in the left operand, stop and return that.
383dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // This shortcut might cause us to miss opportunities of combining the
384dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9.
385dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // However, such cases are probably already handled by -instcombine,
386dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // given this pass runs after the standard optimizations.
387dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (ConstantOffset != 0) return ConstantOffset;
388cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended,
389cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                        /* NonNegative */ false);
390dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // If U is a sub operator, negate the constant offset found in the right
391dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // operand.
392cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (BO->getOpcode() == Instruction::Sub)
393cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    ConstantOffset = -ConstantOffset;
394cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return ConstantOffset;
395dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
396dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
397cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesAPInt ConstantOffsetExtractor::find(Value *V, bool SignExtended,
398cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                    bool ZeroExtended, bool NonNegative) {
399cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // TODO(jingyue): We could trace into integer/pointer casts, such as
400dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only
401dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // integers because it gives good enough results for our benchmarks.
402cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth();
403dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
404cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // We cannot do much with Values that are not a User, such as an Argument.
405dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  User *U = dyn_cast<User>(V);
406cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (U == nullptr) return APInt(BitWidth, 0);
407dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
408cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  APInt ConstantOffset(BitWidth, 0);
409cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
410dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Hooray, we found it!
411cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    ConstantOffset = CI->getValue();
412cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) {
413cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Trace into subexpressions for more hoisting opportunities.
414cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) {
415cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended);
416dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
417cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  } else if (isa<SExtInst>(V)) {
418cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    ConstantOffset = find(U->getOperand(0), /* SignExtended */ true,
419cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                          ZeroExtended, NonNegative).sext(BitWidth);
420cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  } else if (isa<ZExtInst>(V)) {
421cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // As an optimization, we can clear the SignExtended flag because
422cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll.
423cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
424cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0.
425cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    ConstantOffset =
426cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        find(U->getOperand(0), /* SignExtended */ false,
427cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines             /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth);
428dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
429cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
430cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // If we found a non-zero constant offset, add it to the path for
431cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't
432cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // help this optimization.
433dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (ConstantOffset != 0)
434dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    UserChain.push_back(U);
435dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return ConstantOffset;
436dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
437dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
438cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesValue *ConstantOffsetExtractor::applyExts(Value *V) {
439cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *Current = V;
440cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // ExtInsts is built in the use-def order. Therefore, we apply them to V
441cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // in the reversed order.
442cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
443cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (Constant *C = dyn_cast<Constant>(Current)) {
444cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
445cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
446cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
447cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    } else {
448cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Instruction *Ext = (*I)->clone();
449cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Ext->setOperand(0, Current);
450cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Ext->insertBefore(IP);
451cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Current = Ext;
452cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
453dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
454cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return Current;
455dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
456dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
457cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesValue *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
458cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  distributeExtsAndCloneChain(UserChain.size() - 1);
459cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Remove all nullptrs (used to be s/zext) from UserChain.
460cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  unsigned NewSize = 0;
461cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
462cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (*I != nullptr) {
463cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      UserChain[NewSize] = *I;
464cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      NewSize++;
465cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
466dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
467cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  UserChain.resize(NewSize);
468cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return removeConstOffset(UserChain.size() - 1);
469dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
470dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
471cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesValue *
472cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
473cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  User *U = UserChain[ChainIndex];
474cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ChainIndex == 0) {
475cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    assert(isa<ConstantInt>(U));
476cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
477cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
478cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
479cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
480cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
481cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
482cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines           "We only traced into two types of CastInst: sext and zext");
483cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    ExtInsts.push_back(Cast);
484cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    UserChain[ChainIndex] = nullptr;
485cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return distributeExtsAndCloneChain(ChainIndex - 1);
486cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
487cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
488cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Function find only trace into BinaryOperator and CastInst.
489cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BinaryOperator *BO = cast<BinaryOperator>(U);
490cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
491cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
492cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
493cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
494cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
495cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BinaryOperator *NewBO = nullptr;
496cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (OpNo == 0) {
497cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
498cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                   BO->getName(), IP);
499cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  } else {
500cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
501cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                   BO->getName(), IP);
502cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
503cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return UserChain[ChainIndex] = NewBO;
504dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
505dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
506cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesValue *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
507cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ChainIndex == 0) {
508cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    assert(isa<ConstantInt>(UserChain[ChainIndex]));
509cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
510cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
511cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
512cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
513cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
514cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
515cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *NextInChain = removeConstOffset(ChainIndex - 1);
516cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Value *TheOther = BO->getOperand(1 - OpNo);
517cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
518cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
519cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // sub-expression to be just TheOther.
520cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
521cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
522cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      return TheOther;
523cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
524cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
525cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (BO->getOpcode() == Instruction::Or) {
526cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Rebuild "or" as "add", because "or" may be invalid for the new
527cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // epxression.
528cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
529cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // For instance, given
530cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //   a | (b + 5) where a and b + 5 have no common bits,
531cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // we can extract 5 as the constant offset.
532cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
533cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // However, reusing the "or" in the new index would give us
534cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //   (a | b) + 5
535cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // which does not equal a | (b + 5).
536cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //
537cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Replacing the "or" with "add" is fine, because
538cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
539cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
540cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                     BO->getName(), IP);
541cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
542cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
543cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // We can reuse BO in this case, because the new expression shares the same
544cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // instruction type and BO is used at most once.
545cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  assert(BO->getNumUses() <= 1 &&
546cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         "distributeExtsAndCloneChain clones each BinaryOperator in "
547cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         "UserChain, so no one should be used more than "
548cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines         "once");
549cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BO->setOperand(OpNo, NextInChain);
550cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BO->setHasNoSignedWrap(false);
551cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BO->setHasNoUnsignedWrap(false);
552cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Make sure it appears after all instructions we've inserted so far.
553cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  BO->moveBefore(IP);
554cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return BO;
555dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
556dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
557dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesint64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx,
558dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                         const DataLayout *DL,
559cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                         GetElementPtrInst *GEP) {
560cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  ConstantOffsetExtractor Extractor(DL, GEP);
561dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Find a non-zero constant offset first.
562cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  APInt ConstantOffset =
563cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
564cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                     GEP->isInBounds());
565cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (ConstantOffset != 0) {
566cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Separates the constant offset from the GEP index.
567cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    NewIdx = Extractor.rebuildWithoutConstOffset();
568cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
569cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return ConstantOffset.getSExtValue();
570dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
571dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
572cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesint64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL,
573cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      GetElementPtrInst *GEP) {
574cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative.
575cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return ConstantOffsetExtractor(DL, GEP)
576cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false,
577cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines            GEP->isInBounds())
578cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      .getSExtValue();
579dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
580dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
581dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesvoid ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne,
582dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                               APInt &KnownZero) const {
583dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  IntegerType *IT = cast<IntegerType>(V->getType());
584dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  KnownOne = APInt(IT->getBitWidth(), 0);
585dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  KnownZero = APInt(IT->getBitWidth(), 0);
586dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  llvm::computeKnownBits(V, KnownZero, KnownOne, DL, 0);
587dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
588dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
589dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const {
590dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  assert(LHS->getType() == RHS->getType() &&
591dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines         "LHS and RHS should have the same type");
592dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  APInt LHSKnownOne, LHSKnownZero, RHSKnownOne, RHSKnownZero;
593dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ComputeKnownBits(LHS, LHSKnownOne, LHSKnownZero);
594dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  ComputeKnownBits(RHS, RHSKnownOne, RHSKnownZero);
595dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
596dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
597dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
598cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesbool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
599cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    GetElementPtrInst *GEP) {
600cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool Changed = false;
601cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
602cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  gep_type_iterator GTI = gep_type_begin(*GEP);
603cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
604cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines       I != E; ++I, ++GTI) {
605cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    // Skip struct member indices which must be i32.
606cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (isa<SequentialType>(*GTI)) {
607cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      if ((*I)->getType() != IntPtrTy) {
608cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
609cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        Changed = true;
610cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      }
611cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
612cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
613cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return Changed;
614cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines}
615cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
616cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesbool
617cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSeparateConstOffsetFromGEP::convertInBoundsZExtToSExt(GetElementPtrInst *GEP) {
618cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (!GEP->isInBounds())
619cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return false;
620cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
621cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // TODO: consider alloca
622cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  GlobalVariable *UnderlyingObject =
623cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      dyn_cast<GlobalVariable>(GEP->getPointerOperand());
624cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  if (UnderlyingObject == nullptr)
625cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    return false;
626cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
627cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  uint64_t ObjectSize =
628cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      DL->getTypeAllocSize(UnderlyingObject->getType()->getElementType());
629cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  gep_type_iterator GTI = gep_type_begin(*GEP);
630cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  bool Changed = false;
631cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E;
632cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines       ++I, ++GTI) {
633cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    if (isa<SequentialType>(*GTI)) {
634cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      if (ZExtInst *Extended = dyn_cast<ZExtInst>(*I)) {
635cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        unsigned SrcBitWidth =
636cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines            cast<IntegerType>(Extended->getSrcTy())->getBitWidth();
637cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        // For GEP operand zext(a), if a <= max signed value of typeof(a), then
638cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        // the sign bit of a is zero and sext(a) = zext(a). Because the GEP is
639cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        // in bounds, we know a <= ObjectSize, so the condition can be reduced
640cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        // to ObjectSize <= max signed value of typeof(a).
641cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        if (ObjectSize <=
642cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines            APInt::getSignedMaxValue(SrcBitWidth).getZExtValue()) {
643cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines          *I = new SExtInst(Extended->getOperand(0), Extended->getType(),
644cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                            Extended->getName(), GEP);
645cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines          Changed = true;
646cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines        }
647cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines      }
648cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines    }
649cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  }
650cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  return Changed;
651cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines}
652cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines
653cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hinesint64_t
654cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen HinesSeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
655cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines                                                 bool &NeedsExtraction) {
656dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  NeedsExtraction = false;
657dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  int64_t AccumulativeByteOffset = 0;
658dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  gep_type_iterator GTI = gep_type_begin(*GEP);
659dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
660dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (isa<SequentialType>(*GTI)) {
661dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Tries to extract a constant offset from this GEP index.
662dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      int64_t ConstantOffset =
663cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines          ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP);
664dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (ConstantOffset != 0) {
665dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        NeedsExtraction = true;
666dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // A GEP may have multiple indices.  We accumulate the extracted
667dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // constant offset to a byte offset, and later offset the remainder of
668dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        // the original GEP with this byte offset.
669dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        AccumulativeByteOffset +=
670dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines            ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
671dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
672dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
673dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
674dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return AccumulativeByteOffset;
675dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
676dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
677dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
678dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Skip vector GEPs.
679dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (GEP->getType()->isVectorTy())
680dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return false;
681dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
682dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // The backend can already nicely handle the case where all indices are
683dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // constant.
684dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (GEP->hasAllConstantIndices())
685dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return false;
686dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
687dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool Changed = false;
688cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Changed |= canonicalizeArrayIndicesToPointerSize(GEP);
689cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Changed |= convertInBoundsZExtToSExt(GEP);
690dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
691dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool NeedsExtraction;
692cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction);
693dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
694dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!NeedsExtraction)
695dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return Changed;
696dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Before really splitting the GEP, check whether the backend supports the
697dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // addressing mode we are about to produce. If no, this splitting probably
698dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // won't be beneficial.
699dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  TargetTransformInfo &TTI = getAnalysis<TargetTransformInfo>();
700dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (!TTI.isLegalAddressingMode(GEP->getType()->getElementType(),
701dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                 /*BaseGV=*/nullptr, AccumulativeByteOffset,
702dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                 /*HasBaseReg=*/true, /*Scale=*/0)) {
703dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return Changed;
704dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
705dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
706dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Remove the constant offset in each GEP index. The resultant GEP computes
707dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // the variadic base.
708dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  gep_type_iterator GTI = gep_type_begin(*GEP);
709dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
710dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (isa<SequentialType>(*GTI)) {
711dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      Value *NewIdx = nullptr;
712dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // Tries to extract a constant offset from this GEP index.
713dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      int64_t ConstantOffset =
714dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines          ConstantOffsetExtractor::Extract(GEP->getOperand(I), NewIdx, DL, GEP);
715dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (ConstantOffset != 0) {
716dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        assert(NewIdx != nullptr &&
717dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines               "ConstantOffset != 0 implies NewIdx is set");
718dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        GEP->setOperand(I, NewIdx);
719dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
720dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
721dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
722cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Clear the inbounds attribute because the new index may be off-bound.
723cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // e.g.,
724cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
725cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // b = add i64 a, 5
726cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // addr = gep inbounds float* p, i64 b
727cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
728cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // is transformed to:
729cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
730cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // addr2 = gep float* p, i64 a
731cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // addr = gep float* addr2, i64 5
732cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
733cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // If a is -4, although the old index b is in bounds, the new index a is
734cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the
735cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // inbounds keyword is not present, the offsets are added to the base
736cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // address with silently-wrapping two's complement arithmetic".
737cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // Therefore, the final code will be a semantically equivalent.
738cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  //
739cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // TODO(jingyue): do some range analysis to keep as many inbounds as
740cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  // possible. GEPs with inbounds are more friendly to alias analysis.
741cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  GEP->setIsInBounds(false);
742dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
743dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // Offsets the base with the accumulative byte offset.
744dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
745dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %gep                        ; the base
746dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   ... %gep ...
747dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
748dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // => add the offset
749dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
750dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %gep2                       ; clone of %gep
751dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
752dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %gep                        ; will be removed
753dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   ... %gep ...
754dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
755dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // => replace all uses of %gep with %new.gep and remove %gep
756dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
757dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %gep2                       ; clone of %gep
758dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %new.gep = gep %gep2, <offset / sizeof(*%gep)>
759dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   ... %new.gep ...
760dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
761dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // If AccumulativeByteOffset is not a multiple of sizeof(*%gep), we emit an
762dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // uglygep (http://llvm.org/docs/GetElementPtr.html#what-s-an-uglygep):
763dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // bitcast %gep2 to i8*, add the offset, and bitcast the result back to the
764dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  // type of %gep.
765dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //
766dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %gep2                       ; clone of %gep
767dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %0       = bitcast %gep2 to i8*
768dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %uglygep = gep %0, <offset>
769dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   %new.gep = bitcast %uglygep to <type of %gep>
770dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  //   ... %new.gep ...
771dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  Instruction *NewGEP = GEP->clone();
772dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  NewGEP->insertBefore(GEP);
773dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
774dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  uint64_t ElementTypeSizeOfGEP =
775dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      DL->getTypeAllocSize(GEP->getType()->getElementType());
776cd81d94322a39503e4a3e87b6ee03d4fcb3465fbStephen Hines  Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
777dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
778dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Very likely. As long as %gep is natually aligned, the byte offset we
779dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // extracted should be a multiple of sizeof(*%gep).
780dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Per ANSI C standard, signed / unsigned = unsigned. Therefore, we
781dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // cast ElementTypeSizeOfGEP to signed.
782dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    int64_t Index =
783dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        AccumulativeByteOffset / static_cast<int64_t>(ElementTypeSizeOfGEP);
784dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    NewGEP = GetElementPtrInst::Create(
785dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        NewGEP, ConstantInt::get(IntPtrTy, Index, true), GEP->getName(), GEP);
786dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  } else {
787dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Unlikely but possible. For example,
788dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // #pragma pack(1)
789dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // struct S {
790dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    //   int a[3];
791dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    //   int64 b[8];
792dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // };
793dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // #pragma pack()
794dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    //
795dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Suppose the gep before extraction is &s[i + 1].b[j + 3]. After
796dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // extraction, it becomes &s[i].b[j] and AccumulativeByteOffset is
797dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // sizeof(S) + 3 * sizeof(int64) = 100, which is not a multiple of
798dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // sizeof(int64).
799dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    //
800dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    // Emit an uglygep in this case.
801dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    Type *I8PtrTy = Type::getInt8PtrTy(GEP->getContext(),
802dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines                                       GEP->getPointerAddressSpace());
803dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    NewGEP = new BitCastInst(NewGEP, I8PtrTy, "", GEP);
804dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    NewGEP = GetElementPtrInst::Create(
805dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        NewGEP, ConstantInt::get(IntPtrTy, AccumulativeByteOffset, true),
806dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        "uglygep", GEP);
807dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    if (GEP->getType() != I8PtrTy)
808dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      NewGEP = new BitCastInst(NewGEP, GEP->getType(), GEP->getName(), GEP);
809dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
810dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
811dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  GEP->replaceAllUsesWith(NewGEP);
812dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  GEP->eraseFromParent();
813dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
814dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return true;
815dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
816dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
817dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hinesbool SeparateConstOffsetFromGEP::runOnFunction(Function &F) {
818dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  if (DisableSeparateConstOffsetFromGEP)
819dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    return false;
820dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines
821dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  bool Changed = false;
822dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  for (Function::iterator B = F.begin(), BE = F.end(); B != BE; ++B) {
823dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    for (BasicBlock::iterator I = B->begin(), IE = B->end(); I != IE; ) {
824dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I++)) {
825dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines        Changed |= splitGEP(GEP);
826dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      }
827dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // No need to split GEP ConstantExprs because all its indices are constant
828dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines      // already.
829dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines    }
830dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  }
831dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines  return Changed;
832dce4a407a24b04eebc6a376f8e62b41aaa7b071fStephen Hines}
833